From: stark!gene@newsserv.cs.sunysb.edu (Gene Stark)
[unix-history] / sys / kern / kern_physio.c
CommitLineData
dd403947
DG
1/*
2 * Copyright (c) 1989, 1990, 1991, 1992 William F. Jolitz, TeleMuse
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This software is a component of "386BSD" developed by
16 William F. Jolitz, TeleMuse.
17 * 4. Neither the name of the developer nor the name "386BSD"
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS A COMPONENT OF 386BSD DEVELOPED BY WILLIAM F. JOLITZ
22 * AND IS INTENDED FOR RESEARCH AND EDUCATIONAL PURPOSES ONLY. THIS
23 * SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT.
24 * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT
25 * NOT MAKE USE THIS WORK.
26 *
27 * FOR USERS WHO WISH TO UNDERSTAND THE 386BSD SYSTEM DEVELOPED
28 * BY WILLIAM F. JOLITZ, WE RECOMMEND THE USER STUDY WRITTEN
29 * REFERENCES SUCH AS THE "PORTING UNIX TO THE 386" SERIES
30 * (BEGINNING JANUARY 1991 "DR. DOBBS JOURNAL", USA AND BEGINNING
31 * JUNE 1991 "UNIX MAGAZIN", GERMANY) BY WILLIAM F. JOLITZ AND
32 * LYNNE GREER JOLITZ, AS WELL AS OTHER BOOKS ON UNIX AND THE
33 * ON-LINE 386BSD USER MANUAL BEFORE USE. A BOOK DISCUSSING THE INTERNALS
34 * OF 386BSD ENTITLED "386BSD FROM THE INSIDE OUT" WILL BE AVAILABLE LATE 1992.
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND
37 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
39 * ARE DISCLAIMED. IN NO EVENT SHALL THE DEVELOPER BE LIABLE
40 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
41 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
42 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
44 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
45 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46 * SUCH DAMAGE.
47 *
82b95dad 48 * $Id: kern_physio.c,v 1.5 1994/03/30 02:31:26 davidg Exp $
dd403947
DG
49 */
50
51#include "param.h"
52#include "systm.h"
53#include "buf.h"
54#include "conf.h"
55#include "proc.h"
56#include "malloc.h"
57#include "vnode.h"
58#include "vm/vm.h"
41aefbec 59#include "vm/vm_page.h"
dd403947
DG
60#include "specdev.h"
61
4d107077
DG
62#define HOLD_WORKS_FOR_SHARING
63
dd403947
DG
64/*
65 * Driver interface to do "raw" I/O in the address space of a
66 * user process directly for read and write operations.
67 */
68
69int
70rawread(dev, uio)
71 dev_t dev; struct uio *uio;
72{
73 return (uioapply(physio, (caddr_t) cdevsw[major(dev)].d_strategy,
74 (caddr_t) (u_long) dev, uio));
75}
76
77int
78rawwrite(dev, uio)
79 dev_t dev; struct uio *uio;
80{
81 return (uioapply(physio, (caddr_t) cdevsw[major(dev)].d_strategy,
82 (caddr_t) (u_long) dev, uio));
83}
84
82b95dad
DG
85static void
86physwakeup(bp)
87 struct buf *bp;
88{
89 wakeup((caddr_t) bp);
90 bp->b_flags &= ~B_CALL;
91}
dd403947
DG
92
93int physio(strat, dev, bp, off, rw, base, len, p)
94 d_strategy_t strat;
95 dev_t dev;
96 struct buf *bp;
97 int rw, off;
98 caddr_t base;
99 int *len;
100 struct proc *p;
101{
102 int amttodo = *len;
103 int error, amtdone;
104 vm_prot_t ftype;
41aefbec 105 vm_offset_t v, lastv, pa;
dd403947
DG
106 caddr_t adr;
107 int oldflags;
108 int s;
109
110 int bp_alloc = (bp == 0);
111
112/*
113 * keep the process from being swapped
114 */
115 oldflags = p->p_flag;
116 p->p_flag |= SPHYSIO;
117
118 rw = rw == UIO_READ ? B_READ : 0;
119
120 /* create and build a buffer header for a transfer */
121
122 if (bp_alloc) {
123 bp = (struct buf *)getpbuf();
124 bzero((char *)bp, sizeof(*bp)); /* 09 Sep 92*/
125 } else {
126 s = splbio();
127 while (bp->b_flags & B_BUSY) {
128 bp->b_flags |= B_WANTED;
129 tsleep((caddr_t)bp, PRIBIO, "physbw", 0);
130 }
131 bp->b_flags |= B_BUSY;
132 splx(s);
133 }
134
dd403947
DG
135 bp->b_proc = p;
136 bp->b_dev = dev;
137 bp->b_error = 0;
138 bp->b_blkno = off/DEV_BSIZE;
139 amtdone = 0;
140
141 /* iteratively do I/O on as large a chunk as possible */
142 do {
82b95dad
DG
143 bp->b_flags = B_BUSY | B_PHYS | B_CALL | rw;
144 bp->b_iodone = physwakeup;
dd403947 145 bp->b_un.b_addr = base;
4d107077
DG
146 /*
147 * Notice that b_bufsize is more owned by the buffer
148 * allocating entity, while b_bcount might be modified
149 * by the called I/O routines. So after I/O is complete
150 * the only thing guaranteed to be unchanged is
151 * b_bufsize.
152 */
dd403947 153 bp->b_bcount = min (256*1024, amttodo);
4d107077 154 bp->b_bufsize = bp->b_bcount;
dd403947
DG
155
156 /* first, check if accessible */
4d107077 157 if (rw == B_READ && !useracc(base, bp->b_bufsize, B_WRITE)) {
dd403947
DG
158 error = EFAULT;
159 goto errrtn;
160 }
4d107077 161 if (rw == B_WRITE && !useracc(base, bp->b_bufsize, B_READ)) {
dd403947
DG
162 error = EFAULT;
163 goto errrtn;
164 }
165
166 /* update referenced and dirty bits, handle copy objects */
167 if (rw == B_READ)
168 ftype = VM_PROT_READ | VM_PROT_WRITE;
169 else
170 ftype = VM_PROT_READ;
171
172 lastv = 0;
4d107077 173 for (adr = (caddr_t)trunc_page(base); adr < base + bp->b_bufsize;
dd403947
DG
174 adr += NBPG) {
175
176/*
41aefbec 177 * make sure that the pde is valid and held
dd403947
DG
178 */
179 v = trunc_page(((vm_offset_t)vtopte(adr)));
180 if (v != lastv) {
41aefbec
DG
181
182 *(volatile int *)v += 0;
183 pa = pmap_extract(&p->p_vmspace->vm_pmap, v);
184 vm_page_hold(PHYS_TO_VM_PAGE(pa));
dd403947
DG
185 lastv = v;
186 }
187
188/*
189 * do the vm_fault if needed, do the copy-on-write thing when
190 * reading stuff off device into memory.
191 */
192 if (ftype & VM_PROT_WRITE) {
193 /*
194 * properly handle copy-on-write
195 */
196 *(volatile int *) adr += 0;
6232f830
DG
197 }
198#if defined(HOLD_WORKS_FOR_SHARING)
199 else {
dd403947
DG
200 *(volatile int *) adr;
201 }
41aefbec 202 pa = pmap_extract(&p->p_vmspace->vm_pmap, (vm_offset_t) adr);
dd403947 203/*
41aefbec 204 * hold the data page
dd403947 205 */
41aefbec 206 vm_page_hold(PHYS_TO_VM_PAGE(pa));
6232f830 207#endif
41aefbec 208 }
dd403947 209
6232f830 210#if !defined(HOLD_WORKS_FOR_SHARING)
4d107077 211 vslock(base, bp->b_bufsize);
6232f830 212#endif
4d107077
DG
213
214 vmapbuf(bp);
215
dd403947 216 /* perform transfer */
4d107077
DG
217 (*strat)(bp);
218
219 /* pageout daemon doesn't wait for pushed pages */
220 s = splbio();
221 while ((bp->b_flags & B_DONE) == 0)
222 tsleep((caddr_t)bp, PRIBIO, "physstr", 0);
223 splx(s);
224
225 vunmapbuf(bp);
226
6232f830 227#if !defined(HOLD_WORKS_FOR_SHARING)
4d107077 228 vsunlock(base, bp->b_bufsize);
6232f830 229#endif
dd403947 230
dd403947 231/*
41aefbec 232 * unhold the pde, and data pages
dd403947 233 */
41aefbec 234 lastv = 0;
4d107077 235 for (adr = (caddr_t)trunc_page(base); adr < base + bp->b_bufsize;
dd403947
DG
236 adr += NBPG) {
237 v = trunc_page(((vm_offset_t)vtopte(adr)));
238 if (v != lastv) {
41aefbec
DG
239 pa = pmap_extract(&p->p_vmspace->vm_pmap, v);
240 vm_page_unhold(PHYS_TO_VM_PAGE(pa));
dd403947
DG
241 lastv = v;
242 }
6232f830 243#if defined(HOLD_WORKS_FOR_SHARING)
41aefbec
DG
244 pa = pmap_extract(&p->p_vmspace->vm_pmap, (vm_offset_t) adr);
245 vm_page_unhold(PHYS_TO_VM_PAGE(pa));
6232f830 246#endif
dd403947
DG
247 }
248
249
4d107077
DG
250 /*
251 * in this case, we need to use b_bcount instead of
252 * b_bufsize.
253 */
dd403947
DG
254 amtdone = bp->b_bcount - bp->b_resid;
255 amttodo -= amtdone;
256 base += amtdone;
257 bp->b_blkno += amtdone/DEV_BSIZE;
258 } while (amttodo && (bp->b_flags & B_ERROR) == 0 && amtdone > 0);
259
260 error = bp->b_error;
261errrtn:
262 if (bp_alloc) {
263 relpbuf(bp);
264 } else {
265 bp->b_flags &= ~B_BUSY;
266 wakeup((caddr_t)bp);
267 }
268 *len = amttodo;
269
270/*
271 * allow the process to be swapped
272 */
273 p->p_flag &= ~SPHYSIO;
274 p->p_flag |= (oldflags & SPHYSIO);
275
276 return (error);
277}