minor optimization
[unix-history] / usr / src / sys / kern / kern_physio.c
CommitLineData
/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)kern_physio.c	7.12 (Berkeley) %G%
 */
961945a8 8
94368568
JB
9#include "param.h"
10#include "systm.h"
94368568
JB
11#include "user.h"
12#include "buf.h"
13#include "conf.h"
14#include "proc.h"
15#include "seg.h"
16#include "vm.h"
17#include "trace.h"
18#include "map.h"
c4ec2128 19#include "vnode.h"
d301d150
KM
20
21#include "machine/pte.h"
ec67a3ce
MK
22#ifdef SECSIZE
23#include "file.h"
24#include "ioctl.h"
25#include "disklabel.h"
26#endif SECSIZE
663dbc72 27
663dbc72
BJ
/*
 * Swap IO headers -
 * They contain the necessary information for the swap I/O.
 * At any given time, a swap header can be in three
 * different lists.  When free it is in the free list,
 * when allocated and the I/O queued, it is on the swap
 * device list, and finally, if the operation was a dirty
 * page push, when the I/O completes, it is inserted
 * in a list of cleaned pages to be processed by the pageout daemon.
 */
struct buf *swbuf;		/* base of the swap I/O header array */
663dbc72 39
663dbc72
BJ
40/*
41 * swap I/O -
42 *
43 * If the flag indicates a dirty page push initiated
44 * by the pageout daemon, we map the page into the i th
45 * virtual page of process 2 (the daemon itself) where i is
46 * the index of the swap header that has been allocated.
47 * We simply initialize the header and queue the I/O but
48 * do not wait for completion. When the I/O completes,
ec67a3ce 49 * biodone() will link the header to a list of cleaned
663dbc72
BJ
50 * pages to be processed by the pageout daemon.
51 */
c4ec2128 52swap(p, dblkno, addr, nbytes, rdflg, flag, vp, pfcent)
663dbc72
BJ
53 struct proc *p;
54 swblk_t dblkno;
55 caddr_t addr;
39d536e6 56 int nbytes, rdflg, flag;
c4ec2128 57 struct vnode *vp;
39d536e6 58 u_int pfcent;
663dbc72
BJ
59{
60 register struct buf *bp;
663dbc72 61 register struct pte *dpte, *vpte;
c5648f55
KB
62 register u_int c;
63 int p2dp, s, error = 0;
64 struct buf *getswbuf();
65 int swdone();
663dbc72 66
c5648f55 67 bp = getswbuf(PSWP+1);
663dbc72 68 bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
ec67a3ce
MK
69#ifdef SECSIZE
70 bp->b_blksize = DEV_BSIZE;
71#endif SECSIZE
663dbc72
BJ
72 if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
73 if (rdflg == B_READ)
74 sum.v_pswpin += btoc(nbytes);
75 else
76 sum.v_pswpout += btoc(nbytes);
77 bp->b_proc = p;
78 if (flag & B_DIRTY) {
79 p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
80 dpte = dptopte(&proc[2], p2dp);
81 vpte = vtopte(p, btop(addr));
82 for (c = 0; c < nbytes; c += NBPG) {
83 if (vpte->pg_pfnum == 0 || vpte->pg_fod)
84 panic("swap bad pte");
85 *dpte++ = *vpte++;
86 }
d668d9ba
SL
87 bp->b_un.b_addr = (caddr_t)ctob(dptov(&proc[2], p2dp));
88 bp->b_flags |= B_CALL;
89 bp->b_iodone = swdone;
90 bp->b_pfcent = pfcent;
663dbc72
BJ
91 } else
92 bp->b_un.b_addr = addr;
93 while (nbytes > 0) {
c4ec2128 94 bp->b_blkno = dblkno;
343a57bd
KM
95 if (bp->b_vp)
96 brelvp(bp);
5dccc1f9 97 VHOLD(vp);
343a57bd
KM
98 bp->b_vp = vp;
99 bp->b_dev = vp->v_rdev;
e438ed8e 100 bp->b_bcount = nbytes;
26bd0870
KM
101 if ((bp->b_flags & B_READ) == 0)
102 vp->v_numoutput++;
e438ed8e
BJ
103 minphys(bp);
104 c = bp->b_bcount;
53f9ca20 105#ifdef TRACE
c4ec2128 106 trace(TR_SWAPIO, vp, bp->b_blkno);
53f9ca20 107#endif
c4ec2128 108 VOP_STRATEGY(bp);
c5648f55 109 /* pageout daemon doesn't wait for pushed pages */
663dbc72
BJ
110 if (flag & B_DIRTY) {
111 if (c < nbytes)
112 panic("big push");
ec67a3ce 113 return (0);
663dbc72 114 }
663dbc72
BJ
115 bp->b_un.b_addr += c;
116 bp->b_flags &= ~B_DONE;
117 if (bp->b_flags & B_ERROR) {
118 if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
119 panic("hard IO err in swap");
d03b3d84 120 swkill(p, "swap: read error from swap device");
699e2902 121 error = EIO;
663dbc72
BJ
122 }
123 nbytes -= c;
ec67a3ce
MK
124#ifdef SECSIZE
125 if (flag & B_PGIN && nbytes > 0)
126 panic("big pgin");
127#endif SECSIZE
919fe934 128 dblkno += btodb(c);
663dbc72 129 }
663dbc72 130 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
c5648f55 131 freeswbuf(bp);
699e2902 132 return (error);
663dbc72
BJ
133}
134
d668d9ba
SL
135/*
136 * Put a buffer on the clean list after I/O is done.
137 * Called from biodone.
138 */
139swdone(bp)
140 register struct buf *bp;
141{
142 register int s;
143
144 if (bp->b_flags & B_ERROR)
145 panic("IO err in push");
d95fc990 146 s = splbio();
d668d9ba
SL
147 bp->av_forw = bclnlist;
148 cnt.v_pgout++;
149 cnt.v_pgpgout += bp->b_bcount / NBPG;
150 bclnlist = bp;
151 if (bswlist.b_flags & B_WANTED)
152 wakeup((caddr_t)&proc[2]);
153 splx(s);
154}
155
663dbc72
BJ
156/*
157 * If rout == 0 then killed on swap error, else
158 * rout is the name of the routine where we ran out of
159 * swap space.
160 */
161swkill(p, rout)
162 struct proc *p;
163 char *rout;
164{
165
7cd10076
JB
166 printf("pid %d: %s\n", p->p_pid, rout);
167 uprintf("sorry, pid %d was killed in %s\n", p->p_pid, rout);
663dbc72
BJ
168 /*
169 * To be sure no looping (e.g. in vmsched trying to
170 * swap out) mark process locked in core (as though
171 * done by user) after killing it so noone will try
172 * to swap it out.
173 */
a30d2e97 174 psignal(p, SIGKILL);
663dbc72
BJ
175 p->p_flag |= SULOCK;
176}
177
663dbc72
BJ
178/*
179 * Raw I/O. The arguments are
180 * The strategy routine for the device
c5648f55
KB
181 * A buffer, which will either be a special buffer header owned
182 * exclusively by the device for this purpose, or NULL,
183 * indicating that we should use a swap buffer
663dbc72
BJ
184 * The device number
185 * Read/write flag
186 * Essentially all the work is computing physical addresses and
187 * validating them.
188 * If the user has the proper access privilidges, the process is
189 * marked 'delayed unlock' and the pages involved in the I/O are
190 * faulted and locked. After the completion of the I/O, the above pages
191 * are unlocked.
192 */
d6d7360b
BJ
193physio(strat, bp, dev, rw, mincnt, uio)
194 int (*strat)();
195 register struct buf *bp;
196 dev_t dev;
197 int rw;
c5648f55 198 u_int (*mincnt)();
d6d7360b 199 struct uio *uio;
663dbc72 200{
a196746e 201 register struct iovec *iov;
58c3cad7 202 register int requested, done;
663dbc72 203 char *a;
c5648f55
KB
204 int s, allocbuf = 0, error = 0;
205 struct buf *getswbuf();
ec67a3ce
MK
206#ifdef SECSIZE
207 int bsize;
208 struct partinfo dpart;
209#endif SECSIZE
663dbc72 210
ec67a3ce
MK
211#ifdef SECSIZE
212 if ((unsigned)major(dev) < nchrdev &&
213 (*cdevsw[major(dev)].d_ioctl)(dev, DIOCGPART, (caddr_t)&dpart,
214 FREAD) == 0)
215 bsize = dpart.disklab->d_secsize;
216 else
217 bsize = DEV_BSIZE;
218#endif SECSIZE
219 for (;;) {
220 if (uio->uio_iovcnt == 0)
221 return (0);
222 iov = uio->uio_iov;
223 if (useracc(iov->iov_base, (u_int)iov->iov_len,
224 rw==B_READ? B_WRITE : B_READ) == NULL)
225 return (EFAULT);
226 s = splbio();
227 while (bp->b_flags&B_BUSY) {
228 bp->b_flags |= B_WANTED;
229 sleep((caddr_t)bp, PRIBIO+1);
230 }
c5648f55
KB
231 if (!allocbuf) { /* only if sharing caller's buffer */
232 s = splbio();
233 while (bp->b_flags&B_BUSY) {
234 bp->b_flags |= B_WANTED;
235 sleep((caddr_t)bp, PRIBIO+1);
236 }
237 splx(s);
238 }
ec67a3ce
MK
239 bp->b_error = 0;
240 bp->b_proc = u.u_procp;
241#ifdef SECSIZE
242 bp->b_blksize = bsize;
243#endif SECSIZE
244 bp->b_un.b_addr = iov->iov_base;
245 while (iov->iov_len > 0) {
246 bp->b_flags = B_BUSY | B_PHYS | rw;
247 bp->b_dev = dev;
248#ifdef SECSIZE
249 bp->b_blkno = uio->uio_offset / bsize;
250#else SECSIZE
251 bp->b_blkno = btodb(uio->uio_offset);
252#endif SECSIZE
253 bp->b_bcount = iov->iov_len;
254 (*mincnt)(bp);
255 c = bp->b_bcount;
256 u.u_procp->p_flag |= SPHYSIO;
257 vslock(a = bp->b_un.b_addr, c);
258 physstrat(bp, strat, PRIBIO);
259 (void) splbio();
260 vsunlock(a, c, rw);
261 u.u_procp->p_flag &= ~SPHYSIO;
262 if (bp->b_flags&B_WANTED)
263 wakeup((caddr_t)bp);
264 splx(s);
265 c -= bp->b_resid;
266 bp->b_un.b_addr += c;
267 iov->iov_len -= c;
268 uio->uio_resid -= c;
269 uio->uio_offset += c;
270 /* temp kludge for tape drives */
271 if (bp->b_resid || (bp->b_flags&B_ERROR))
272 break;
273 }
274 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
275 error = geterror(bp);
ec67a3ce
MK
276 if (bp->b_resid || error)
277 return (error);
278 uio->uio_iov++;
279 uio->uio_iovcnt--;
663dbc72 280 }
c5648f55
KB
281 if (allocbuf)
282 freeswbuf(bp);
283 return (error);
663dbc72
BJ
284}
285
c5648f55 286u_int
663dbc72 287minphys(bp)
d6d7360b 288 struct buf *bp;
663dbc72 289{
35a494b8
SL
290 if (bp->b_bcount > MAXPHYS)
291 bp->b_bcount = MAXPHYS;
663dbc72 292}
c5648f55
KB
293
294static
295struct buf *
296getswbuf(prio)
297 int prio;
298{
299 int s;
300 struct buf *bp;
301
302 s = splbio();
303 while (bswlist.av_forw == NULL) {
304 bswlist.b_flags |= B_WANTED;
305 sleep((caddr_t)&bswlist, prio);
306 }
307 bp = bswlist.av_forw;
308 bswlist.av_forw = bp->av_forw;
309 splx(s);
310 return (bp);
311}
312
313static
314freeswbuf(bp)
315 struct buf *bp;
316{
317 int s;
318
319 s = splbio();
320 bp->av_forw = bswlist.av_forw;
321 bswlist.av_forw = bp;
343a57bd
KM
322 if (bp->b_vp)
323 brelvp(bp);
c5648f55
KB
324 if (bswlist.b_flags & B_WANTED) {
325 bswlist.b_flags &= ~B_WANTED;
326 wakeup((caddr_t)&bswlist);
327 wakeup((caddr_t)&proc[2]);
328 }
329 splx(s);
330}
331
332rawread(dev, uio)
333 dev_t dev;
334 struct uio *uio;
335{
336 return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL,
337 dev, B_READ, minphys, uio));
338}
339
340rawwrite(dev, uio)
341 dev_t dev;
342 struct uio *uio;
343{
344 return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL,
345 dev, B_WRITE, minphys, uio));
346}