/*
 * [unix-history] usr/src/sys/kern/kern_physio.c
 * (blame view; commit title: "parameterize max proc id")
 */
/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)kern_physio.c	7.15 (Berkeley) %G%
 */
961945a8 8
94368568
JB
9#include "param.h"
10#include "systm.h"
94368568
JB
11#include "user.h"
12#include "buf.h"
13#include "conf.h"
14#include "proc.h"
15#include "seg.h"
16#include "vm.h"
17#include "trace.h"
18#include "map.h"
c4ec2128 19#include "vnode.h"
0f93ba7b 20#include "specdev.h"
d301d150
KM
21
22#include "machine/pte.h"
ec67a3ce
MK
23#ifdef SECSIZE
24#include "file.h"
25#include "ioctl.h"
26#include "disklabel.h"
27#endif SECSIZE
663dbc72 28
663dbc72
BJ
29/*
30 * Swap IO headers -
31 * They contain the necessary information for the swap I/O.
32 * At any given time, a swap header can be in three
33 * different lists. When free it is in the free list,
34 * when allocated and the I/O queued, it is on the swap
35 * device list, and finally, if the operation was a dirty
36 * page push, when the I/O completes, it is inserted
37 * in a list of cleaned pages to be processed by the pageout daemon.
38 */
4c05b581 39struct buf *swbuf;
663dbc72 40
663dbc72
BJ
41/*
42 * swap I/O -
43 *
44 * If the flag indicates a dirty page push initiated
45 * by the pageout daemon, we map the page into the i th
46 * virtual page of process 2 (the daemon itself) where i is
47 * the index of the swap header that has been allocated.
48 * We simply initialize the header and queue the I/O but
49 * do not wait for completion. When the I/O completes,
ec67a3ce 50 * biodone() will link the header to a list of cleaned
663dbc72
BJ
51 * pages to be processed by the pageout daemon.
52 */
c4ec2128 53swap(p, dblkno, addr, nbytes, rdflg, flag, vp, pfcent)
663dbc72
BJ
54 struct proc *p;
55 swblk_t dblkno;
56 caddr_t addr;
39d536e6 57 int nbytes, rdflg, flag;
c4ec2128 58 struct vnode *vp;
39d536e6 59 u_int pfcent;
663dbc72
BJ
60{
61 register struct buf *bp;
663dbc72 62 register struct pte *dpte, *vpte;
c5648f55
KB
63 register u_int c;
64 int p2dp, s, error = 0;
65 struct buf *getswbuf();
66 int swdone();
663dbc72 67
c5648f55 68 bp = getswbuf(PSWP+1);
663dbc72 69 bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
ec67a3ce
MK
70#ifdef SECSIZE
71 bp->b_blksize = DEV_BSIZE;
72#endif SECSIZE
663dbc72
BJ
73 if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
74 if (rdflg == B_READ)
75 sum.v_pswpin += btoc(nbytes);
76 else
77 sum.v_pswpout += btoc(nbytes);
78 bp->b_proc = p;
79 if (flag & B_DIRTY) {
80 p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
81 dpte = dptopte(&proc[2], p2dp);
82 vpte = vtopte(p, btop(addr));
83 for (c = 0; c < nbytes; c += NBPG) {
84 if (vpte->pg_pfnum == 0 || vpte->pg_fod)
85 panic("swap bad pte");
86 *dpte++ = *vpte++;
87 }
d668d9ba
SL
88 bp->b_un.b_addr = (caddr_t)ctob(dptov(&proc[2], p2dp));
89 bp->b_flags |= B_CALL;
90 bp->b_iodone = swdone;
91 bp->b_pfcent = pfcent;
663dbc72
BJ
92 } else
93 bp->b_un.b_addr = addr;
94 while (nbytes > 0) {
c4ec2128 95 bp->b_blkno = dblkno;
343a57bd
KM
96 if (bp->b_vp)
97 brelvp(bp);
5dccc1f9 98 VHOLD(vp);
343a57bd
KM
99 bp->b_vp = vp;
100 bp->b_dev = vp->v_rdev;
e438ed8e 101 bp->b_bcount = nbytes;
26bd0870
KM
102 if ((bp->b_flags & B_READ) == 0)
103 vp->v_numoutput++;
e438ed8e
BJ
104 minphys(bp);
105 c = bp->b_bcount;
53f9ca20 106#ifdef TRACE
c4ec2128 107 trace(TR_SWAPIO, vp, bp->b_blkno);
cd682858 108#endif
d293217c 109#if defined(hp300) || defined(i386)
cd682858 110 vmapbuf(bp);
53f9ca20 111#endif
c4ec2128 112 VOP_STRATEGY(bp);
c5648f55 113 /* pageout daemon doesn't wait for pushed pages */
663dbc72
BJ
114 if (flag & B_DIRTY) {
115 if (c < nbytes)
116 panic("big push");
ec67a3ce 117 return (0);
663dbc72 118 }
d293217c 119#if defined(hp300) || defined(i386)
cd682858
KM
120 vunmapbuf(bp);
121#endif
663dbc72
BJ
122 bp->b_un.b_addr += c;
123 bp->b_flags &= ~B_DONE;
124 if (bp->b_flags & B_ERROR) {
125 if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
126 panic("hard IO err in swap");
d03b3d84 127 swkill(p, "swap: read error from swap device");
699e2902 128 error = EIO;
663dbc72
BJ
129 }
130 nbytes -= c;
ec67a3ce
MK
131#ifdef SECSIZE
132 if (flag & B_PGIN && nbytes > 0)
133 panic("big pgin");
134#endif SECSIZE
919fe934 135 dblkno += btodb(c);
663dbc72 136 }
663dbc72 137 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
c5648f55 138 freeswbuf(bp);
699e2902 139 return (error);
663dbc72
BJ
140}
141
d668d9ba
SL
142/*
143 * Put a buffer on the clean list after I/O is done.
144 * Called from biodone.
145 */
146swdone(bp)
147 register struct buf *bp;
148{
149 register int s;
150
151 if (bp->b_flags & B_ERROR)
152 panic("IO err in push");
d95fc990 153 s = splbio();
d668d9ba
SL
154 bp->av_forw = bclnlist;
155 cnt.v_pgout++;
156 cnt.v_pgpgout += bp->b_bcount / NBPG;
157 bclnlist = bp;
158 if (bswlist.b_flags & B_WANTED)
159 wakeup((caddr_t)&proc[2]);
d293217c 160#if defined(hp300) || defined(i386)
cd682858
KM
161 vunmapbuf(bp);
162#endif
d668d9ba
SL
163 splx(s);
164}
165
663dbc72
BJ
166/*
167 * If rout == 0 then killed on swap error, else
168 * rout is the name of the routine where we ran out of
169 * swap space.
170 */
171swkill(p, rout)
172 struct proc *p;
173 char *rout;
174{
175
7cd10076
JB
176 printf("pid %d: %s\n", p->p_pid, rout);
177 uprintf("sorry, pid %d was killed in %s\n", p->p_pid, rout);
663dbc72
BJ
178 /*
179 * To be sure no looping (e.g. in vmsched trying to
180 * swap out) mark process locked in core (as though
181 * done by user) after killing it so noone will try
182 * to swap it out.
183 */
a30d2e97 184 psignal(p, SIGKILL);
663dbc72
BJ
185 p->p_flag |= SULOCK;
186}
187
663dbc72
BJ
188/*
189 * Raw I/O. The arguments are
190 * The strategy routine for the device
c5648f55
KB
191 * A buffer, which will either be a special buffer header owned
192 * exclusively by the device for this purpose, or NULL,
193 * indicating that we should use a swap buffer
663dbc72
BJ
194 * The device number
195 * Read/write flag
196 * Essentially all the work is computing physical addresses and
197 * validating them.
198 * If the user has the proper access privilidges, the process is
199 * marked 'delayed unlock' and the pages involved in the I/O are
200 * faulted and locked. After the completion of the I/O, the above pages
201 * are unlocked.
202 */
d6d7360b
BJ
203physio(strat, bp, dev, rw, mincnt, uio)
204 int (*strat)();
205 register struct buf *bp;
206 dev_t dev;
207 int rw;
c5648f55 208 u_int (*mincnt)();
d6d7360b 209 struct uio *uio;
663dbc72 210{
a196746e 211 register struct iovec *iov;
58c3cad7 212 register int requested, done;
663dbc72 213 char *a;
c5648f55
KB
214 int s, allocbuf = 0, error = 0;
215 struct buf *getswbuf();
ec67a3ce
MK
216#ifdef SECSIZE
217 int bsize;
218 struct partinfo dpart;
219#endif SECSIZE
663dbc72 220
ec67a3ce
MK
221#ifdef SECSIZE
222 if ((unsigned)major(dev) < nchrdev &&
223 (*cdevsw[major(dev)].d_ioctl)(dev, DIOCGPART, (caddr_t)&dpart,
224 FREAD) == 0)
225 bsize = dpart.disklab->d_secsize;
226 else
227 bsize = DEV_BSIZE;
228#endif SECSIZE
229 for (;;) {
230 if (uio->uio_iovcnt == 0)
231 return (0);
232 iov = uio->uio_iov;
233 if (useracc(iov->iov_base, (u_int)iov->iov_len,
234 rw==B_READ? B_WRITE : B_READ) == NULL)
235 return (EFAULT);
236 s = splbio();
237 while (bp->b_flags&B_BUSY) {
238 bp->b_flags |= B_WANTED;
239 sleep((caddr_t)bp, PRIBIO+1);
240 }
c5648f55
KB
241 if (!allocbuf) { /* only if sharing caller's buffer */
242 s = splbio();
243 while (bp->b_flags&B_BUSY) {
244 bp->b_flags |= B_WANTED;
245 sleep((caddr_t)bp, PRIBIO+1);
246 }
247 splx(s);
248 }
ec67a3ce
MK
249 bp->b_error = 0;
250 bp->b_proc = u.u_procp;
251#ifdef SECSIZE
252 bp->b_blksize = bsize;
253#endif SECSIZE
254 bp->b_un.b_addr = iov->iov_base;
255 while (iov->iov_len > 0) {
256 bp->b_flags = B_BUSY | B_PHYS | rw;
257 bp->b_dev = dev;
258#ifdef SECSIZE
259 bp->b_blkno = uio->uio_offset / bsize;
260#else SECSIZE
261 bp->b_blkno = btodb(uio->uio_offset);
262#endif SECSIZE
263 bp->b_bcount = iov->iov_len;
264 (*mincnt)(bp);
265 c = bp->b_bcount;
266 u.u_procp->p_flag |= SPHYSIO;
267 vslock(a = bp->b_un.b_addr, c);
268 physstrat(bp, strat, PRIBIO);
269 (void) splbio();
270 vsunlock(a, c, rw);
271 u.u_procp->p_flag &= ~SPHYSIO;
272 if (bp->b_flags&B_WANTED)
273 wakeup((caddr_t)bp);
274 splx(s);
275 c -= bp->b_resid;
276 bp->b_un.b_addr += c;
277 iov->iov_len -= c;
278 uio->uio_resid -= c;
279 uio->uio_offset += c;
280 /* temp kludge for tape drives */
281 if (bp->b_resid || (bp->b_flags&B_ERROR))
282 break;
283 }
284 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
285 error = geterror(bp);
ec67a3ce
MK
286 if (bp->b_resid || error)
287 return (error);
288 uio->uio_iov++;
289 uio->uio_iovcnt--;
663dbc72 290 }
cd682858
KM
291#if defined(hp300)
292 DCIU();
293#endif
c5648f55
KB
294 if (allocbuf)
295 freeswbuf(bp);
296 return (error);
663dbc72
BJ
297}
298
c5648f55 299u_int
663dbc72 300minphys(bp)
d6d7360b 301 struct buf *bp;
663dbc72 302{
35a494b8
SL
303 if (bp->b_bcount > MAXPHYS)
304 bp->b_bcount = MAXPHYS;
663dbc72 305}
c5648f55
KB
306
307static
308struct buf *
309getswbuf(prio)
310 int prio;
311{
312 int s;
313 struct buf *bp;
314
315 s = splbio();
316 while (bswlist.av_forw == NULL) {
317 bswlist.b_flags |= B_WANTED;
318 sleep((caddr_t)&bswlist, prio);
319 }
320 bp = bswlist.av_forw;
321 bswlist.av_forw = bp->av_forw;
322 splx(s);
323 return (bp);
324}
325
326static
327freeswbuf(bp)
328 struct buf *bp;
329{
330 int s;
331
332 s = splbio();
333 bp->av_forw = bswlist.av_forw;
334 bswlist.av_forw = bp;
343a57bd
KM
335 if (bp->b_vp)
336 brelvp(bp);
c5648f55
KB
337 if (bswlist.b_flags & B_WANTED) {
338 bswlist.b_flags &= ~B_WANTED;
339 wakeup((caddr_t)&bswlist);
340 wakeup((caddr_t)&proc[2]);
341 }
342 splx(s);
343}
344
345rawread(dev, uio)
346 dev_t dev;
347 struct uio *uio;
348{
349 return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL,
350 dev, B_READ, minphys, uio));
351}
352
353rawwrite(dev, uio)
354 dev_t dev;
355 struct uio *uio;
356{
357 return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL,
358 dev, B_WRITE, minphys, uio));
359}