/*	vfs_bio.c	4.26	82/03/13	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer to
 * the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread
 *	breada
 *	baddr	(if it is incore)
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite
 *	bdwrite
 *	bawrite
 *	brelse
 */
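
/*
 * A minimal sketch (not in the original source) of the allocate/
 * release pairing described above.  The helper name modblk() is
 * hypothetical: it brings a block into core with bread(), modifies
 * it, and releases it with a delayed write.
 */
#ifdef notdef
modblk(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;

	bp = bread(dev, blkno);		/* buffer returned busy */
	if (bp->b_flags & B_ERROR) {
		brelse(bp);		/* give it back, no write */
		return;
	}
	bp->b_un.b_words[0] = 0;	/* modify the cached data */
	bdwrite(bp);			/* release; write out later */
}
#endif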

struct	buf bfreelist[BQUEUES];
struct	buf bswlist, *bclnlist;

#define	BUFHSZ	63
struct	bufhd bufhash[BUFHSZ];
#define	BUFHASH(dev, dblkno)	\
	((struct buf *)&bufhash[((int)(dev)+(int)(dblkno)) % BUFHSZ])
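/*
 * BUFHASH folds the device and disk block numbers into one of
 * BUFHSZ hash chains; incore() and getblk() then search only
 * the buffers on that chain rather than the whole pool.
 */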

/*
 * Initialize hash links for buffers.
 */
bhinit()
{
	register int i;
	register struct bufhd *bp;

	for (bp = bufhash, i = 0; i < BUFHSZ; i++, bp++)
		bp->b_forw = bp->b_back = (struct buf *)bp;
}

/* #define	DISKMON	1 */

#ifdef	DISKMON
struct {
	int	nbuf;
	long	nread;
	long	nreada;
	long	ncache;
	long	nwrite;
	long	bufcount[64];
} io_info;
#endif

/*
 * Swap IO headers -
 * They contain the necessary information for the swap I/O.
 * At any given time, a swap header can be in three
 * different lists. When free it is in the free list,
 * when allocated and the I/O queued, it is on the swap
 * device list, and finally, if the operation was a dirty
 * page push, when the I/O completes, it is inserted
 * in a list of cleaned pages to be processed by the pageout daemon.
 */
struct	buf *swbuf;
short	*swsize;		/* CAN WE JUST USE B_BCOUNT? */
int	*swpf;

#ifndef	UNFAST
#define	notavail(bp) \
{ \
	int x = spl6(); \
	(bp)->av_back->av_forw = (bp)->av_forw; \
	(bp)->av_forw->av_back = (bp)->av_back; \
	(bp)->b_flags |= B_BUSY; \
	splx(x); \
}
#endif

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;

	bp = getblk(dev, blkno);
	if (bp->b_flags&B_DONE) {
#ifdef	TRACE
		trace(TR_BREADHIT, dev, blkno);
#endif
#ifdef	DISKMON
		io_info.ncache++;
#endif
		return(bp);
	}
	bp->b_flags |= B_READ;
	bp->b_bcount = BSIZE;
	(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	TRACE
	trace(TR_BREADMISS, dev, blkno);
#endif
#ifdef	DISKMON
	io_info.nread++;
#endif
	u.u_vm.vm_inblk++;		/* pay for read */
	iowait(bp);
	return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller)
 */
struct buf *
breada(dev, blkno, rablkno)
dev_t dev;
daddr_t blkno, rablkno;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	TRACE
			trace(TR_BREADMISS, dev, blkno);
#endif
#ifdef	DISKMON
			io_info.nread++;
#endif
			u.u_vm.vm_inblk++;		/* pay for read */
		}
#ifdef	TRACE
		else
			trace(TR_BREADHIT, dev, blkno);
#endif
	}
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
#ifdef	TRACE
			trace(TR_BREADHITRA, dev, blkno);
#endif
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			rabp->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(rabp);
#ifdef	TRACE
			trace(TR_BREADMISSRA, dev, rablkno);
#endif
#ifdef	DISKMON
			io_info.nreada++;
#endif
			u.u_vm.vm_inblk++;		/* pay in advance */
		}
	}
	if(bp == NULL)
		return(bread(dev, blkno));
	iowait(bp);
	return(bp);
}

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
	bp->b_bcount = BSIZE;
#ifdef	DISKMON
	io_info.nwrite++;
#endif
	if ((flag&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* no one paid yet */
#ifdef	TRACE
	trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
#endif
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	if ((flag&B_ASYNC) == 0) {
		iowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	else
		geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
register struct buf *bp;
{
	register int flags;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* no one paid yet */
	flags = bdevsw[major(bp->b_dev)].d_flags;
	if(flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}
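
/*
 * A sketch (not in the original source) of how the write flavors
 * are typically chosen.  The routine name putblk() and its "last"
 * argument are hypothetical: "last" says whether more writes to
 * this block are expected soon.  bwrite() would be used in place
 * of bawrite() when the caller must see the error status.
 */
#ifdef notdef
putblk(bp, last)
register struct buf *bp;
int last;
{

	if (!last)
		bdwrite(bp);	/* more writes coming; defer the I/O */
	else
		bawrite(bp);	/* start the write, don't wait for it */
}
#endif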

/*
 * release the buffer, with no I/O implied.
 */
brelse(bp)
register struct buf *bp;
{
	register struct buf *flist;
	register s;

	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	if (bp->b_flags&B_ERROR)
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			bp->b_dev = NODEV;		/* no assoc */
	s = spl6();
	if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQUEUES-1];
		flist->av_forw->av_back = bp;
		bp->av_forw = flist->av_forw;
		flist->av_forw = bp;
		bp->av_back = flist;
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		flist->av_back->av_forw = bp;
		bp->av_back = flist->av_back;
		flist->av_back = bp;
		bp->av_forw = flist;
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada)
 */
incore(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;
	register int dblkno = fsbtodb(blkno);

	dp = BUFHASH(dev, dblkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno == dblkno && bp->b_dev == dev &&
		    !(bp->b_flags & B_INVAL))
			return (1);
	return (0);
}

struct buf *
baddr(dev, blkno)
dev_t dev;
daddr_t blkno;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno));
	return (0);
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp, *dp, *ep;
	register int dblkno = fsbtodb(blkno);
#ifdef	DISKMON
	register int i;
#endif
	int s;

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	dblkno = fsbtodb(blkno);
	dp = BUFHASH(dev, dblkno);
    loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != dblkno || bp->b_dev != dev ||
		    bp->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
#ifdef	DISKMON
		i = 0;
		dp = bp->av_forw;
		while ((dp->b_flags & B_HEAD) == 0) {
			i++;
			dp = dp->av_forw;
		}
		if (i<64)
			io_info.bufcount[i]++;
#endif
		notavail(bp);
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	s = spl6();
	for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--)
		if (ep->av_forw != ep)
			break;
	if (ep == bfreelist) {		/* no free blocks at all */
		ep->b_flags |= B_WANTED;
		sleep((caddr_t)ep, PRIBIO+1);
		splx(s);
		goto loop;
	}
	splx(s);
	bp = ep->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
#ifdef	TRACE
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
#endif
	bp->b_flags = B_BUSY;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = dblkno;
	return(bp);
}

/*
 * get an empty block,
 * not assigned to any particular device
 */
struct buf *
geteblk()
{
	register struct buf *bp, *dp;
	int s;

loop:
	s = spl6();
	for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		goto loop;
	}
	splx(s);
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
#ifdef	TRACE
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
#endif
	bp->b_flags = B_BUSY|B_INVAL;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = (dev_t)NODEV;
	return(bp);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
iowait(bp)
register struct buf *bp;
{
	int s;

	s = spl6();
	while ((bp->b_flags&B_DONE)==0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	geterror(bp);
}

#ifdef UNFAST
/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface)
 */
notavail(bp)
register struct buf *bp;
{
	register s;

	s = spl6();
	bp->av_back->av_forw = bp->av_forw;
	bp->av_forw->av_back = bp->av_back;
	bp->b_flags |= B_BUSY;
	splx(s);
}
#endif

/*
 * Mark I/O complete on a buffer. If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon. Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
iodone(bp)
register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_DONE)
		panic("dup iodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		bp->av_forw = bclnlist;
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		cnt.v_pgout++;
		cnt.v_pgpgout += bp->b_bcount / NBPG;
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);
		splx(s);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
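
/*
 * A sketch (not in the original source) of where iodone() fits:
 * a block device's interrupt routine calls it once per completed
 * transfer.  The "xx" device, its queue header xxtab, and the
 * helpers xxerror()/xxstart() are all hypothetical.
 */
#ifdef notdef
struct	buf xxtab;		/* hypothetical device queue header */

xxintr(dev)
dev_t dev;
{
	register struct buf *bp;

	bp = xxtab.b_actf;		/* transfer at head of queue */
	if (xxerror(dev))
		bp->b_flags |= B_ERROR;
	xxtab.b_actf = bp->av_forw;	/* dequeue it */
	iodone(bp);			/* wake sleepers or brelse() */
	xxstart(dev);			/* start the next transfer */
}
#endif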

/*
 * Zero the core associated with a buffer.
 */
clrbuf(bp)
struct buf *bp;
{
	register *p;
	register c;

	p = bp->b_un.b_words;
	c = BSIZE/sizeof(int);
	do
		*p++ = 0;
	while (--c);
	bp->b_resid = 0;
}

/*
 * swap I/O -
 *
 * If the flag indicates a dirty page push initiated
 * by the pageout daemon, we map the page into the i th
 * virtual page of process 2 (the daemon itself) where i is
 * the index of the swap header that has been allocated.
 * We simply initialize the header and queue the I/O but
 * do not wait for completion. When the I/O completes,
 * iodone() will link the header to a list of cleaned
 * pages to be processed by the pageout daemon.
 */
swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
	struct proc *p;
	swblk_t dblkno;
	caddr_t addr;
	int flag, nbytes;
	dev_t dev;
	unsigned pfcent;
{
	register struct buf *bp;
	register int c;
	int p2dp;
	register struct pte *dpte, *vpte;
	int s;

	s = spl6();
	while (bswlist.av_forw == NULL) {
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.av_forw;
	bswlist.av_forw = bp->av_forw;
	splx(s);

	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
		if (rdflg == B_READ)
			sum.v_pswpin += btoc(nbytes);
		else
			sum.v_pswpout += btoc(nbytes);
	bp->b_proc = p;
	if (flag & B_DIRTY) {
		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
		dpte = dptopte(&proc[2], p2dp);
		vpte = vtopte(p, btop(addr));
		for (c = 0; c < nbytes; c += NBPG) {
			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
				panic("swap bad pte");
			*dpte++ = *vpte++;
		}
		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
	} else
		bp->b_un.b_addr = addr;
	while (nbytes > 0) {
		c = imin(ctob(120), nbytes);
		bp->b_bcount = c;
		bp->b_blkno = dblkno;
		bp->b_dev = dev;
		if (flag & B_DIRTY) {
			swpf[bp - swbuf] = pfcent;
			swsize[bp - swbuf] = nbytes;
		}
#ifdef	TRACE
		trace(TR_SWAPIO, dev, bp->b_blkno);
#endif
		(*bdevsw[major(dev)].d_strategy)(bp);
		if (flag & B_DIRTY) {
			if (c < nbytes)
				panic("big push");
			return;
		}
		s = spl6();
		while((bp->b_flags&B_DONE)==0)
			sleep((caddr_t)bp, PSWP);
		splx(s);
		bp->b_un.b_addr += c;
		bp->b_flags &= ~B_DONE;
		if (bp->b_flags & B_ERROR) {
			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
				panic("hard IO err in swap");
			swkill(p, (char *)0);
		}
		nbytes -= c;
		dblkno += btoc(c);
	}
	s = spl6();
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bswlist);
		wakeup((caddr_t)&proc[2]);
	}
	splx(s);
}

/*
 * If rout == 0 then killed on swap error, else
 * rout is the name of the routine where we ran out of
 * swap space.
 */
swkill(p, rout)
	struct proc *p;
	char *rout;
{
	char *mesg;

	printf("pid %d: ", p->p_pid);
	if (rout)
		printf(mesg = "killed due to no swap space\n");
	else
		printf(mesg = "killed on swap error\n");
	uprintf("sorry, pid %d was %s", p->p_pid, mesg);
	/*
	 * To be sure no looping (e.g. in vmsched trying to
	 * swap out) mark process locked in core (as though
	 * done by user) after killing it so no one will try
	 * to swap it out.
	 */
	psignal(p, SIGKILL);
	p->p_flag |= SULOCK;
}

/*
 * make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 */
bflush(dev)
dev_t dev;
{
	register struct buf *bp;
	register struct buf *flist;
	int s;

loop:
	s = spl6();
	for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
	for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
		if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) {
			bp->b_flags |= B_ASYNC;
			notavail(bp);
			bwrite(bp);
			goto loop;
		}
	}
	splx(s);
}
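
/*
 * A sketch (not in the original source) of the two callers named
 * above: unmounting flushes and then invalidates one device, while
 * the periodic file system update flushes everything.  The wrapper
 * names flushdev() and flushall() are hypothetical.
 */
#ifdef notdef
flushdev(dev)
dev_t dev;
{

	bflush(dev);		/* push delayed writes for this device */
	binval(dev);		/* then drop its cached blocks */
}

flushall()
{

	bflush(NODEV);		/* NODEV matches every device */
}
#endif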

/*
 * Raw I/O. The arguments are
 *	The strategy routine for the device
 *	A buffer, which will always be a special buffer
 *	  header owned exclusively by the device for this purpose
 *	The device number
 *	Read/write flag
 * Essentially all the work is computing physical addresses and
 * validating them.
 * If the user has the proper access privileges, the process is
 * marked 'delayed unlock' and the pages involved in the I/O are
 * faulted and locked. After the completion of the I/O, the above pages
 * are unlocked.
 */
physio(strat, bp, dev, rw, mincnt)
int (*strat)();
register struct buf *bp;
unsigned (*mincnt)();
{
	register int c;
	char *a;
	int s;

	if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) {
		u.u_error = EFAULT;
		return;
	}
	s = spl6();
	while (bp->b_flags&B_BUSY) {
		bp->b_flags |= B_WANTED;
		sleep((caddr_t)bp, PRIBIO+1);
	}
	bp->b_error = 0;
	bp->b_proc = u.u_procp;
	bp->b_un.b_addr = u.u_base;
	while (u.u_count != 0) {
		bp->b_flags = B_BUSY | B_PHYS | rw;
		bp->b_dev = dev;
		bp->b_blkno = u.u_offset >> PGSHIFT;
		bp->b_bcount = u.u_count;
		(*mincnt)(bp);
		c = bp->b_bcount;
		u.u_procp->p_flag |= SPHYSIO;
		vslock(a = bp->b_un.b_addr, c);
		(*strat)(bp);
		(void) spl6();
		while ((bp->b_flags&B_DONE) == 0)
			sleep((caddr_t)bp, PRIBIO);
		vsunlock(a, c, rw);
		u.u_procp->p_flag &= ~SPHYSIO;
		if (bp->b_flags&B_WANTED)
			wakeup((caddr_t)bp);
		splx(s);
		bp->b_un.b_addr += c;
		u.u_count -= c;
		u.u_offset += c;
		if (bp->b_flags&B_ERROR)
			break;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
	u.u_count = bp->b_resid;
	geterror(bp);
}
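
/*
 * A sketch (not in the original source) of the conventional caller:
 * a character device's read and write entries hand their strategy
 * routine and a private buffer header to physio().  The "xx"
 * device, xxstrategy, and xxbuf are hypothetical.
 */
#ifdef notdef
struct	buf xxbuf;		/* private header for raw I/O */

xxread(dev)
dev_t dev;
{

	physio(xxstrategy, &xxbuf, dev, B_READ, minphys);
}

xxwrite(dev)
dev_t dev;
{

	physio(xxstrategy, &xxbuf, dev, B_WRITE, minphys);
}
#endif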

/*ARGSUSED*/
unsigned
minphys(bp)
struct buf *bp;
{

	if (bp->b_bcount > 60 * 1024)
		bp->b_bcount = 60 * 1024;
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
register struct buf *bp;
{

	if (bp->b_flags&B_ERROR)
		if ((u.u_error = bp->b_error)==0)
			u.u_error = EIO;
}

/*
 * Invalidate in core blocks belonging to closed or umounted filesystem
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o. Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues. Until that happy day, this suffices for
 * correctness. ... kre
 */
binval(dev)
dev_t dev;
{
	register struct buf *bp;
	register struct bufhd *hp;
#define dp ((struct buf *)hp)

	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
			if (bp->b_dev == dev)
				bp->b_flags |= B_INVAL;
}