/*	vfs_bio.c	4.5	%G%	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer to
 * the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread
 *	breada
 *	baddr	(if it is incore)
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite
 *	bdwrite
 *	bawrite
 *	brelse
 */
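/*
 * Illustrative sketch (not part of the original source): a typical caller,
 * say a hypothetical filesystem read routine, pairs each allocation with a
 * release.  Here dev and blkno stand for whatever device and block the
 * caller is after:
 *
 *	struct buf *bp;
 *
 *	bp = bread(dev, blkno);		read the block; bp comes back busy
 *	if (u.u_error == 0)
 *		... use BSIZE bytes at bp->b_un.b_addr ...
 *	brelse(bp);			release it so others can have it
 */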

#define	BUFHSZ	63
#define	BUFHASH(blkno)	(blkno % BUFHSZ)
short	bufhash[BUFHSZ];

/*
 * Initialize hash links for buffers.
 */
bhinit()
{
	register int i;

	for (i = 0; i < BUFHSZ; i++)
		bufhash[i] = -1;
}
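/*
 * Illustrative note (not part of the original source): BUFHASH folds the
 * block number modulo the table size, so block 1000, for example, hashes to
 * chain 1000 % 63 == 55.  Each bufhash[] slot holds the buf[] index of the
 * first buffer on its chain (-1 when empty), and the rest of the chain is
 * threaded through the b_hlink fields, as incore() and getblk() below walk
 * it:
 *
 *	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
 *	    bp = &buf[bp->b_hlink])
 *		... examine bp ...
 */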

/* #define	DISKMON	1 */

#ifdef	DISKMON
struct {
	int	nbuf;
	long	nread;
	long	nreada;
	long	ncache;
	long	nwrite;
	long	bufcount[NBUF];
} io_info;
#endif

/*
 * Swap IO headers -
 * They contain the necessary information for the swap I/O.
 * At any given time, a swap header can be in one of three
 * different lists.  When free it is in the free list,
 * when allocated and the I/O queued, it is on the swap
 * device list, and finally, if the operation was a dirty
 * page push, when the I/O completes, it is inserted
 * in a list of cleaned pages to be processed by the pageout daemon.
 */
struct	buf swbuf[NSWBUF];
short	swsize[NSWBUF];		/* CAN WE JUST USE B_BCOUNT? */
int	swpf[NSWBUF];


#ifdef	FASTVAX
#define	notavail(bp) \
{ \
	int s = spl6(); \
	(bp)->av_back->av_forw = (bp)->av_forw; \
	(bp)->av_forw->av_back = (bp)->av_back; \
	(bp)->b_flags |= B_BUSY; \
	splx(s); \
}
#endif

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;

	bp = getblk(dev, blkno);
	if (bp->b_flags&B_DONE) {
#ifdef	EPAWNJ
		trace(TR_BREAD|TR_HIT, dev, blkno);
#endif
#ifdef	DISKMON
		io_info.ncache++;
#endif
		return(bp);
	}
	bp->b_flags |= B_READ;
	bp->b_bcount = BSIZE;
	(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	EPAWNJ
	trace(TR_BREAD|TR_MISS, dev, blkno);
#endif
#ifdef	DISKMON
	io_info.nread++;
#endif
	u.u_vm.vm_inblk++;		/* pay for read */
	iowait(bp);
	return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller)
 */
struct buf *
breada(dev, blkno, rablkno)
dev_t dev;
daddr_t blkno, rablkno;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	EPAWNJ
			trace(TR_BREAD|TR_MISS, dev, blkno);
#endif
#ifdef	DISKMON
			io_info.nread++;
#endif
			u.u_vm.vm_inblk++;		/* pay for read */
		}
#ifdef	EPAWNJ
		else
			trace(TR_BREAD|TR_HIT, dev, blkno);
#endif
	}
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
#ifdef	EPAWNJ
			trace(TR_BREAD|TR_HIT|TR_RA, dev, blkno);
#endif
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			rabp->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(rabp);
#ifdef	EPAWNJ
			trace(TR_BREAD|TR_MISS|TR_RA, dev, rablkno);
#endif
#ifdef	DISKMON
			io_info.nreada++;
#endif
			u.u_vm.vm_inblk++;		/* pay in advance */
		}
	}
	if(bp == NULL)
		return(bread(dev, blkno));
	iowait(bp);
	return(bp);
}
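/*
 * Illustrative sketch (not part of the original source): the usual caller is
 * a sequential read path that wants one block now and expects to want the
 * next one shortly.  With a hypothetical block number bn:
 *
 *	bp = breada(dev, bn, bn + 1);	read bn, start bn+1 asynchronously
 *	... use bp ...
 *	brelse(bp);
 *
 * Only bp belongs to the caller.  The read-ahead buffer is either released
 * immediately (it was already in core) or released by iodone() when its
 * asynchronous read finishes, so a later bread(dev, bn + 1) normally finds
 * it in core.
 */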

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
	bp->b_bcount = BSIZE;
#ifdef	DISKMON
	io_info.nwrite++;
#endif
	if ((flag&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* no one paid yet */
#ifdef	EPAWNJ
	trace(TR_BWRITE, bp->b_dev, dbtofsb(bp->b_blkno));
#endif
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	if ((flag&B_ASYNC) == 0) {
		iowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	else
		geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
register struct buf *bp;
{
	register struct buf *dp;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* no one paid yet */
	dp = bdevsw[major(bp->b_dev)].d_tab;
	if(dp->b_flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}
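/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * caller updating only part of a block uses the delayed write so that a
 * second update of the same block soon afterwards costs no extra disk I/O:
 *
 *	bp = bread(dev, blkno);
 *	... modify a few bytes at bp->b_un.b_addr ...
 *	bdwrite(bp);		marked B_DELWRI; actually written later
 *
 * bawrite() is the asynchronous alternative for a caller that is finished
 * with the block and wants the write started now without waiting for it.
 */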

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
register struct buf *bp;
{
	register struct buf **backp;
	register s;

	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist.b_flags&B_WANTED) {
		bfreelist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bfreelist);
	}
	if ((bp->b_flags&B_ERROR) && bp->b_dev != NODEV) {
		bunhash(bp);
		bp->b_dev = NODEV;		/* no assoc. on error */
	}
	s = spl6();
	if(bp->b_flags & (B_AGE|B_ERROR)) {
		backp = &bfreelist.av_forw;
		(*backp)->av_back = bp;
		bp->av_forw = *backp;
		*backp = bp;
		bp->av_back = &bfreelist;
	} else {
		backp = &bfreelist.av_back;
		(*backp)->av_forw = bp;
		bp->av_back = *backp;
		*backp = bp;
		bp->av_forw = &bfreelist;
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada)
 */
incore(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;
	register int dblkno = fsbtodb(blkno);

	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
	    bp = &buf[bp->b_hlink])
		if (bp->b_blkno == dblkno && bp->b_dev == dev
		    && !(bp->b_flags & B_INVAL))
			return (1);
	return (0);
}

struct buf *
baddr(dev, blkno)
dev_t dev;
daddr_t blkno;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno));
	return (0);
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 */
struct buf *
getblk(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp, *dp, *ep;
	register int i, x, dblkno;

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	dblkno = fsbtodb(blkno);
loop:
	(void) spl0();
	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
	    bp = &buf[bp->b_hlink]) {
		if (bp->b_blkno != dblkno || bp->b_dev != dev
		    || bp->b_flags & B_INVAL)
			continue;
		(void) spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			goto loop;
		}
		(void) spl0();
#ifdef	DISKMON
		i = 0;
		dp = bp->av_forw;
		while (dp != &bfreelist) {
			i++;
			dp = dp->av_forw;
		}
		if (i<NBUF)
			io_info.bufcount[i]++;
#endif
		notavail(bp);
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	dp = bdevsw[major(dev)].d_tab;
	if (dp == NULL)
		panic("devtab");
	(void) spl6();
	if (bfreelist.av_forw == &bfreelist) {
		bfreelist.b_flags |= B_WANTED;
		sleep((caddr_t)&bfreelist, PRIBIO+1);
		goto loop;
	}
	(void) spl0();
	bp = bfreelist.av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	if (bp->b_dev == NODEV)
		goto done;
	/* INLINE EXPANSION OF bunhash(bp) */
#ifdef	EPAWNJ
	trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
#endif
	(void) spl6();
	i = BUFHASH(dbtofsb(bp->b_blkno));
	x = bp - buf;
	if (bufhash[i] == x) {
		bufhash[i] = bp->b_hlink;
	} else {
		for (ep = &buf[bufhash[i]]; ep != &buf[-1];
		    ep = &buf[ep->b_hlink])
			if (ep->b_hlink == x) {
				ep->b_hlink = bp->b_hlink;
				goto done;
			}
		panic("getblk");
	}
done:
	(void) spl0();
	/* END INLINE EXPANSION */
	bp->b_flags = B_BUSY;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = dblkno;
	i = BUFHASH(blkno);
	bp->b_hlink = bufhash[i];
	bufhash[i] = bp - buf;
	return(bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device
 */
struct buf *
geteblk()
{
	register struct buf *bp, *dp;

loop:
	(void) spl6();
	while (bfreelist.av_forw == &bfreelist) {
		bfreelist.b_flags |= B_WANTED;
		sleep((caddr_t)&bfreelist, PRIBIO+1);
	}
	(void) spl0();
	dp = &bfreelist;
	bp = bfreelist.av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	if (bp->b_dev != NODEV) {
#ifdef	EPAWNJ
		trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
#endif
		bunhash(bp);
	}
	bp->b_flags = B_BUSY;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = (dev_t)NODEV;
	bp->b_hlink = -1;
	return(bp);
}
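/*
 * Illustrative sketch (not part of the original source): geteblk() supplies
 * scratch space that corresponds to no disk block, e.g. for a hypothetical
 * routine that needs a BSIZE work area:
 *
 *	bp = geteblk();
 *	clrbuf(bp);
 *	... build data at bp->b_un.b_addr ...
 *	brelse(bp);
 *
 * Because b_dev is NODEV and b_hlink is -1, such a buffer is never found by
 * incore() or getblk(); on release it simply returns to the free list.
 */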

/*
 * Remove a buffer from its hash chain.
 */
bunhash(bp)
	register struct buf *bp;
{
	register struct buf *ep;
	register int i, x, s;

	if (bp->b_dev == NODEV)
		return;
	s = spl6();
	i = BUFHASH(dbtofsb(bp->b_blkno));
	x = bp - buf;
	if (bufhash[i] == x) {
		bufhash[i] = bp->b_hlink;
		goto ret;
	}
	for (ep = &buf[bufhash[i]]; ep != &buf[-1];
	    ep = &buf[ep->b_hlink])
		if (ep->b_hlink == x) {
			ep->b_hlink = bp->b_hlink;
			goto ret;
		}
	panic("bunhash");
ret:
	splx(s);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
iowait(bp)
register struct buf *bp;
{

	(void) spl6();
	while ((bp->b_flags&B_DONE)==0)
		sleep((caddr_t)bp, PRIBIO);
	(void) spl0();
	geterror(bp);
}

#ifndef FASTVAX
/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface)
 */
notavail(bp)
register struct buf *bp;
{
	register s;

	s = spl6();
	bp->av_back->av_forw = bp->av_forw;
	bp->av_forw->av_back = bp->av_back;
	bp->b_flags |= B_BUSY;
	splx(s);
}
#endif

/*
 * Mark I/O complete on a buffer. If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon. Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
iodone(bp)
register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_DONE)
		panic("dup iodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		cnt.v_pgout++;
		bp->av_forw = bclnlist;
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);
		splx(s);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Zero the core associated with a buffer.
 */
clrbuf(bp)
struct buf *bp;
{
	register *p;
	register c;

	p = bp->b_un.b_words;
	c = BSIZE/sizeof(int);
	do
		*p++ = 0;
	while (--c);
	bp->b_resid = 0;
}

/*
 * swap I/O -
 *
 * If the flag indicates a dirty page push initiated
 * by the pageout daemon, we map the page into the i'th
 * virtual page of process 2 (the daemon itself) where i is
 * the index of the swap header that has been allocated.
 * We simply initialize the header and queue the I/O but
 * do not wait for completion. When the I/O completes,
 * iodone() will link the header to a list of cleaned
 * pages to be processed by the pageout daemon.
 */
swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
	struct proc *p;
	swblk_t dblkno;
	caddr_t addr;
	int flag, nbytes;
	dev_t dev;
	unsigned pfcent;
{
	register struct buf *bp;
	register int c;
	int p2dp;
	register struct pte *dpte, *vpte;

	(void) spl6();
	while (bswlist.av_forw == NULL) {
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.av_forw;
	bswlist.av_forw = bp->av_forw;
	(void) spl0();

	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
		if (rdflg == B_READ)
			sum.v_pswpin += btoc(nbytes);
		else
			sum.v_pswpout += btoc(nbytes);
	bp->b_proc = p;
	if (flag & B_DIRTY) {
		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
		dpte = dptopte(&proc[2], p2dp);
		vpte = vtopte(p, btop(addr));
		for (c = 0; c < nbytes; c += NBPG) {
			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
				panic("swap bad pte");
			*dpte++ = *vpte++;
		}
		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
	} else
		bp->b_un.b_addr = addr;
	while (nbytes > 0) {
		c = imin(ctob(120), nbytes);
		bp->b_bcount = c;
		bp->b_blkno = dblkno;
		bp->b_dev = dev;
		if (flag & B_DIRTY) {
			swpf[bp - swbuf] = pfcent;
			swsize[bp - swbuf] = nbytes;
		}
		(*bdevsw[major(dev)].d_strategy)(bp);
		if (flag & B_DIRTY) {
			if (c < nbytes)
				panic("big push");
			return;
		}
		(void) spl6();
		while((bp->b_flags&B_DONE)==0)
			sleep((caddr_t)bp, PSWP);
		(void) spl0();
		bp->b_un.b_addr += c;
		bp->b_flags &= ~B_DONE;
		if (bp->b_flags & B_ERROR) {
			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
				panic("hard IO err in swap");
			swkill(p, (char *)0);
		}
		nbytes -= c;
		dblkno += btoc(c);
	}
	(void) spl6();
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bswlist);
		wakeup((caddr_t)&proc[2]);
	}
	(void) spl0();
}

/*
 * If rout == 0 then killed on swap error, else
 * rout is the name of the routine where we ran out of
 * swap space.
 */
swkill(p, rout)
	struct proc *p;
	char *rout;
{

	printf("%d: ", p->p_pid);
	if (rout)
		printf("out of swap space in %s\n", rout);
	else
		printf("killed on swap error\n");
	/*
	 * To be sure no looping (e.g. in vmsched trying to
	 * swap out) mark process locked in core (as though
	 * done by user) after killing it so no one will try
	 * to swap it out.
	 */
	psignal(p, SIGKILL);
	p->p_flag |= SULOCK;
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 */
bflush(dev)
dev_t dev;
{
	register struct buf *bp;

loop:
	(void) spl6();
	for (bp = bfreelist.av_forw; bp != &bfreelist; bp = bp->av_forw) {
		if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) {
			bp->b_flags |= B_ASYNC;
			notavail(bp);
			bwrite(bp);
			goto loop;
		}
	}
	(void) spl0();
}
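/*
 * Illustrative sketch (not part of the original source): per the comment
 * above, the periodic update() pass flushes every delayed write in the
 * system while umount flushes only the filesystem being taken down,
 * roughly:
 *
 *	bflush(NODEV);		push all delayed writes, any device
 *	bflush(dev);		push only those for this device
 */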

/*
 * Raw I/O. The arguments are
 *	The strategy routine for the device
 *	A buffer, which will always be a special buffer
 *	  header owned exclusively by the device for this purpose
 *	The device number
 *	Read/write flag
 * Essentially all the work is computing physical addresses and
 * validating them.
 * If the user has the proper access privileges, the process is
 * marked 'delayed unlock' and the pages involved in the I/O are
 * faulted and locked. After the completion of the I/O, the above pages
 * are unlocked.
 */
physio(strat, bp, dev, rw, mincnt)
int (*strat)();
register struct buf *bp;
unsigned (*mincnt)();
{
	register int c;
	char *a;

	if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) {
		u.u_error = EFAULT;
		return;
	}
	(void) spl6();
	while (bp->b_flags&B_BUSY) {
		bp->b_flags |= B_WANTED;
		sleep((caddr_t)bp, PRIBIO+1);
	}
	bp->b_error = 0;
	bp->b_proc = u.u_procp;
	bp->b_un.b_addr = u.u_base;
	while (u.u_count != 0 && bp->b_error==0) {
		bp->b_flags = B_BUSY | B_PHYS | rw;
		bp->b_dev = dev;
		bp->b_blkno = u.u_offset >> PGSHIFT;
		bp->b_bcount = u.u_count;
		(*mincnt)(bp);
		c = bp->b_bcount;
		u.u_procp->p_flag |= SPHYSIO;
		vslock(a = bp->b_un.b_addr, c);
		(*strat)(bp);
		(void) spl6();
		while ((bp->b_flags&B_DONE) == 0)
			sleep((caddr_t)bp, PRIBIO);
		vsunlock(a, c, rw);
		u.u_procp->p_flag &= ~SPHYSIO;
		if (bp->b_flags&B_WANTED)
			wakeup((caddr_t)bp);
		(void) spl0();
		bp->b_un.b_addr += c;
		u.u_count -= c;
		u.u_offset += c;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
	u.u_count = bp->b_resid;
	geterror(bp);
}
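/*
 * Illustrative sketch (not part of the original source): a raw (character)
 * read entry in a hypothetical disk driver "xx" typically just hands its
 * strategy routine and private raw buffer header to physio; xxstrategy and
 * rxxbuf are assumed names, and minphys is the usual byte-count limiter
 * defined below:
 *
 *	xxread(dev)
 *	dev_t dev;
 *	{
 *
 *		physio(xxstrategy, &rxxbuf, dev, B_READ, minphys);
 *	}
 */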

/*ARGSUSED*/
unsigned
minphys(bp)
struct buf *bp;
{

	if (bp->b_bcount > 60 * 1024)
		bp->b_bcount = 60 * 1024;
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
register struct buf *bp;
{

	if (bp->b_flags&B_ERROR)
		if ((u.u_error = bp->b_error)==0)
			u.u_error = EIO;
}

/*
 * Invalidate in core blocks belonging to closed or unmounted filesystem
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o. Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues. Until that happy day, this suffices for
 * correctness. ... kre
 */
binval(dev)
dev_t dev;
{
	register struct buf *bp, *dp;

	dp = bdevsw[major(dev)].d_tab;

	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_dev == dev)
			bp->b_flags |= B_INVAL;
}
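/*
 * Illustrative sketch (not part of the original source): the expected caller
 * is a close or umount path for a block device, which would first push any
 * delayed writes and then invalidate whatever is still cached; with a
 * hypothetical driver close routine:
 *
 *	xxclose(dev, flag)
 *	dev_t dev;
 *	{
 *
 *		bflush(dev);
 *		binval(dev);
 *	}
 *
 * Invalidated buffers stay on their hash chains but carry B_INVAL, which
 * incore() and getblk() above explicitly skip.
 */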