/*	vfs_cluster.c	4.13	%G%	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer to
 * the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread
 *	breada
 *	baddr	(if it is incore)
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite
 *	bdwrite
 *	bawrite
 *	brelse
 */
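
/*
 * A minimal sketch of the usual calling sequence, for illustration
 * only (dev and blkno here are hypothetical):
 *
 *	struct buf *bp;
 *
 *	bp = bread(dev, blkno);		get the block, reading it
 *					only if not already in core
 *	... examine or modify bp->b_un.b_addr ...
 *	brelse(bp);			or bdwrite(bp) if modified
 */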

#define	BUFHSZ	63
struct	bufhd bufhash[BUFHSZ];
#define	BUFHASH(dev, dblkno)	\
	((struct buf *)&bufhash[((int)(dev)+(int)(dblkno)) % BUFHSZ])

/*
 * Initialize hash links for buffers.
 */
bhinit()
{
	register int i;
	register struct bufhd *bp;

	for (bp = bufhash, i = 0; i < BUFHSZ; i++, bp++)
		bp->b_forw = bp->b_back = (struct buf *)bp;
}

/* #define	DISKMON	1 */

#ifdef	DISKMON
struct {
	int	nbuf;
	long	nread;
	long	nreada;
	long	ncache;
	long	nwrite;
	long	bufcount[NBUF];
} io_info;
#endif

/*
 * Swap I/O headers -
 * They contain the necessary information for the swap I/O.
 * At any given time, a swap header can be on one of three
 * different lists.  When free it is in the free list,
 * when allocated and the I/O queued, it is on the swap
 * device list, and finally, if the operation was a dirty
 * page push, when the I/O completes, it is inserted
 * in a list of cleaned pages to be processed by the pageout daemon.
 */
struct	buf swbuf[NSWBUF];
short	swsize[NSWBUF];		/* CAN WE JUST USE B_BCOUNT? */
int	swpf[NSWBUF];
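
/*
 * Lifecycle sketch (illustrative): a header is taken from the
 * bswlist free list in swap(), sits on the device's queue via the
 * strategy routine while the I/O is pending, and at iodone() time
 * goes either back onto bswlist or, for a dirty page push, onto
 * bclnlist for the pageout daemon.
 */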

#ifndef	UNFAST
#define	notavail(bp) \
{ \
	int s = spl6(); \
	(bp)->av_back->av_forw = (bp)->av_forw; \
	(bp)->av_forw->av_back = (bp)->av_back; \
	(bp)->b_flags |= B_BUSY; \
	splx(s); \
}
#endif

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;

	bp = getblk(dev, blkno);
	if (bp->b_flags&B_DONE) {
#ifdef	EPAWNJ
		trace(TR_BREAD|TR_HIT, dev, blkno);
#endif
#ifdef	DISKMON
		io_info.ncache++;
#endif
		return(bp);
	}
	bp->b_flags |= B_READ;
	bp->b_bcount = BSIZE;
	(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	EPAWNJ
	trace(TR_BREAD|TR_MISS, dev, blkno);
#endif
#ifdef	DISKMON
	io_info.nread++;
#endif
	u.u_vm.vm_inblk++;		/* pay for read */
	iowait(bp);
	return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, rablkno)
dev_t dev;
daddr_t blkno, rablkno;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	EPAWNJ
			trace(TR_BREAD|TR_MISS, dev, blkno);
#endif
#ifdef	DISKMON
			io_info.nread++;
#endif
			u.u_vm.vm_inblk++;	/* pay for read */
		}
#ifdef	EPAWNJ
		else
			trace(TR_BREAD|TR_HIT, dev, blkno);
#endif
	}
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
#ifdef	EPAWNJ
			trace(TR_BREAD|TR_HIT|TR_RA, dev, rablkno);
#endif
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			rabp->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(rabp);
#ifdef	EPAWNJ
			trace(TR_BREAD|TR_MISS|TR_RA, dev, rablkno);
#endif
#ifdef	DISKMON
			io_info.nreada++;
#endif
			u.u_vm.vm_inblk++;	/* pay in advance */
		}
	}
	if (bp == NULL)
		return(bread(dev, blkno));
	iowait(bp);
	return(bp);
}
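
/*
 * Illustrative only: a sequential reader can prime the cache with
 * the next block (block numbers hypothetical):
 *
 *	bp = breada(dev, bn, bn + 1);
 *	... use the buffer ...
 *	brelse(bp);
 */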

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
	bp->b_bcount = BSIZE;
#ifdef	DISKMON
	io_info.nwrite++;
#endif
	if ((flag&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;	/* no one paid yet */
#ifdef	EPAWNJ
	trace(TR_BWRITE, bp->b_dev, dbtofsb(bp->b_blkno));
#endif
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	if ((flag&B_ASYNC) == 0) {
		iowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	else
		geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
register struct buf *bp;
{
	register int flags;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;	/* no one paid yet */
	flags = bdevsw[major(bp->b_dev)].d_flags;
	if (flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}
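
/*
 * Choosing among the write routines, as a rule of thumb:
 *
 *	bwrite(bp);	synchronous; caller wants the error now
 *	bawrite(bp);	asynchronous; start the write, don't wait
 *	bdwrite(bp);	delayed; just mark the buffer dirty, on the
 *			bet that the block will be written again soon
 */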

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
register struct buf *bp;
{
	register struct buf *flist;
	register s;

	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	if (bp->b_flags&B_ERROR)
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			bp->b_dev = NODEV;		/* no assoc */
	s = spl6();
	if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQUEUES-1];
		flist->av_forw->av_back = bp;
		bp->av_forw = flist->av_forw;
		flist->av_forw = bp;
		bp->av_back = flist;
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		flist->av_back->av_forw = bp;
		bp->av_back = flist->av_back;
		flist->av_back = bp;
		bp->av_forw = flist;
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}
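
/*
 * In brief (a sketch of the policy above): contentless buffers go to
 * the front of the emptiest queue, bfreelist[BQUEUES-1], for prompt
 * reuse; the rest are appended to BQ_LOCKED, BQ_AGE or BQ_LRU as
 * flagged.  getblk() and geteblk() hunt from bfreelist[BQUEUES-1]
 * downward and never take from bfreelist[0], so locked buffers stay
 * put (assuming BQ_LOCKED is queue 0).
 */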

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;
	register int dblkno = fsbtodb(blkno);

	dp = BUFHASH(dev, dblkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno == dblkno && bp->b_dev == dev &&
		    !(bp->b_flags & B_INVAL))
			return (1);
	return (0);
}

struct buf *
baddr(dev, blkno)
dev_t dev;
daddr_t blkno;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno));
	return (0);
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 */
struct buf *
getblk(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp, *dp, *ep;
	register int dblkno = fsbtodb(blkno);
#ifdef	DISKMON
	register int i;
#endif

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	dblkno = fsbtodb(blkno);
	dp = BUFHASH(dev, dblkno);
loop:
	(void) spl0();
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != dblkno || bp->b_dev != dev ||
		    bp->b_flags&B_INVAL)
			continue;
		(void) spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			goto loop;
		}
		(void) spl0();
#ifdef	DISKMON
		i = 0;
		dp = bp->av_forw;
		while ((dp->b_flags & B_HEAD) == 0) {
			i++;
			dp = dp->av_forw;
		}
		if (i < NBUF)
			io_info.bufcount[i]++;
#endif
		notavail(bp);
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	(void) spl6();
	for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--)
		if (ep->av_forw != ep)
			break;
	if (ep == bfreelist) {		/* no free blocks at all */
		ep->b_flags |= B_WANTED;
		sleep((caddr_t)ep, PRIBIO+1);
		goto loop;
	}
	(void) spl0();
	bp = ep->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
#ifdef	EPAWNJ
	trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
#endif
	bp->b_flags = B_BUSY;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = dblkno;
	return(bp);
}
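
/*
 * Illustrative only: a caller about to overwrite a whole block wants
 * getblk() rather than bread(), avoiding a useless read (dev and
 * blkno hypothetical):
 *
 *	bp = getblk(dev, blkno);
 *	clrbuf(bp);
 *	... fill in bp->b_un.b_addr ...
 *	bdwrite(bp);
 */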

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk()
{
	register struct buf *bp, *dp;

loop:
	(void) spl6();
	for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		goto loop;
	}
	(void) spl0();
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
#ifdef	EPAWNJ
	trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
#endif
	bp->b_flags = B_BUSY|B_INVAL;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = (dev_t)NODEV;
	return(bp);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
iowait(bp)
register struct buf *bp;
{

	(void) spl6();
	while ((bp->b_flags&B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	(void) spl0();
	geterror(bp);
}

#ifdef	UNFAST
/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface)
 */
notavail(bp)
register struct buf *bp;
{
	register s;

	s = spl6();
	bp->av_back->av_forw = bp->av_forw;
	bp->av_forw->av_back = bp->av_back;
	bp->b_flags |= B_BUSY;
	splx(s);
}
#endif

/*
 * Mark I/O complete on a buffer.  If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon.  Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
iodone(bp)
register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_DONE)
		panic("dup iodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		cnt.v_pgout++;
		bp->av_forw = bclnlist;
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);
		splx(s);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Zero the core associated with a buffer.
 */
clrbuf(bp)
struct buf *bp;
{
	register *p;
	register c;

	p = bp->b_un.b_words;
	c = BSIZE/sizeof(int);
	do
		*p++ = 0;
	while (--c);
	bp->b_resid = 0;
}

/*
 * Swap I/O -
 *
 * If the flag indicates a dirty page push initiated
 * by the pageout daemon, we map the page into the i'th
 * virtual page of process 2 (the daemon itself) where i is
 * the index of the swap header that has been allocated.
 * We simply initialize the header and queue the I/O but
 * do not wait for completion.  When the I/O completes,
 * iodone() will link the header to a list of cleaned
 * pages to be processed by the pageout daemon.
 */
swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
	struct proc *p;
	swblk_t dblkno;
	caddr_t addr;
	int rdflg, flag, nbytes;
	dev_t dev;
	unsigned pfcent;
{
	register struct buf *bp;
	register int c;
	int p2dp;
	register struct pte *dpte, *vpte;

	(void) spl6();
	while (bswlist.av_forw == NULL) {
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.av_forw;
	bswlist.av_forw = bp->av_forw;
	(void) spl0();

	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
		if (rdflg == B_READ)
			sum.v_pswpin += btoc(nbytes);
		else
			sum.v_pswpout += btoc(nbytes);
	bp->b_proc = p;
	if (flag & B_DIRTY) {
		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
		dpte = dptopte(&proc[2], p2dp);
		vpte = vtopte(p, btop(addr));
		for (c = 0; c < nbytes; c += NBPG) {
			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
				panic("swap bad pte");
			*dpte++ = *vpte++;
		}
		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
	} else
		bp->b_un.b_addr = addr;
	while (nbytes > 0) {
		c = imin(ctob(120), nbytes);
		bp->b_bcount = c;
		bp->b_blkno = dblkno;
		bp->b_dev = dev;
		if (flag & B_DIRTY) {
			swpf[bp - swbuf] = pfcent;
			swsize[bp - swbuf] = nbytes;
		}
		(*bdevsw[major(dev)].d_strategy)(bp);
		if (flag & B_DIRTY) {
			if (c < nbytes)
				panic("big push");
			return;
		}
		(void) spl6();
		while ((bp->b_flags&B_DONE) == 0)
			sleep((caddr_t)bp, PSWP);
		(void) spl0();
		bp->b_un.b_addr += c;
		bp->b_flags &= ~B_DONE;
		if (bp->b_flags & B_ERROR) {
			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
				panic("hard IO err in swap");
			swkill(p, (char *)0);
		}
		nbytes -= c;
		dblkno += btoc(c);
	}
	(void) spl6();
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bswlist);
		wakeup((caddr_t)&proc[2]);
	}
	(void) spl0();
}
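
/*
 * Illustrative only (argument values hypothetical): a synchronous
 * page-in of nbytes at addr from swap block dblkno would be
 *
 *	swap(p, dblkno, addr, nbytes, B_READ, B_PGIN, swapdev, 0);
 */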

/*
 * If rout == 0 then killed on swap error, else
 * rout is the name of the routine where we ran out of
 * swap space.
 */
swkill(p, rout)
	struct proc *p;
	char *rout;
{

	printf("%d: ", p->p_pid);
	if (rout)
		printf("out of swap space in %s\n", rout);
	else
		printf("killed on swap error\n");
	/*
	 * To be sure no looping (e.g. in vmsched trying to
	 * swap out) mark process locked in core (as though
	 * done by user) after killing it so no one will try
	 * to swap it out.
	 */
	psignal(p, SIGKILL);
	p->p_flag |= SULOCK;
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 */
bflush(dev)
dev_t dev;
{
	register struct buf *bp;
	register struct buf *flist;

loop:
	(void) spl6();
	for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
	for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
		if (bp->b_flags&B_DELWRI && (dev == NODEV||dev == bp->b_dev)) {
			bp->b_flags |= B_ASYNC;
			notavail(bp);
			bwrite(bp);
			goto loop;
		}
	}
	(void) spl0();
}
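
/*
 * Callers, for illustration: update() pushes every delayed write,
 * umount just one device's:
 *
 *	bflush(NODEV);
 *	bflush(dev);
 */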

/*
 * Raw I/O.  The arguments are
 *	The strategy routine for the device
 *	A buffer, which will always be a special buffer
 *	  header owned exclusively by the device for this purpose
 *	The device number
 *	Read/write flag
 * Essentially all the work is computing physical addresses and
 * validating them.
 * If the user has the proper access privileges, the process is
 * marked 'delayed unlock' and the pages involved in the I/O are
 * faulted and locked.  After the completion of the I/O, the above pages
 * are unlocked.
 */
physio(strat, bp, dev, rw, mincnt)
int (*strat)();
register struct buf *bp;
dev_t dev;
int rw;
unsigned (*mincnt)();
{
	register int c;
	char *a;

	if (useracc(u.u_base, u.u_count, rw==B_READ ? B_WRITE : B_READ) == NULL) {
		u.u_error = EFAULT;
		return;
	}
	(void) spl6();
	while (bp->b_flags&B_BUSY) {
		bp->b_flags |= B_WANTED;
		sleep((caddr_t)bp, PRIBIO+1);
	}
	bp->b_error = 0;
	bp->b_proc = u.u_procp;
	bp->b_un.b_addr = u.u_base;
	while (u.u_count != 0 && bp->b_error == 0) {
		bp->b_flags = B_BUSY | B_PHYS | rw;
		bp->b_dev = dev;
		bp->b_blkno = u.u_offset >> PGSHIFT;
		bp->b_bcount = u.u_count;
		(*mincnt)(bp);
		c = bp->b_bcount;
		u.u_procp->p_flag |= SPHYSIO;
		vslock(a = bp->b_un.b_addr, c);
		(*strat)(bp);
		(void) spl6();
		while ((bp->b_flags&B_DONE) == 0)
			sleep((caddr_t)bp, PRIBIO);
		vsunlock(a, c, rw);
		u.u_procp->p_flag &= ~SPHYSIO;
		if (bp->b_flags&B_WANTED)
			wakeup((caddr_t)bp);
		(void) spl0();
		bp->b_un.b_addr += c;
		u.u_count -= c;
		u.u_offset += c;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
	u.u_count = bp->b_resid;
	geterror(bp);
}

/*ARGSUSED*/
unsigned
minphys(bp)
struct buf *bp;
{

	if (bp->b_bcount > 60 * 1024)
		bp->b_bcount = 60 * 1024;
}
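
/*
 * A character driver's raw read entry typically just wraps physio;
 * a sketch (the xx names and the rxxbuf header are hypothetical):
 *
 *	xxread(dev)
 *	dev_t dev;
 *	{
 *		physio(xxstrategy, &rxxbuf, dev, B_READ, minphys);
 *	}
 */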

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
register struct buf *bp;
{

	if (bp->b_flags&B_ERROR)
		if ((u.u_error = bp->b_error) == 0)
			u.u_error = EIO;
}

/*
 * Invalidate in core blocks belonging to closed or umounted filesystem
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues.  Until that happy day, this suffices for
 * correctness. ... kre
 */
binval(dev)
dev_t dev;
{
	register struct buf *bp;
	register struct bufhd *hp;
#define	dp	((struct buf *)hp)

	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
			if (bp->b_dev == dev)
				bp->b_flags |= B_INVAL;
}