/*	vfs_cluster.c	4.4	%G%	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer to
 * the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread
 *	breada
 *	baddr	(if it is incore)
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite
 *	bdwrite
 *	bawrite
 *	brelse
 */
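
/*
 * A minimal usage sketch of the allocate/release discipline described
 * above (not part of this file; "examine" is a hypothetical consumer).
 * The buffer comes back busy and filled; brelse() returns it to the
 * free list while keeping the cached association.
 */
#ifdef notdef
example(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;

	bp = bread(dev, blkno);		/* allocate, read if not cached */
	if (u.u_error == 0)
		examine(bp->b_un.b_addr);	/* hypothetical use of data */
	brelse(bp);			/* release; block stays cached */
}
#endif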

#define	BUFHSZ	63
#define	BUFHASH(blkno)	(blkno % BUFHSZ)
short	bufhash[BUFHSZ];

/*
 * Initialize hash links for buffers.
 */
bhinit()
{
	register int i;

	for (i = 0; i < BUFHSZ; i++)
		bufhash[i] = -1;
}

/* #define	DISKMON	1 */

#ifdef	DISKMON
struct {
	int	nbuf;
	long	nread;
	long	nreada;
	long	ncache;
	long	nwrite;
	long	bufcount[NBUF];
} io_info;
#endif

/*
 * Swap IO headers -
 * They contain the necessary information for the swap I/O.
 * At any given time, a swap header can be on one of three
 * lists.  When free it is in the free list,
 * when allocated and the I/O queued, it is on the swap
 * device list, and finally, if the operation was a dirty
 * page push, when the I/O completes, it is inserted
 * in a list of cleaned pages to be processed by the pageout daemon.
 */
struct	buf swbuf[NSWBUF];
short	swsize[NSWBUF];		/* CAN WE JUST USE B_BCOUNT? */
int	swpf[NSWBUF];


#ifdef	FASTVAX
#define	notavail(bp) \
{ \
	int s = spl6(); \
	(bp)->av_back->av_forw = (bp)->av_forw; \
	(bp)->av_forw->av_back = (bp)->av_back; \
	(bp)->b_flags |= B_BUSY; \
	splx(s); \
}
#endif

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;

	bp = getblk(dev, blkno);
	if (bp->b_flags&B_DONE) {
#ifdef	EPAWNJ
		trace(TR_BREAD|TR_HIT, dev, blkno);
#endif
#ifdef	DISKMON
		io_info.ncache++;
#endif
		return(bp);
	}
	bp->b_flags |= B_READ;
	bp->b_bcount = BSIZE;
	(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	EPAWNJ
	trace(TR_BREAD|TR_MISS, dev, blkno);
#endif
#ifdef	DISKMON
	io_info.nread++;
#endif
	u.u_vm.vm_inblk++;		/* pay for read */
	iowait(bp);
	return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, rablkno)
dev_t dev;
daddr_t blkno, rablkno;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	EPAWNJ
			trace(TR_BREAD|TR_MISS, dev, blkno);
#endif
#ifdef	DISKMON
			io_info.nread++;
#endif
			u.u_vm.vm_inblk++;	/* pay for read */
		}
#ifdef	EPAWNJ
		else
			trace(TR_BREAD|TR_HIT, dev, blkno);
#endif
	}
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
#ifdef	EPAWNJ
			trace(TR_BREAD|TR_HIT|TR_RA, dev, blkno);
#endif
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			rabp->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(rabp);
#ifdef	EPAWNJ
			trace(TR_BREAD|TR_MISS|TR_RA, dev, rablkno);
#endif
#ifdef	DISKMON
			io_info.nreada++;
#endif
			u.u_vm.vm_inblk++;	/* pay in advance */
		}
	}
	if (bp == NULL)
		return (bread(dev, blkno));
	iowait(bp);
	return(bp);
}

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
	bp->b_bcount = BSIZE;
#ifdef	DISKMON
	io_info.nwrite++;
#endif
	if ((flag&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;	/* no one paid yet */
#ifdef	EPAWNJ
	trace(TR_BWRITE, bp->b_dev, dbtofsb(bp->b_blkno));
#endif
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	if ((flag&B_ASYNC) == 0) {
		iowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	else
		geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
register struct buf *bp;
{
	register struct buf *dp;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;	/* no one paid yet */
	dp = bdevsw[major(bp->b_dev)].d_tab;
	if (dp->b_flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}
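
/*
 * A sketch of the delayed-write pattern the comment above describes
 * (assumed caller, not from this file): read the whole block, modify
 * part of it in core, and let bdwrite() defer the disk write in the
 * expectation that a neighboring write will dirty it again soon.
 */
#ifdef notdef
partwrite(dev, blkno, off, src, cnt)
dev_t dev;
daddr_t blkno;
caddr_t src;
int off, cnt;
{
	register struct buf *bp;

	bp = bread(dev, blkno);		/* whole block in core */
	bcopy(src, bp->b_un.b_addr + off, cnt);
	bdwrite(bp);			/* mark dirty, write later */
}
#endif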

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
register struct buf *bp;
{
	register struct buf **backp;
	register s;

	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist.b_flags&B_WANTED) {
		bfreelist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bfreelist);
	}
	if ((bp->b_flags&B_ERROR) && bp->b_dev != NODEV) {
		bunhash(bp);
		bp->b_dev = NODEV;	/* no assoc. on error */
	}
	s = spl6();
	if (bp->b_flags & (B_AGE|B_ERROR)) {
		backp = &bfreelist.av_forw;
		(*backp)->av_back = bp;
		bp->av_forw = *backp;
		*backp = bp;
		bp->av_back = &bfreelist;
	} else {
		backp = &bfreelist.av_back;
		(*backp)->av_forw = bp;
		bp->av_back = *backp;
		*backp = bp;
		bp->av_forw = &bfreelist;
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;
	register int dblkno = fsbtodb(blkno);

	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
	    bp = &buf[bp->b_hlink])
		if (bp->b_blkno == dblkno && bp->b_dev == dev)
			return (1);
	return (0);
}

struct buf *
baddr(dev, blkno)
dev_t dev;
daddr_t blkno;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno));
	return (0);
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 */
struct buf *
getblk(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp, *dp, *ep;
	register int i, x, dblkno;

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	dblkno = fsbtodb(blkno);
loop:
	(void) spl0();
	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
	    bp = &buf[bp->b_hlink]) {
		if (bp->b_blkno != dblkno || bp->b_dev != dev)
			continue;
		(void) spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			goto loop;
		}
		(void) spl0();
#ifdef	DISKMON
		i = 0;
		dp = bp->av_forw;
		while (dp != &bfreelist) {
			i++;
			dp = dp->av_forw;
		}
		if (i<NBUF)
			io_info.bufcount[i]++;
#endif
		notavail(bp);
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	dp = bdevsw[major(dev)].d_tab;
	if (dp == NULL)
		panic("devtab");
	(void) spl6();
	if (bfreelist.av_forw == &bfreelist) {
		bfreelist.b_flags |= B_WANTED;
		sleep((caddr_t)&bfreelist, PRIBIO+1);
		goto loop;
	}
	(void) spl0();
	bp = bfreelist.av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	if (bp->b_dev == NODEV)
		goto done;
	/* INLINE EXPANSION OF bunhash(bp) */
#ifdef	EPAWNJ
	trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
#endif
	(void) spl6();
	i = BUFHASH(dbtofsb(bp->b_blkno));
	x = bp - buf;
	if (bufhash[i] == x) {
		bufhash[i] = bp->b_hlink;
	} else {
		for (ep = &buf[bufhash[i]]; ep != &buf[-1];
		    ep = &buf[ep->b_hlink])
			if (ep->b_hlink == x) {
				ep->b_hlink = bp->b_hlink;
				goto done;
			}
		panic("getblk");
	}
done:
	(void) spl0();
	/* END INLINE EXPANSION */
	bp->b_flags = B_BUSY;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = dblkno;
	i = BUFHASH(blkno);
	bp->b_hlink = bufhash[i];
	bufhash[i] = bp - buf;
	return(bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk()
{
	register struct buf *bp, *dp;

loop:
	(void) spl6();
	while (bfreelist.av_forw == &bfreelist) {
		bfreelist.b_flags |= B_WANTED;
		sleep((caddr_t)&bfreelist, PRIBIO+1);
	}
	(void) spl0();
	dp = &bfreelist;
	bp = bfreelist.av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	if (bp->b_dev != NODEV) {
#ifdef	EPAWNJ
		trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
#endif
		bunhash(bp);
	}
	bp->b_flags = B_BUSY;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = (dev_t)NODEV;
	bp->b_hlink = -1;
	return(bp);
}
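
/*
 * A sketch of geteblk() use (assumed caller): grab a scratch buffer
 * with no device association, use its core, then release it.
 */
#ifdef notdef
scratch()
{
	register struct buf *bp;

	bp = geteblk();		/* busy, b_dev == NODEV */
	clrbuf(bp);		/* zero its BSIZE bytes */
	/* ... use bp->b_un.b_addr as temporary storage ... */
	brelse(bp);
}
#endif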

bunhash(bp)
	register struct buf *bp;
{
	register struct buf *ep;
	register int i, x, s;

	if (bp->b_dev == NODEV)
		return;
	s = spl6();
	i = BUFHASH(dbtofsb(bp->b_blkno));
	x = bp - buf;
	if (bufhash[i] == x) {
		bufhash[i] = bp->b_hlink;
		goto ret;
	}
	for (ep = &buf[bufhash[i]]; ep != &buf[-1];
	    ep = &buf[ep->b_hlink])
		if (ep->b_hlink == x) {
			ep->b_hlink = bp->b_hlink;
			goto ret;
		}
	panic("bunhash");
ret:
	splx(s);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
iowait(bp)
register struct buf *bp;
{

	(void) spl6();
	while ((bp->b_flags&B_DONE)==0)
		sleep((caddr_t)bp, PRIBIO);
	(void) spl0();
	geterror(bp);
}

#ifndef	FASTVAX
/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface)
 */
notavail(bp)
register struct buf *bp;
{
	register s;

	s = spl6();
	bp->av_back->av_forw = bp->av_forw;
	bp->av_forw->av_back = bp->av_back;
	bp->b_flags |= B_BUSY;
	splx(s);
}
#endif

/*
 * Mark I/O complete on a buffer.  If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon.  Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
iodone(bp)
register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_DONE)
		panic("dup iodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		cnt.v_pgout++;
		bp->av_forw = bclnlist;
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);
		splx(s);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Zero the core associated with a buffer.
 */
clrbuf(bp)
struct buf *bp;
{
	register *p;
	register c;

	p = bp->b_un.b_words;
	c = BSIZE/sizeof(int);
	do
		*p++ = 0;
	while (--c);
	bp->b_resid = 0;
}

/*
 * swap I/O -
 *
 * If the flag indicates a dirty page push initiated
 * by the pageout daemon, we map the page into the i th
 * virtual page of process 2 (the daemon itself) where i is
 * the index of the swap header that has been allocated.
 * We simply initialize the header and queue the I/O but
 * do not wait for completion.  When the I/O completes,
 * iodone() will link the header to a list of cleaned
 * pages to be processed by the pageout daemon.
 */
swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
	struct proc *p;
	swblk_t dblkno;
	caddr_t addr;
	int flag, nbytes;
	dev_t dev;
	unsigned pfcent;
{
	register struct buf *bp;
	register int c;
	int p2dp;
	register struct pte *dpte, *vpte;

	(void) spl6();
	while (bswlist.av_forw == NULL) {
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.av_forw;
	bswlist.av_forw = bp->av_forw;
	(void) spl0();

	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
		if (rdflg == B_READ)
			sum.v_pswpin += btoc(nbytes);
		else
			sum.v_pswpout += btoc(nbytes);
	bp->b_proc = p;
	if (flag & B_DIRTY) {
		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
		dpte = dptopte(&proc[2], p2dp);
		vpte = vtopte(p, btop(addr));
		for (c = 0; c < nbytes; c += NBPG) {
			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
				panic("swap bad pte");
			*dpte++ = *vpte++;
		}
		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
	} else
		bp->b_un.b_addr = addr;
	while (nbytes > 0) {
		c = imin(ctob(120), nbytes);
		bp->b_bcount = c;
		bp->b_blkno = dblkno;
		bp->b_dev = dev;
		if (flag & B_DIRTY) {
			swpf[bp - swbuf] = pfcent;
			swsize[bp - swbuf] = nbytes;
		}
		(*bdevsw[major(dev)].d_strategy)(bp);
		if (flag & B_DIRTY) {
			if (c < nbytes)
				panic("big push");
			return;
		}
		(void) spl6();
		while ((bp->b_flags&B_DONE)==0)
			sleep((caddr_t)bp, PSWP);
		(void) spl0();
		bp->b_un.b_addr += c;
		bp->b_flags &= ~B_DONE;
		if (bp->b_flags & B_ERROR) {
			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
				panic("hard IO err in swap");
			swkill(p, (char *)0);
		}
		nbytes -= c;
		dblkno += btoc(c);
	}
	(void) spl6();
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bswlist);
		wakeup((caddr_t)&proc[2]);
	}
	(void) spl0();
}
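
/*
 * A sketch of a swap() call (assumed caller, not from this file):
 * paging in one click of a process's address space from the swap
 * device.  The block number and address computation are stand-ins
 * for what the VM code actually supplies.
 */
#ifdef notdef
pageinsketch(p, vaddr, dblkno)
	struct proc *p;
	caddr_t vaddr;
	swblk_t dblkno;
{

	swap(p, dblkno, vaddr, ctob(CLSIZE), B_READ, B_PGIN, swapdev, 0);
}
#endif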

/*
 * If rout == 0 then killed on swap error, else
 * rout is the name of the routine where we ran out of
 * swap space.
 */
swkill(p, rout)
	struct proc *p;
	char *rout;
{

	printf("%d: ", p->p_pid);
	if (rout)
		printf("out of swap space in %s\n", rout);
	else
		printf("killed on swap error\n");
	/*
	 * To be sure no looping (e.g. in vmsched trying to
	 * swap out) mark process locked in core (as though
	 * done by user) after killing it so no one will try
	 * to swap it out.
	 */
	psignal(p, SIGKILL);
	p->p_flag |= SULOCK;
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 */
bflush(dev)
dev_t dev;
{
	register struct buf *bp;

loop:
	(void) spl6();
	for (bp = bfreelist.av_forw; bp != &bfreelist; bp = bp->av_forw) {
		if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) {
			bp->b_flags |= B_ASYNC;
			notavail(bp);
			bwrite(bp);
			goto loop;
		}
	}
	(void) spl0();
}
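
/*
 * A sketch of the callers named above (assumed): update() pushes all
 * delayed writes periodically; umount flushes just the one device.
 */
#ifdef notdef
flushsketch(dev)
dev_t dev;
{

	bflush(NODEV);		/* as update() does: every device */
	bflush(dev);		/* as umount does: this device only */
}
#endif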

/*
 * Raw I/O.  The arguments are
 *	The strategy routine for the device
 *	A buffer, which will always be a special buffer
 *	  header owned exclusively by the device for this purpose
 *	The device number
 *	Read/write flag
 * Essentially all the work is computing physical addresses and
 * validating them.
 * If the user has the proper access privileges, the process is
 * marked 'delayed unlock' and the pages involved in the I/O are
 * faulted and locked.  After the completion of the I/O, the above pages
 * are unlocked.
 */
physio(strat, bp, dev, rw, mincnt)
int (*strat)();
register struct buf *bp;
dev_t dev;
int rw;
unsigned (*mincnt)();
{
	register int c;
	char *a;

	if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) {
		u.u_error = EFAULT;
		return;
	}
	(void) spl6();
	while (bp->b_flags&B_BUSY) {
		bp->b_flags |= B_WANTED;
		sleep((caddr_t)bp, PRIBIO+1);
	}
	bp->b_error = 0;
	bp->b_proc = u.u_procp;
	bp->b_un.b_addr = u.u_base;
	while (u.u_count != 0 && bp->b_error==0) {
		bp->b_flags = B_BUSY | B_PHYS | rw;
		bp->b_dev = dev;
		bp->b_blkno = u.u_offset >> PGSHIFT;
		bp->b_bcount = u.u_count;
		(*mincnt)(bp);
		c = bp->b_bcount;
		u.u_procp->p_flag |= SPHYSIO;
		vslock(a = bp->b_un.b_addr, c);
		(*strat)(bp);
		(void) spl6();
		while ((bp->b_flags&B_DONE) == 0)
			sleep((caddr_t)bp, PRIBIO);
		vsunlock(a, c, rw);
		u.u_procp->p_flag &= ~SPHYSIO;
		if (bp->b_flags&B_WANTED)
			wakeup((caddr_t)bp);
		(void) spl0();
		bp->b_un.b_addr += c;
		u.u_count -= c;
		u.u_offset += c;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
	u.u_count = bp->b_resid;
	geterror(bp);
}

/*ARGSUSED*/
unsigned
minphys(bp)
struct buf *bp;
{

	if (bp->b_bcount > 60 * 1024)
		bp->b_bcount = 60 * 1024;
}
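
/*
 * A sketch of how a character-device driver uses physio() and minphys()
 * (assumed names: xxread, xxwrite, xxstrategy and the static header
 * rxxbuf are hypothetical; real drivers follow this shape).
 */
#ifdef notdef
struct	buf rxxbuf;

xxread(dev)
dev_t dev;
{

	physio(xxstrategy, &rxxbuf, dev, B_READ, minphys);
}

xxwrite(dev)
dev_t dev;
{

	physio(xxstrategy, &rxxbuf, dev, B_WRITE, minphys);
}
#endif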

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
register struct buf *bp;
{

	if (bp->b_flags&B_ERROR)
		if ((u.u_error = bp->b_error)==0)
			u.u_error = EIO;
}