/*	vfs_bio.c	4.30	82/05/12	*/

/* merged into kernel:	@(#)bio.c 2.3 4/8/82 */

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer to
 * the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread
 *	breada
 *	baddr	(if it is incore)
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite
 *	bdwrite
 *	bawrite
 *	brelse
 */

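/*
 * Illustrative sketch, not part of the original source: the
 * allocate/release pairing described above, as a caller might
 * use it.  "dst" is a hypothetical destination buffer; errors
 * are reported through u.u_error by geterror() below.
 */
#ifdef notdef
	struct buf *bp;

	bp = bread(dev, blkno, size);	/* returned marked B_BUSY */
	if ((bp->b_flags & B_ERROR) == 0)
		bcopy(bp->b_un.b_addr, dst, size);	/* consume data */
	brelse(bp);			/* hand it back to the free lists */
#endif
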
struct	buf bfreelist[BQUEUES];
struct	buf bswlist, *bclnlist;

#define	BUFHSZ	63
#define	RND	(MAXBSIZE/DEV_BSIZE)
struct	bufhd bufhash[BUFHSZ];
#define	BUFHASH(dev, dblkno)	\
	((struct buf *)&bufhash[((int)(dev)+(((int)(dblkno))/RND)) % BUFHSZ])
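
/*
 * Worked example, not part of the original source: dblkno is
 * divided by RND so that every DEV_BSIZE block that can fall
 * within one MAXBSIZE buffer hashes to the same bucket.  If RND
 * were 8, dev 0 blocks 0-7 would map to bucket 0, blocks 8-15
 * to bucket 1, and block 504 (504/8 = 63) would wrap to bucket 0.
 */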

/*
 * Initialize hash links for buffers.
 */
bhinit()
{
	register int i;
	register struct bufhd *bp;

	for (bp = bufhash, i = 0; i < BUFHSZ; i++, bp++)
		bp->b_forw = bp->b_back = (struct buf *)bp;
}

/* #define	DISKMON	1 */

#ifdef	DISKMON
struct {
	int	nbuf;
	long	nread;
	long	nreada;
	long	ncache;
	long	nwrite;
	long	bufcount[64];
} io_info;
#endif

/*
 * Swap IO headers -
 * They contain the necessary information for the swap I/O.
 * At any given time, a swap header can be in three
 * different lists.  When free it is in the free list,
 * when allocated and the I/O queued, it is on the swap
 * device list, and finally, if the operation was a dirty
 * page push, when the I/O completes, it is inserted
 * in a list of cleaned pages to be processed by the pageout daemon.
 */
struct	buf *swbuf;
short	*swsize;		/* CAN WE JUST USE B_BCOUNT? */
int	*swpf;


#ifndef	UNFAST
#define	notavail(bp) \
{ \
	int x = spl6(); \
	(bp)->av_back->av_forw = (bp)->av_forw; \
	(bp)->av_forw->av_back = (bp)->av_back; \
	(bp)->b_flags |= B_BUSY; \
	splx(x); \
}
#endif

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp;

	bp = getblk(dev, blkno, size);
	if (bp->b_flags&B_DONE) {
#ifdef	TRACE
		trace(TR_BREADHIT, dev, blkno);
#endif
#ifdef	DISKMON
		io_info.ncache++;
#endif
		return(bp);
	}
	bp->b_flags |= B_READ;
	(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	TRACE
	trace(TR_BREADMISS, dev, blkno);
#endif
#ifdef	DISKMON
	io_info.nread++;
#endif
	u.u_vm.vm_inblk++;		/* pay for read */
	iowait(bp);
	return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller)
 */
struct buf *
breada(dev, blkno, rablkno, size)
	dev_t dev;
	daddr_t blkno, rablkno;
	int size;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno, size);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	TRACE
			trace(TR_BREADMISS, dev, blkno);
#endif
#ifdef	DISKMON
			io_info.nread++;
#endif
			u.u_vm.vm_inblk++;	/* pay for read */
		}
#ifdef	TRACE
		else
			trace(TR_BREADHIT, dev, blkno);
#endif
	}
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno, size);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
#ifdef	TRACE
			trace(TR_BREADHITRA, dev, blkno);
#endif
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			(*bdevsw[major(dev)].d_strategy)(rabp);
#ifdef	TRACE
			trace(TR_BREADMISSRA, dev, rablkno);
#endif
#ifdef	DISKMON
			io_info.nreada++;
#endif
			u.u_vm.vm_inblk++;	/* pay in advance */
		}
	}
	if (bp == NULL)
		return (bread(dev, blkno, size));
	iowait(bp);
	return(bp);
}
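
/*
 * Illustrative sketch, not part of the original source: a caller
 * reading sequentially names the block it wants now and the one
 * it expects next.  "nextbn" is hypothetical; for a contiguous
 * layout it would be bn + size/DEV_BSIZE.
 */
#ifdef notdef
	bp = breada(dev, bn, nextbn, size);	/* nextbn I/O is async */
#endif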

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
	register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
#ifdef	DISKMON
	io_info.nwrite++;
#endif
	if ((flag&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;	/* no one paid yet */
#ifdef	TRACE
	trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
#endif
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	if ((flag&B_ASYNC) == 0) {
		iowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	else
		geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	register int flags;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;	/* no one paid yet */
	flags = bdevsw[major(bp->b_dev)].d_flags;
	if (flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}
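
/*
 * Illustrative sketch, not part of the original source: the
 * partial-block write described above.  "src", "off" and "cnt"
 * are hypothetical; bdwrite() bets that a later write to the
 * same block will pick up the deferred I/O.
 */
#ifdef notdef
	bp = bread(dev, blkno, size);		/* read-modify-write */
	bcopy(src, bp->b_un.b_addr + off, cnt);
	bdwrite(bp);				/* marks B_DELWRI; no I/O yet */
#endif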

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	register s;

	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	if (bp->b_flags&B_ERROR)
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			bp->b_dev = NODEV;		/* no assoc */
	s = spl6();
	if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQUEUES-1];
		flist->av_forw->av_back = bp;
		bp->av_forw = flist->av_forw;
		flist->av_forw = bp;
		bp->av_back = flist;
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		flist->av_back->av_forw = bp;
		bp->av_back = flist->av_back;
		flist->av_back = bp;
		bp->av_forw = flist;
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}
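
/*
 * Note, not part of the original source: getblk() and geteblk()
 * below reclaim by scanning from bfreelist[BQUEUES-1] downward
 * and stop before queue 0, so the front of the highest queue
 * (where error/invalid buffers are placed above) is reused first
 * and BQ_AGE buffers go before BQ_LRU ones; assuming the usual
 * numbering with BQ_LOCKED as queue 0, locked buffers are never
 * reclaimed.
 */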

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada)
 */
incore(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(dev, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno == blkno && bp->b_dev == dev &&
		    !(bp->b_flags & B_INVAL))
			return (1);
	return (0);
}

struct buf *
baddr(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno, size));
	return (0);
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, *dp, *ep;
#ifdef	DISKMON
	register int i;
#endif
	int s;

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	dp = BUFHASH(dev, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_dev != dev ||
		    bp->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
#ifdef	DISKMON
		i = 0;
		dp = bp->av_forw;
		while ((dp->b_flags & B_HEAD) == 0) {
			i++;
			dp = dp->av_forw;
		}
		if (i<64)
			io_info.bufcount[i]++;
#endif
		notavail(bp);
		brealloc(bp, size);
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	s = spl6();
	for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--)
		if (ep->av_forw != ep)
			break;
	if (ep == bfreelist) {		/* no free blocks at all */
		ep->b_flags |= B_WANTED;
		sleep((caddr_t)ep, PRIBIO+1);
		splx(s);
		goto loop;
	}
	splx(s);
	bp = ep->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
#ifdef	TRACE
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
#endif
	bp->b_flags = B_BUSY;
	bfree(bp);
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = blkno;
	brealloc(bp, size);
	return(bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *dp;
	int s;

loop:
	s = spl6();
	for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		goto loop;
	}
	splx(s);
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
#ifdef	TRACE
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
#endif
	bp->b_flags = B_BUSY|B_INVAL;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = (dev_t)NODEV;
	bp->b_bcount = size;
	return(bp);
}

/*
 * Allocate space associated with a buffer.
 */
brealloc(bp, size)
	register struct buf *bp;
	int size;
{
	daddr_t start, last;
	register struct buf *ep;
	struct buf *dp;
	int s;

	/*
	 * First need to make sure that all overlapping previous I/O
	 * is disposed of.
	 */
	if (size == bp->b_bcount)
		return;
	if (size < bp->b_bcount) {
		bp->b_bcount = size;
		return;
	}
	start = bp->b_blkno + (bp->b_bcount / DEV_BSIZE);
	last = bp->b_blkno + (size / DEV_BSIZE) - 1;
	if (bp->b_bcount == 0) {
		start++;
		if (start == last)
			goto allocit;
	}
	dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
	(void) spl0();
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep->b_blkno < start || ep->b_blkno > last ||
		    ep->b_dev != bp->b_dev || ep->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			splx(s);
			goto loop;
		}
		(void) spl0();
		/*
		 * What we would really like to do is kill this
		 * I/O since it is now useless.  We cannot do that
		 * so we force it to complete, so that it cannot
		 * over-write our useful data later.
		 */
		if (ep->b_flags & B_DELWRI) {
			notavail(ep);
			ep->b_flags |= B_ASYNC;
			bwrite(ep);
			goto loop;
		}
	}
allocit:
	/*
	 * Here the buffer is already available, so all we
	 * need to do is set the size.  Someday a better memory
	 * management scheme will be implemented.
	 */
	bp->b_bcount = size;
}
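
/*
 * Worked example, not part of the original source: growing a
 * 512 byte buffer at b_blkno 100 to 2048 bytes (DEV_BSIZE 512)
 * gives start = 101 and last = 103; any buffer on the same
 * device whose b_blkno lies in 101..103 is waited for or forced
 * out, so its I/O can no longer overwrite the grown buffer.
 */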

/*
 * Release space associated with a buffer.
 */
bfree(bp)
	struct buf *bp;
{
	/*
	 * Here the buffer does not change, so all we
	 * need to do is set the size.  Someday a better memory
	 * management scheme will be implemented.
	 */
	bp->b_bcount = 0;
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
iowait(bp)
	register struct buf *bp;
{
	int s;

	s = spl6();
	while ((bp->b_flags&B_DONE)==0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	geterror(bp);
}

#ifdef	UNFAST
/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface)
 */
notavail(bp)
	register struct buf *bp;
{
	register s;

	s = spl6();
	bp->av_back->av_forw = bp->av_forw;
	bp->av_forw->av_back = bp->av_back;
	bp->b_flags |= B_BUSY;
	splx(s);
}
#endif

/*
 * Mark I/O complete on a buffer.  If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon.  Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
iodone(bp)
	register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_DONE)
		panic("dup iodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		bp->av_forw = bclnlist;
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		cnt.v_pgout++;
		cnt.v_pgpgout += bp->b_bcount / NBPG;
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);
		splx(s);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Zero the core associated with a buffer.
 */
clrbuf(bp)
	struct buf *bp;
{
	register int *p;
	register int c;

	p = bp->b_un.b_words;
	c = bp->b_bcount/sizeof(int);
	do
		*p++ = 0;
	while (--c);
	bp->b_resid = 0;
}

/*
 * swap I/O -
 *
 * If the flag indicates a dirty page push initiated
 * by the pageout daemon, we map the page into the i'th
 * virtual page of process 2 (the daemon itself) where i is
 * the index of the swap header that has been allocated.
 * We simply initialize the header and queue the I/O but
 * do not wait for completion.  When the I/O completes,
 * iodone() will link the header to a list of cleaned
 * pages to be processed by the pageout daemon.
 */
swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
	struct proc *p;
	swblk_t dblkno;
	caddr_t addr;
	int rdflg, flag, nbytes;
	dev_t dev;
	unsigned pfcent;
{
	register struct buf *bp;
	register int c;
	int p2dp;
	register struct pte *dpte, *vpte;
	int s;

	s = spl6();
	while (bswlist.av_forw == NULL) {
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.av_forw;
	bswlist.av_forw = bp->av_forw;
	splx(s);

	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
		if (rdflg == B_READ)
			sum.v_pswpin += btoc(nbytes);
		else
			sum.v_pswpout += btoc(nbytes);
	bp->b_proc = p;
	if (flag & B_DIRTY) {
		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
		dpte = dptopte(&proc[2], p2dp);
		vpte = vtopte(p, btop(addr));
		for (c = 0; c < nbytes; c += NBPG) {
			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
				panic("swap bad pte");
			*dpte++ = *vpte++;
		}
		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
	} else
		bp->b_un.b_addr = addr;
	while (nbytes > 0) {
		c = imin(ctob(120), nbytes);
		bp->b_bcount = c;
		bp->b_blkno = dblkno;
		bp->b_dev = dev;
		if (flag & B_DIRTY) {
			swpf[bp - swbuf] = pfcent;
			swsize[bp - swbuf] = nbytes;
		}
#ifdef	TRACE
		trace(TR_SWAPIO, dev, bp->b_blkno);
#endif
		(*bdevsw[major(dev)].d_strategy)(bp);
		if (flag & B_DIRTY) {
			if (c < nbytes)
				panic("big push");
			return;
		}
		s = spl6();
		while ((bp->b_flags&B_DONE)==0)
			sleep((caddr_t)bp, PSWP);
		splx(s);
		bp->b_un.b_addr += c;
		bp->b_flags &= ~B_DONE;
		if (bp->b_flags & B_ERROR) {
			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
				panic("hard IO err in swap");
			swkill(p, (char *)0);
		}
		nbytes -= c;
		dblkno += btoc(c);
	}
	s = spl6();
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bswlist);
		wakeup((caddr_t)&proc[2]);
	}
	splx(s);
}
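
/*
 * Worked example, not part of the original source: the transfer
 * is chopped into pieces of at most ctob(120) bytes; assuming the
 * VAX's 512 byte clicks that is 61440 bytes, so a 102400 byte
 * swap is done as a 61440 byte transfer followed by a 40960 byte
 * one, advancing dblkno by btoc(c) clicks each time around.
 */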

/*
 * If rout == 0 then killed on swap error, else
 * rout is the name of the routine where we ran out of
 * swap space.
 */
swkill(p, rout)
	struct proc *p;
	char *rout;
{
	char *mesg;

	printf("pid %d: ", p->p_pid);
	if (rout)
		printf(mesg = "killed due to no swap space\n");
	else
		printf(mesg = "killed on swap error\n");
	uprintf("sorry, pid %d was %s", p->p_pid, mesg);
	/*
	 * To be sure no looping (e.g. in vmsched trying to
	 * swap out) mark process locked in core (as though
	 * done by user) after killing it so no one will try
	 * to swap it out.
	 */
	psignal(p, SIGKILL);
	p->p_flag |= SULOCK;
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 * (and temporarily pagein)
 */
bflush(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct buf *flist;
	int s;

loop:
	s = spl6();
	for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
		for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
			if (bp->b_flags&B_DELWRI &&
			    (dev == NODEV || dev == bp->b_dev)) {
				bp->b_flags |= B_ASYNC;
				notavail(bp);
				bwrite(bp);
				goto loop;
			}
		}
	splx(s);
}

/*
 * Raw I/O.  The arguments are
 *	The strategy routine for the device
 *	A buffer, which will always be a special buffer
 *	  header owned exclusively by the device for this purpose
 *	The device number
 *	Read/write flag
 * Essentially all the work is computing physical addresses and
 * validating them.
 * If the user has the proper access privileges, the process is
 * marked 'delayed unlock' and the pages involved in the I/O are
 * faulted and locked.  After the completion of the I/O, the above
 * pages are unlocked.
 */
physio(strat, bp, dev, rw, mincnt)
	int (*strat)();
	register struct buf *bp;
	dev_t dev;
	int rw;
	unsigned (*mincnt)();
{
	register int c;
	char *a;
	int s;

	if (useracc(u.u_base, u.u_count, rw==B_READ? B_WRITE:B_READ) == NULL) {
		u.u_error = EFAULT;
		return;
	}
	s = spl6();
	while (bp->b_flags&B_BUSY) {
		bp->b_flags |= B_WANTED;
		sleep((caddr_t)bp, PRIBIO+1);
	}
	splx(s);
	bp->b_error = 0;
	bp->b_proc = u.u_procp;
	bp->b_un.b_addr = u.u_base;
	while (u.u_count != 0) {
		bp->b_flags = B_BUSY | B_PHYS | rw;
		bp->b_dev = dev;
		bp->b_blkno = u.u_offset >> PGSHIFT;
		bp->b_bcount = u.u_count;
		(*mincnt)(bp);
		c = bp->b_bcount;
		u.u_procp->p_flag |= SPHYSIO;
		vslock(a = bp->b_un.b_addr, c);
		(*strat)(bp);
		(void) spl6();
		while ((bp->b_flags&B_DONE) == 0)
			sleep((caddr_t)bp, PRIBIO);
		vsunlock(a, c, rw);
		u.u_procp->p_flag &= ~SPHYSIO;
		if (bp->b_flags&B_WANTED)
			wakeup((caddr_t)bp);
		splx(s);
		bp->b_un.b_addr += c;
		u.u_count -= c;
		u.u_offset += c;
		if (bp->b_flags&B_ERROR)
			break;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
	u.u_count = bp->b_resid;
	geterror(bp);
}
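
/*
 * Illustrative sketch, not part of the original source: how a
 * character interface to a block device would typically invoke
 * physio(), in the style of contemporary raw-device drivers.
 * "xxread", "xxstrategy" and "xxbuf" are hypothetical names.
 */
#ifdef notdef
xxread(dev)
	dev_t dev;
{

	physio(xxstrategy, &xxbuf, dev, B_READ, minphys);
}
#endif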

/*ARGSUSED*/
unsigned
minphys(bp)
	struct buf *bp;
{

	if (bp->b_bcount > 63 * 1024)
		bp->b_bcount = 63 * 1024;
}


/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
	register struct buf *bp;
{

	if (bp->b_flags&B_ERROR)
		if ((u.u_error = bp->b_error)==0)
			u.u_error = EIO;
}

/*
 * Invalidate in core blocks belonging to closed or umounted filesystem
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty,
 * then properly flush the queues.  Until that happy day, this suffices
 * for correctness.						... kre
 */
binval(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct bufhd *hp;
#define	dp ((struct buf *)hp)

	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
			if (bp->b_dev == dev)
				bp->b_flags |= B_INVAL;
}