usr/src/sys/kern/vfs_bio.c
/*	vfs_bio.c	4.36	82/09/04	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"
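
/*
 * Debugging switch: when set nonzero (e.g. from a kernel debugger),
 * bwrite and brealloc print a trace of buffer i/o on the console.
 */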
int bioprintfs = 0;

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp;

	bp = getblk(dev, blkno, size);
	if (bp->b_flags&B_DONE) {
		trace(TR_BREADHIT, dev, blkno);
		return(bp);
	}
	bp->b_flags |= B_READ;
	(*bdevsw[major(dev)].d_strategy)(bp);
	trace(TR_BREADMISS, dev, blkno);
	u.u_ru.ru_inblock++;		/* pay for read */
	biowait(bp);
	return(bp);
}
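
/*
 * Typical use (a sketch, assuming the standard buf fields b_flags
 * and b_un.b_addr): read a block, use its data, then release the
 * buffer. bread returns with the buffer busy; the caller must
 * brelse it when done.
 *
 *	bp = bread(dev, blkno, size);
 *	if ((bp->b_flags & B_ERROR) == 0)
 *		... use the data at bp->b_un.b_addr ...
 *	brelse(bp);
 */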

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, size, rablkno, rasize)
	dev_t dev;
	daddr_t blkno; int size;
	daddr_t rablkno; int rasize;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	/*
	 * If the block isn't in core, then allocate
	 * a buffer and initiate i/o (getblk checks
	 * for a cache hit).
	 */
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno, size);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			(*bdevsw[major(dev)].d_strategy)(bp);
			trace(TR_BREADMISS, dev, blkno);
			u.u_ru.ru_inblock++;		/* pay for read */
		} else
			trace(TR_BREADHIT, dev, blkno);
	}

	/*
	 * If there's a read-ahead block, start i/o
	 * on it also (as above).
	 */
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno, rasize);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
			trace(TR_BREADHITRA, dev, rablkno);
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			(*bdevsw[major(dev)].d_strategy)(rabp);
			trace(TR_BREADMISSRA, dev, rablkno);
			u.u_ru.ru_inblock++;		/* pay in advance */
		}
	}

	/*
	 * If block was in core, let bread get it.
	 * If block wasn't in core, then the read was started
	 * above, and just wait for it.
	 */
	if (bp == NULL)
		return (bread(dev, blkno, size));
	biowait(bp);
	return (bp);
}
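
/*
 * Sketch of the intended sequential-read pattern (names are
 * illustrative; rablkno/rasize come from mapping the next logical
 * block of the file):
 *
 *	bp = breada(dev, blkno, size, rablkno, rasize);
 *	... use bp as with bread ...
 *	brelse(bp);
 *
 * The read-ahead buffer is not returned to the caller; it is
 * released when its asynchronous i/o completes, so a later bread
 * of rablkno should hit in the cache.
 */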

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
	register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
	if ((flag&B_DELWRI) == 0)
		u.u_ru.ru_oublock++;		/* no one has paid yet */
	trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
	if (bioprintfs)
		printf("write %x blk %d count %d\n",
		    bp->b_dev, bp->b_blkno, bp->b_bcount);
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	/*
	 * If the write was synchronous, then await i/o completion.
	 * If the write was "delayed", then we put the buffer on
	 * the q of blocks awaiting i/o completion status.
	 * Otherwise, the i/o must be finished and we check for
	 * an error.
	 */
	if ((flag&B_ASYNC) == 0) {
		biowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	else
		u.u_error = geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	register int flags;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_ru.ru_oublock++;		/* no one has paid yet */
	flags = bdevsw[major(bp->b_dev)].d_flags;
	if (flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}
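
/*
 * Sketch of the partial-block write pattern described above: read
 * the block, modify part of it, and mark it for delayed write so
 * several partial writes may be combined into one disk write.
 *
 *	bp = bread(dev, blkno, size);
 *	... modify part of the data at bp->b_un.b_addr ...
 *	bdwrite(bp);
 */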

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}
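
/*
 * In summary, the three write flavors:
 *	bwrite(bp)	start i/o and wait for completion, then brelse
 *	bawrite(bp)	start i/o, return without waiting (B_ASYNC)
 *	bdwrite(bp)	no i/o yet: mark B_DELWRI and brelse, so the
 *			block is written when the buffer is reclaimed
 *			or when bflush runs
 */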

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	register s;

	/*
	 * If someone is waiting for this buffer, or
	 * for any free buffer, wake 'em up.
	 */
	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	if (bp->b_flags&B_ERROR)
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			bp->b_dev = NODEV;		/* no assoc */

	/*
	 * Stick the buffer back on a free list.
	 */
	s = spl6();
	if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQUEUES-1];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}
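
/*
 * Free list placement, in summary:
 *	B_ERROR/B_INVAL	head of the last queue: contents are useless,
 *			so the buffer is reused first
 *	B_LOCKED	tail of BQ_LOCKED
 *	B_AGE		tail of BQ_AGE, to be reused sooner
 *	otherwise	tail of BQ_LRU, for normal least-recently-used
 *			aging
 */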

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(dev, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno == blkno && bp->b_dev == dev &&
		    (bp->b_flags & B_INVAL) == 0)
			return (1);
	return (0);
}
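
/*
 * Return the block if it is already in core (reading it via bread);
 * otherwise return 0 without starting any I/O.
 */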
struct buf *
baddr(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno, size));
	return (0);
}

/*
 * Assign a buffer for the given block. If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, *dp, *ep;
	int s;

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	/*
	 * Search the cache for the block. If we hit, but
	 * the buffer is in use for i/o, then we wait until
	 * the i/o has completed.
	 */
	dp = BUFHASH(dev, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_dev != dev ||
		    bp->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
		notavail(bp);
		if (brealloc(bp, size) == 0)
			goto loop;
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	/*
	 * Not found in the cache, select something from
	 * a free list. Preference is to LRU list, then AGE list.
	 */
	s = spl6();
	for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--)
		if (ep->av_forw != ep)
			break;
	if (ep == bfreelist) {		/* no free blocks at all */
		ep->b_flags |= B_WANTED;
		sleep((caddr_t)ep, PRIBIO+1);
		splx(s);
		goto loop;
	}
	splx(s);
	bp = ep->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
	bp->b_flags = B_BUSY;
	bfree(bp);
	bremhash(bp);
	binshash(bp, dp);
	bp->b_dev = dev;
	bp->b_blkno = blkno;
	if (brealloc(bp, size) == 0)
		goto loop;
	return(bp);
}
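
/*
 * Sketch of allocating a block for writing without reading it
 * first, as a file system does for a newly allocated block (names
 * are illustrative): getblk yields the buffer without i/o, the
 * caller fills it in, then writes it out.
 *
 *	bp = getblk(dev, blkno, size);
 *	... fill in the data at bp->b_un.b_addr ...
 *	bwrite(bp);		(or bdwrite/bawrite)
 */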

/*
 * get an empty block,
 * not assigned to any particular device
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *dp;
	int s;

loop:
	s = spl6();
	for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		splx(s);
		goto loop;
	}
	splx(s);
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
	bp->b_flags = B_BUSY|B_INVAL;
	bfree(bp);
	bremhash(bp);
	binshash(bp, dp);
	bp->b_dev = (dev_t)NODEV;
	if (brealloc(bp, size) == 0)
		goto loop;
	return(bp);
}
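
/*
 * Since geteblk buffers are marked B_INVAL and NODEV, they never
 * match a cache lookup; they serve as scratch space. Minimal sketch:
 *
 *	bp = geteblk(size);
 *	... use bp->b_un.b_addr as temporary storage ...
 *	brelse(bp);
 */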

/*
 * Allocate space associated with a buffer.
 */
brealloc(bp, size)
	register struct buf *bp;
	int size;
{
	daddr_t start, last;
	register struct buf *ep;
	struct buf *dp;
	int s;

	/*
	 * First need to make sure that all overlapping previous I/O
	 * is dispatched.
	 */
	if (size == bp->b_bcount)
		return (1);
	if (size < bp->b_bcount) {
		if (bp->b_flags & B_DELWRI) {
			bwrite(bp);
			return (0);
		}
		if (bp->b_flags & B_LOCKED)
			panic("brealloc");
		goto allocit;
	}
	bp->b_flags &= ~B_DONE;
	if (bp->b_dev == NODEV)
		goto allocit;

	/*
	 * Search the cache for any buffers that overlap the one that we
	 * are trying to allocate. Overlapping buffers must be marked
	 * invalid, after being written out if they are dirty (indicated
	 * by B_DELWRI). A disk block must be mapped by at most one buffer
	 * at any point in time. Care must be taken to avoid deadlocking
	 * when two buffers are trying to get the same set of disk blocks.
	 */
	start = bp->b_blkno;
	last = start + (size / DEV_BSIZE) - 1;
	dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep == bp || ep->b_dev != bp->b_dev || (ep->b_flags&B_INVAL))
			continue;
		/* look for overlap */
		if (ep->b_bcount == 0 || ep->b_blkno > last ||
		    ep->b_blkno + (ep->b_bcount / DEV_BSIZE) <= start)
			continue;
		if (bioprintfs && (ep->b_flags&B_BUSY))
			printf("sleeping on:dev 0x%x, blks %d-%d, flg 0%o allocing dev 0x%x, blks %d-%d, flg 0%o\n",
			    ep->b_dev, ep->b_blkno,
			    ep->b_blkno + (ep->b_bcount / DEV_BSIZE) - 1,
			    ep->b_flags, bp->b_dev, start, last, bp->b_flags);
		s = spl6();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			(void) splx(s);
			goto loop;
		}
		(void) splx(s);
		notavail(ep);
		if (ep->b_flags & B_DELWRI) {
			if (bioprintfs)
				printf("DELWRI:dev 0x%x, blks %d-%d, flg 0%o allocing dev 0x%x, blks %d-%d, flg 0%o\n",
				    ep->b_dev, ep->b_blkno,
				    ep->b_blkno + (ep->b_bcount / DEV_BSIZE) - 1,
				    ep->b_flags, bp->b_dev, start, last, bp->b_flags);
			bwrite(ep);
			goto loop;
		}
		ep->b_flags |= B_INVAL;
		brelse(ep);
	}
allocit:
	/*
	 * Here the buffer is already available, so all we
	 * need to do is set the size. Someday a better memory
	 * management scheme will be implemented.
	 */
	bp->b_bcount = size;
	return (1);
}
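
/*
 * Worked example of the overlap test above, with DEV_BSIZE = 512
 * (its usual value): growing bp to size 2048 at b_blkno 32 gives
 * start = 32, last = 32 + 2048/512 - 1 = 35. A 1024-byte buffer ep
 * at block 34 spans blocks 34-35, so neither ep->b_blkno > last
 * (34 > 35) nor ep->b_blkno + ep->b_bcount/DEV_BSIZE <= start
 * (36 <= 32) holds; ep overlaps and must be written out (if dirty)
 * and invalidated before bp may cover blocks 32-35.
 */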

/*
 * Release space associated with a buffer.
 */
bfree(bp)
	struct buf *bp;
{
	/*
	 * Here the buffer does not change, so all we
	 * need to do is set the size. Someday a better memory
	 * management scheme will be implemented.
	 */
	bp->b_bcount = 0;
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = spl6();
	while ((bp->b_flags&B_DONE)==0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	u.u_error = geterror(bp);
}

/*
 * Mark I/O complete on a buffer. If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon. Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
biodone(bp)
	register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		bp->av_forw = bclnlist;
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		cnt.v_pgout++;
		cnt.v_pgpgout += bp->b_bcount / NBPG;
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);
		splx(s);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
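
/*
 * biodone is the driver's half of the handshake: a block driver's
 * strategy routine queues the buffer and starts the device, and its
 * interrupt routine calls biodone when the transfer finishes.
 * Hypothetical sketch of a driver's interrupt side (the xx names
 * are made up for illustration):
 *
 *	xxintr()
 *	{
 *		register struct buf *bp = xxtab.b_actf;
 *
 *		if (the device reported an error)
 *			bp->b_flags |= B_ERROR;
 *		biodone(bp);	wakes biowait, or brelse's B_ASYNC bufs
 *	}
 */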

/*
 * make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 * (and temporarily pagein)
 */
bflush(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct buf *flist;
	int s;

loop:
	s = spl6();
	for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
		for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
			if ((bp->b_flags & B_DELWRI) == 0)
				continue;
			if (dev == NODEV || dev == bp->b_dev) {
				bp->b_flags |= B_ASYNC;
				notavail(bp);
				bwrite(bp);
				goto loop;
			}
		}
	splx(s);
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0, set a generalized
 * code. Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
	register struct buf *bp;
{
	int error = 0;

	if (bp->b_flags&B_ERROR)
		if ((error = bp->b_error)==0)
			return (EIO);
	return (error);
}

/*
 * Invalidate in core blocks belonging to a closed or umounted filesystem.
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o. Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues. Until that happy day, this suffices for
 * correctness. ... kre
 */
binval(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct bufhd *hp;
#define	dp ((struct buf *)hp)

	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
			if (bp->b_dev == dev)
				bp->b_flags |= B_INVAL;
}