expand vfs_add inline; allow update of mounted file systems
[unix-history] / usr / src / sys / kern / vfs_bio.c
CommitLineData
/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley.  The name of the
 * University may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *	@(#)vfs_bio.c	7.13 (Berkeley) %G%
 */
961945a8 19
94368568 20#include "param.h"
94368568
JB
21#include "user.h"
22#include "buf.h"
7188ac27 23#include "vnode.h"
94368568 24#include "trace.h"
a937f856 25#include "ucred.h"
663dbc72 26
663dbc72
BJ
27/*
28 * Read in (if necessary) the block and return a buffer pointer.
29 */
a937f856 30bread(vp, blkno, size, cred, bpp)
7188ac27 31 struct vnode *vp;
ad30fb67
KM
32 daddr_t blkno;
33 int size;
a937f856 34 struct ucred *cred;
7188ac27 35 struct buf **bpp;
ec67a3ce
MK
36#ifdef SECSIZE
37 long secsize;
38#endif SECSIZE
663dbc72
BJ
39{
40 register struct buf *bp;
41
4f083fd7
SL
42 if (size == 0)
43 panic("bread: size 0");
ec67a3ce
MK
44#ifdef SECSIZE
45 bp = getblk(dev, blkno, size, secsize);
46#else SECSIZE
7188ac27 47 *bpp = bp = getblk(vp, blkno, size);
ec67a3ce 48#endif SECSIZE
32a56bda 49 if (bp->b_flags&(B_DONE|B_DELWRI)) {
7188ac27
KM
50 trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size), blkno);
51 return (0);
663dbc72
BJ
52 }
53 bp->b_flags |= B_READ;
4f083fd7
SL
54 if (bp->b_bcount > bp->b_bufsize)
55 panic("bread");
a937f856
KM
56 if (bp->b_rcred == NOCRED && cred != NOCRED) {
57 crhold(cred);
58 bp->b_rcred = cred;
59 }
7188ac27
KM
60 VOP_STRATEGY(bp);
61 trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size), blkno);
fb99a9a1 62 u.u_ru.ru_inblock++; /* pay for read */
7188ac27 63 return (biowait(bp));
663dbc72
BJ
64}
65
66/*
67 * Read in the block, like bread, but also start I/O on the
68 * read-ahead block (which is not allocated to the caller)
69 */
a937f856 70breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
7188ac27 71 struct vnode *vp;
84baaab3 72 daddr_t blkno; int size;
ec67a3ce
MK
73#ifdef SECSIZE
74 long secsize;
75#endif SECSIZE
a8d3bf7f 76 daddr_t rablkno; int rabsize;
a937f856 77 struct ucred *cred;
7188ac27 78 struct buf **bpp;
663dbc72
BJ
79{
80 register struct buf *bp, *rabp;
81
82 bp = NULL;
3efdd860
KM
83 /*
84 * If the block isn't in core, then allocate
85 * a buffer and initiate i/o (getblk checks
86 * for a cache hit).
87 */
7188ac27
KM
88 if (!incore(vp, blkno)) {
89 *bpp = bp = getblk(vp, blkno, size);
ec67a3ce 90#endif SECSIZE
32a56bda 91 if ((bp->b_flags&(B_DONE|B_DELWRI)) == 0) {
663dbc72 92 bp->b_flags |= B_READ;
4f083fd7
SL
93 if (bp->b_bcount > bp->b_bufsize)
94 panic("breada");
a937f856
KM
95 if (bp->b_rcred == NOCRED && cred != NOCRED) {
96 crhold(cred);
97 bp->b_rcred = cred;
98 }
7188ac27
KM
99 VOP_STRATEGY(bp);
100 trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size),
101 blkno);
fb99a9a1 102 u.u_ru.ru_inblock++; /* pay for read */
3efdd860 103 } else
7188ac27
KM
104 trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size),
105 blkno);
663dbc72 106 }
3efdd860
KM
107
108 /*
109 * If there's a read-ahead block, start i/o
110 * on it also (as above).
111 */
7188ac27
KM
112 if (rablkno && !incore(vp, rablkno)) {
113 rabp = getblk(vp, rablkno, rabsize);
ec67a3ce 114#endif SECSIZE
32a56bda 115 if (rabp->b_flags & (B_DONE|B_DELWRI)) {
663dbc72 116 brelse(rabp);
7188ac27 117 trace(TR_BREADHITRA,
5062ac4a 118 pack(vp->v_mount->m_fsid[0], rabsize), rablkno);
973ecc4f 119 } else {
663dbc72 120 rabp->b_flags |= B_READ|B_ASYNC;
4f083fd7
SL
121 if (rabp->b_bcount > rabp->b_bufsize)
122 panic("breadrabp");
5062ac4a 123 if (rabp->b_rcred == NOCRED && cred != NOCRED) {
a937f856 124 crhold(cred);
5062ac4a 125 rabp->b_rcred = cred;
a937f856 126 }
7188ac27
KM
127 VOP_STRATEGY(rabp);
128 trace(TR_BREADMISSRA,
5062ac4a 129 pack(vp->v_mount->m_fsid[0], rabsize), rablkno);
fb99a9a1 130 u.u_ru.ru_inblock++; /* pay in advance */
663dbc72
BJ
131 }
132 }
3efdd860
KM
133
134 /*
84baaab3
KM
135 * If block was in core, let bread get it.
136 * If block wasn't in core, then the read was started
137 * above, and just wait for it.
3efdd860 138 */
84baaab3 139 if (bp == NULL)
ec67a3ce
MK
140#ifdef SECSIZE
141 return (bread(dev, blkno, size, secsize));
142#else SECSIZE
a937f856 143 return (bread(vp, blkno, size, cred, bpp));
7188ac27 144 return (biowait(bp));
663dbc72
BJ
145}
146
147/*
148 * Write the buffer, waiting for completion.
149 * Then release the buffer.
150 */
151bwrite(bp)
3efdd860 152 register struct buf *bp;
663dbc72 153{
7188ac27
KM
154 register int flag;
155 int error;
663dbc72
BJ
156
157 flag = bp->b_flags;
f844ee62 158 bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
663dbc72 159 if ((flag&B_DELWRI) == 0)
fb99a9a1 160 u.u_ru.ru_oublock++; /* noone paid yet */
7188ac27
KM
161 trace(TR_BWRITE,
162 pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bcount), bp->b_blkno);
4f083fd7
SL
163 if (bp->b_bcount > bp->b_bufsize)
164 panic("bwrite");
7188ac27 165 VOP_STRATEGY(bp);
3efdd860
KM
166
167 /*
168 * If the write was synchronous, then await i/o completion.
169 * If the write was "delayed", then we put the buffer on
170 * the q of blocks awaiting i/o completion status.
3efdd860 171 */
663dbc72 172 if ((flag&B_ASYNC) == 0) {
7188ac27 173 error = biowait(bp);
663dbc72 174 brelse(bp);
7188ac27 175 } else if (flag & B_DELWRI) {
663dbc72 176 bp->b_flags |= B_AGE;
7188ac27
KM
177 error = 0;
178 }
179 return (error);
663dbc72
BJ
180}
181
182/*
183 * Release the buffer, marking it so that if it is grabbed
184 * for another purpose it will be written out before being
185 * given up (e.g. when writing a partial block where it is
186 * assumed that another write for the same block will soon follow).
187 * This can't be done for magtape, since writes must be done
188 * in the same order as requested.
189 */
190bdwrite(bp)
3efdd860 191 register struct buf *bp;
663dbc72 192{
663dbc72
BJ
193
194 if ((bp->b_flags&B_DELWRI) == 0)
fb99a9a1 195 u.u_ru.ru_oublock++; /* noone paid yet */
7188ac27
KM
196#ifdef notdef
197 /*
198 * This does not work for buffers associated with
199 * vnodes that are remote - they have no dev.
200 * Besides, we don't use bio with tapes, so rather
201 * than develop a fix, we just ifdef this out for now.
202 */
ec67a3ce 203 if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE)
663dbc72
BJ
204 bawrite(bp);
205 else {
206 bp->b_flags |= B_DELWRI | B_DONE;
207 brelse(bp);
208 }
7188ac27
KM
209#endif
210 bp->b_flags |= B_DELWRI | B_DONE;
211 brelse(bp);
663dbc72
BJ
212}
213
214/*
215 * Release the buffer, start I/O on it, but don't wait for completion.
216 */
217bawrite(bp)
3efdd860 218 register struct buf *bp;
663dbc72
BJ
219{
220
221 bp->b_flags |= B_ASYNC;
7188ac27 222 (void) bwrite(bp);
663dbc72
BJ
223}
224
225/*
3efdd860 226 * Release the buffer, with no I/O implied.
663dbc72
BJ
227 */
228brelse(bp)
3efdd860 229 register struct buf *bp;
663dbc72 230{
46387ee3 231 register struct buf *flist;
663dbc72
BJ
232 register s;
233
7188ac27
KM
234 trace(TR_BRELSE,
235 pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_blkno);
3efdd860
KM
236 /*
237 * If someone's waiting for the buffer, or
238 * is waiting for a buffer wake 'em up.
239 */
663dbc72
BJ
240 if (bp->b_flags&B_WANTED)
241 wakeup((caddr_t)bp);
46387ee3
BJ
242 if (bfreelist[0].b_flags&B_WANTED) {
243 bfreelist[0].b_flags &= ~B_WANTED;
244 wakeup((caddr_t)bfreelist);
663dbc72 245 }
7188ac27
KM
246 if (bp->b_flags & B_NOCACHE) {
247 bp->b_flags |= B_INVAL;
248 }
60a71525
BJ
249 if (bp->b_flags&B_ERROR)
250 if (bp->b_flags & B_LOCKED)
251 bp->b_flags &= ~B_ERROR; /* try again later */
252 else
7188ac27 253 brelvp(bp); /* no assoc */
3efdd860
KM
254
255 /*
256 * Stick the buffer back on a free list.
257 */
a5e62f37 258 s = splbio();
4f083fd7
SL
259 if (bp->b_bufsize <= 0) {
260 /* block has no buffer ... put at front of unused buffer list */
261 flist = &bfreelist[BQ_EMPTY];
262 binsheadfree(bp, flist);
263 } else if (bp->b_flags & (B_ERROR|B_INVAL)) {
46387ee3 264 /* block has no info ... put at front of most free list */
4f083fd7 265 flist = &bfreelist[BQ_AGE];
3efdd860 266 binsheadfree(bp, flist);
663dbc72 267 } else {
46387ee3
BJ
268 if (bp->b_flags & B_LOCKED)
269 flist = &bfreelist[BQ_LOCKED];
270 else if (bp->b_flags & B_AGE)
271 flist = &bfreelist[BQ_AGE];
272 else
273 flist = &bfreelist[BQ_LRU];
3efdd860 274 binstailfree(bp, flist);
663dbc72 275 }
7188ac27 276 bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE|B_NOCACHE);
663dbc72
BJ
277 splx(s);
278}
279
280/*
281 * See if the block is associated with some buffer
282 * (mainly to avoid getting hung up on a wait in breada)
283 */
7188ac27
KM
284incore(vp, blkno)
285 struct vnode *vp;
3efdd860 286 daddr_t blkno;
663dbc72
BJ
287{
288 register struct buf *bp;
46387ee3 289 register struct buf *dp;
663dbc72 290
243d4743 291 dp = BUFHASH(vp, blkno);
46387ee3 292 for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
7188ac27 293 if (bp->b_blkno == blkno && bp->b_vp == vp &&
3efdd860 294 (bp->b_flags & B_INVAL) == 0)
5603d07d 295 return (1);
5603d07d 296 return (0);
663dbc72
BJ
297}
298
a937f856 299baddr(vp, blkno, size, cred, bpp)
7188ac27 300 struct vnode *vp;
ad30fb67
KM
301 daddr_t blkno;
302 int size;
a937f856 303 struct ucred *cred;
7188ac27 304 struct buf **bpp;
ec67a3ce
MK
305#ifdef SECSIZE
306 long secsize;
307#endif SECSIZE
663dbc72
BJ
308{
309
7188ac27 310 if (incore(vp, blkno))
a937f856 311 return (bread(vp, blkno, size, cred, bpp));
7188ac27 312 *bpp = 0;
ec67a3ce 313#endif SECSIZE
663dbc72
BJ
314 return (0);
315}
316
317/*
318 * Assign a buffer for the given block. If the appropriate
319 * block is already associated, return it; otherwise search
320 * for the oldest non-busy buffer and reassign it.
23900030 321 *
32a56bda
KM
322 * If we find the buffer, but it is dirty (marked DELWRI) and
323 * its size is changing, we must write it out first. When the
324 * buffer is shrinking, the write is done by brealloc to avoid
325 * losing the unwritten data. When the buffer is growing, the
326 * write is done by getblk, so that bread will not read stale
327 * disk data over the modified data in the buffer.
328 *
23900030
BJ
329 * We use splx here because this routine may be called
330 * on the interrupt stack during a dump, and we don't
331 * want to lower the ipl back to 0.
663dbc72
BJ
332 */
333struct buf *
ec67a3ce
MK
334#ifdef SECSIZE
335getblk(dev, blkno, size, secsize)
336#else SECSIZE
7188ac27
KM
337getblk(vp, blkno, size)
338 register struct vnode *vp;
ad30fb67
KM
339 daddr_t blkno;
340 int size;
ec67a3ce
MK
341#ifdef SECSIZE
342 long secsize;
343#endif SECSIZE
663dbc72 344{
4f083fd7 345 register struct buf *bp, *dp;
23900030 346 int s;
663dbc72 347
00a6a148
KM
348 if (size > MAXBSIZE)
349 panic("getblk: size too big");
751af33e
KM
350 /*
351 * To prevent overflow of 32-bit ints when converting block
352 * numbers to byte offsets, blknos > 2^32 / DEV_BSIZE are set
353 * to the maximum number that can be converted to a byte offset
354 * without overflow. This is historic code; what bug it fixed,
355 * or whether it is still a reasonable thing to do is open to
356 * dispute. mkm 9/85
357 */
358 if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-DEV_BSHIFT))
359 blkno = 1 << ((sizeof(int)*NBBY-DEV_BSHIFT) + 1);
3efdd860
KM
360 /*
361 * Search the cache for the block. If we hit, but
362 * the buffer is in use for i/o, then we wait until
363 * the i/o has completed.
364 */
7188ac27 365 dp = BUFHASH(vp, blkno);
3efdd860 366loop:
46387ee3 367 for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
7188ac27 368 if (bp->b_blkno != blkno || bp->b_vp != vp ||
46387ee3 369 bp->b_flags&B_INVAL)
663dbc72 370 continue;
a5e62f37 371 s = splbio();
663dbc72
BJ
372 if (bp->b_flags&B_BUSY) {
373 bp->b_flags |= B_WANTED;
374 sleep((caddr_t)bp, PRIBIO+1);
23900030 375 splx(s);
663dbc72
BJ
376 goto loop;
377 }
23900030 378 splx(s);
663dbc72 379 notavail(bp);
32a56bda
KM
380 if (bp->b_bcount != size) {
381 if (bp->b_bcount < size && (bp->b_flags&B_DELWRI)) {
382 bp->b_flags &= ~B_ASYNC;
7188ac27 383 (void) bwrite(bp);
32a56bda
KM
384 goto loop;
385 }
386 if (brealloc(bp, size) == 0)
387 goto loop;
388 }
b646a125 389 if (bp->b_bcount != size && brealloc(bp, size) == 0)
9d6d37ce 390 goto loop;
663dbc72 391 bp->b_flags |= B_CACHE;
a5e62f37 392 return (bp);
663dbc72 393 }
4f083fd7 394 bp = getnewbuf();
ad30fb67 395 bfree(bp);
3efdd860 396 bremhash(bp);
7188ac27
KM
397 if (bp->b_vp)
398 brelvp(bp);
8fe1c702 399 VREF(vp);
7188ac27
KM
400 bp->b_vp = vp;
401 bp->b_dev = vp->v_rdev;
ec67a3ce
MK
402#ifdef SECSIZE
403 bp->b_blksize = secsize;
404#endif SECSIZE
ad30fb67 405 bp->b_blkno = blkno;
4f083fd7 406 bp->b_error = 0;
7188ac27
KM
407 bp->b_resid = 0;
408 binshash(bp, dp);
9d6d37ce
BJ
409 if (brealloc(bp, size) == 0)
410 goto loop;
a5e62f37 411 return (bp);
663dbc72
BJ
412}
413
414/*
415 * get an empty block,
416 * not assigned to any particular device
417 */
418struct buf *
ad30fb67
KM
419geteblk(size)
420 int size;
663dbc72 421{
4f083fd7 422 register struct buf *bp, *flist;
663dbc72 423
00a6a148
KM
424 if (size > MAXBSIZE)
425 panic("geteblk: size too big");
663dbc72 426loop:
4f083fd7
SL
427 bp = getnewbuf();
428 bp->b_flags |= B_INVAL;
3efdd860
KM
429 bfree(bp);
430 bremhash(bp);
4f083fd7 431 flist = &bfreelist[BQ_AGE];
7188ac27 432 brelvp(bp);
ec67a3ce
MK
433#ifdef SECSIZE
434 bp->b_blksize = DEV_BSIZE;
435#endif SECSIZE
4f083fd7 436 bp->b_error = 0;
7188ac27
KM
437 bp->b_resid = 0;
438 binshash(bp, flist);
9d6d37ce
BJ
439 if (brealloc(bp, size) == 0)
440 goto loop;
a5e62f37 441 return (bp);
663dbc72
BJ
442}
443
ad30fb67
KM
444/*
445 * Allocate space associated with a buffer.
961945a8 446 * If can't get space, buffer is released
ad30fb67
KM
447 */
448brealloc(bp, size)
449 register struct buf *bp;
450 int size;
451{
452 daddr_t start, last;
453 register struct buf *ep;
454 struct buf *dp;
455 int s;
456
457 /*
ec67a3ce 458 * First need to make sure that all overlapping previous I/O
ad30fb67
KM
459 * is dispatched with.
460 */
461 if (size == bp->b_bcount)
9d6d37ce
BJ
462 return (1);
463 if (size < bp->b_bcount) {
464 if (bp->b_flags & B_DELWRI) {
7188ac27 465 (void) bwrite(bp);
9d6d37ce
BJ
466 return (0);
467 }
468 if (bp->b_flags & B_LOCKED)
469 panic("brealloc");
961945a8 470 return (allocbuf(bp, size));
ad30fb67 471 }
9d6d37ce 472 bp->b_flags &= ~B_DONE;
7188ac27 473 if (bp->b_vp == (struct vnode *)0)
961945a8 474 return (allocbuf(bp, size));
9d6d37ce 475
7188ac27
KM
476 trace(TR_BREALLOC,
477 pack(bp->b_vp->v_mount->m_fsid[0], size), bp->b_blkno);
9d6d37ce
BJ
478 /*
479 * Search cache for any buffers that overlap the one that we
480 * are trying to allocate. Overlapping buffers must be marked
481 * invalid, after being written out if they are dirty. (indicated
482 * by B_DELWRI) A disk block must be mapped by at most one buffer
483 * at any point in time. Care must be taken to avoid deadlocking
484 * when two buffer are trying to get the same set of disk blocks.
485 */
486 start = bp->b_blkno;
ec67a3ce
MK
487#ifdef SECSIZE
488 last = start + size/bp->b_blksize - 1;
489#else SECSIZE
ad891b02 490 last = start + btodb(size) - 1;
ec67a3ce 491#endif SECSIZE
7188ac27 492 dp = BUFHASH(bp->b_vp, bp->b_blkno);
ad30fb67 493loop:
ad30fb67 494 for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
7188ac27
KM
495 if (ep == bp || ep->b_vp != bp->b_vp ||
496 (ep->b_flags & B_INVAL))
9d6d37ce
BJ
497 continue;
498 /* look for overlap */
499 if (ep->b_bcount == 0 || ep->b_blkno > last ||
ec67a3ce
MK
500#ifdef SECSIZE
501 ep->b_blkno + ep->b_bcount/ep->b_blksize <= start)
502#else SECSIZE
ad891b02 503 ep->b_blkno + btodb(ep->b_bcount) <= start)
ec67a3ce 504#endif SECSIZE
ad30fb67 505 continue;
a5e62f37 506 s = splbio();
ad30fb67
KM
507 if (ep->b_flags&B_BUSY) {
508 ep->b_flags |= B_WANTED;
509 sleep((caddr_t)ep, PRIBIO+1);
4f083fd7 510 splx(s);
ad30fb67
KM
511 goto loop;
512 }
4f083fd7 513 splx(s);
9d6d37ce 514 notavail(ep);
ad30fb67 515 if (ep->b_flags & B_DELWRI) {
7188ac27 516 (void) bwrite(ep);
ad30fb67
KM
517 goto loop;
518 }
9d6d37ce
BJ
519 ep->b_flags |= B_INVAL;
520 brelse(ep);
ad30fb67 521 }
961945a8 522 return (allocbuf(bp, size));
4f083fd7
SL
523}
524
4f083fd7
SL
525/*
526 * Find a buffer which is available for use.
527 * Select something from a free list.
528 * Preference is to AGE list, then LRU list.
529 */
530struct buf *
531getnewbuf()
532{
533 register struct buf *bp, *dp;
a937f856 534 register struct ucred *cred;
4f083fd7
SL
535 int s;
536
537loop:
a5e62f37 538 s = splbio();
4f083fd7
SL
539 for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
540 if (dp->av_forw != dp)
541 break;
542 if (dp == bfreelist) { /* no free blocks */
543 dp->b_flags |= B_WANTED;
544 sleep((caddr_t)dp, PRIBIO+1);
4b7d506c 545 splx(s);
4f083fd7
SL
546 goto loop;
547 }
548 splx(s);
549 bp = dp->av_forw;
550 notavail(bp);
551 if (bp->b_flags & B_DELWRI) {
033a786e 552 (void) bawrite(bp);
4f083fd7
SL
553 goto loop;
554 }
7188ac27
KM
555 trace(TR_BRELSE,
556 pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_blkno);
557 brelvp(bp);
a937f856
KM
558 if (bp->b_rcred != NOCRED) {
559 cred = bp->b_rcred;
560 bp->b_rcred = NOCRED;
561 crfree(cred);
562 }
563 if (bp->b_wcred != NOCRED) {
564 cred = bp->b_wcred;
565 bp->b_wcred = NOCRED;
566 crfree(cred);
567 }
4f083fd7
SL
568 bp->b_flags = B_BUSY;
569 return (bp);
570}
571
663dbc72
BJ
572/*
573 * Wait for I/O completion on the buffer; return errors
574 * to the user.
575 */
3efdd860 576biowait(bp)
ad30fb67 577 register struct buf *bp;
663dbc72 578{
530d0032 579 int s;
663dbc72 580
a5e62f37 581 s = splbio();
a937f856 582 while ((bp->b_flags & B_DONE) == 0)
663dbc72 583 sleep((caddr_t)bp, PRIBIO);
530d0032 584 splx(s);
7188ac27
KM
585 /*
586 * Pick up the device's error number and pass it to the user;
587 * if there is an error but the number is 0 set a generalized code.
588 */
589 if ((bp->b_flags & B_ERROR) == 0)
590 return (0);
591 if (bp->b_error)
592 return (bp->b_error);
593 return (EIO);
663dbc72
BJ
594}
595
663dbc72 596/*
af04ce66
SL
597 * Mark I/O complete on a buffer.
598 * If someone should be called, e.g. the pageout
599 * daemon, do so. Otherwise, wake up anyone
600 * waiting for it.
663dbc72 601 */
3efdd860
KM
602biodone(bp)
603 register struct buf *bp;
663dbc72 604{
663dbc72 605
80e7c811 606 if (bp->b_flags & B_DONE)
3efdd860 607 panic("dup biodone");
663dbc72 608 bp->b_flags |= B_DONE;
a937f856
KM
609 if ((bp->b_flags & B_READ) == 0)
610 bp->b_dirtyoff = bp->b_dirtyend = 0;
961945a8
SL
611 if (bp->b_flags & B_CALL) {
612 bp->b_flags &= ~B_CALL;
613 (*bp->b_iodone)(bp);
614 return;
615 }
663dbc72
BJ
616 if (bp->b_flags&B_ASYNC)
617 brelse(bp);
618 else {
619 bp->b_flags &= ~B_WANTED;
620 wakeup((caddr_t)bp);
621 }
622}
623
4f083fd7 624/*
7188ac27 625 * Ensure that no part of a specified block is in an incore buffer.
609e7cfa
MK
626#ifdef SECSIZE
627 * "size" is given in device blocks (the units of b_blkno).
628#endif SECSIZE
ec67a3ce
MK
629#ifdef SECSIZE
630 * "size" is given in device blocks (the units of b_blkno).
631#endif SECSIZE
4f083fd7 632 */
7188ac27
KM
633blkflush(vp, blkno, size)
634 struct vnode *vp;
4f083fd7 635 daddr_t blkno;
ec67a3ce
MK
636#ifdef SECSIZE
637 int size;
638#else SECSIZE
4f083fd7 639 long size;
ec67a3ce 640#endif SECSIZE
4f083fd7
SL
641{
642 register struct buf *ep;
643 struct buf *dp;
0e980590 644 daddr_t curblk, nextblk, ecurblk, lastblk;
7188ac27 645 int s, error, allerrors = 0;
4f083fd7 646
0e980590
KM
647 /*
648 * Iterate through each possible hash chain.
649 */
650 lastblk = blkno + btodb(size) - 1;
651 for (curblk = blkno; curblk <= lastblk; curblk = nextblk) {
652#if RND & (RND-1)
653 nextblk = ((curblk / RND) + 1) * RND;
654#else
655 nextblk = ((curblk & ~(RND-1)) + RND);
656#endif
657 ecurblk = nextblk > lastblk ? lastblk : nextblk - 1;
658 dp = BUFHASH(vp, curblk);
4f083fd7 659loop:
0e980590
KM
660 for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
661 if (ep->b_vp != vp || (ep->b_flags & B_INVAL))
662 continue;
663 /* look for overlap */
664 if (ep->b_bcount == 0 || ep->b_blkno > ecurblk ||
665 ep->b_blkno + btodb(ep->b_bcount) <= curblk)
666 continue;
667 s = splbio();
668 if (ep->b_flags&B_BUSY) {
669 ep->b_flags |= B_WANTED;
670 sleep((caddr_t)ep, PRIBIO+1);
671 splx(s);
672 goto loop;
673 }
674 if (ep->b_flags & B_DELWRI) {
675 splx(s);
676 notavail(ep);
677 if (error = bwrite(ep))
678 allerrors = error;
679 goto loop;
680 }
4f083fd7 681 splx(s);
4f083fd7 682 }
4f083fd7 683 }
7188ac27 684 return (allerrors);
4f083fd7
SL
685}
686
663dbc72 687/*
7188ac27 688 * Make sure all write-behind blocks associated
a937f856 689 * with mount point are flushed out (from sync).
663dbc72 690 */
a937f856
KM
691bflush(mountp)
692 struct mount *mountp;
663dbc72
BJ
693{
694 register struct buf *bp;
46387ee3 695 register struct buf *flist;
530d0032 696 int s;
663dbc72
BJ
697
698loop:
a5e62f37 699 s = splbio();
a937f856
KM
700 for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++) {
701 for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
702 if ((bp->b_flags & B_BUSY))
703 continue;
704 if ((bp->b_flags & B_DELWRI) == 0)
705 continue;
706 if (bp->b_vp && bp->b_vp->v_mount == mountp) {
5a3e32e2 707 splx(s);
7188ac27 708 notavail(bp);
033a786e 709 (void) bawrite(bp);
a937f856 710 goto loop;
7188ac27 711 }
7188ac27
KM
712 }
713 }
a937f856 714 splx(s);
663dbc72 715}
7b8b5a01
RE
716
717/*
718 * Invalidate in core blocks belonging to closed or umounted filesystem
719 *
033a786e 720 * We walk through the buffer pool and invalidate any buffers for the
a937f856 721 * indicated mount point. Normally this routine is preceeded by a bflush
033a786e
KM
722 * call, so that on a quiescent filesystem there will be no dirty
723 * buffers when we are done. We return the count of dirty buffers when
724 * we are finished.
7b8b5a01 725 */
a937f856
KM
726binval(mountp)
727 struct mount *mountp;
7b8b5a01 728{
634ebdbe
RE
729 register struct buf *bp;
730 register struct bufhd *hp;
1a24c701 731 int s, dirty = 0;
634ebdbe 732#define dp ((struct buf *)hp)
7b8b5a01 733
a937f856 734loop:
033a786e
KM
735 for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++) {
736 for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
a937f856 737 if (bp->b_vp == NULL || bp->b_vp->v_mount != mountp)
033a786e 738 continue;
5a3e32e2 739 s = splbio();
1a24c701
KM
740 if (bp->b_flags & B_BUSY) {
741 bp->b_flags |= B_WANTED;
742 sleep((caddr_t)bp, PRIBIO+1);
743 splx(s);
744 goto loop;
745 }
5a3e32e2 746 splx(s);
033a786e
KM
747 notavail(bp);
748 if (bp->b_flags & B_DELWRI) {
749 (void) bawrite(bp);
750 dirty++;
751 continue;
7188ac27 752 }
033a786e
KM
753 bp->b_flags |= B_INVAL;
754 brelvp(bp);
755 brelse(bp);
756 }
757 }
758 return (dirty);
7188ac27
KM
759}
760
761brelvp(bp)
762 struct buf *bp;
763{
764 struct vnode *vp;
765
766 if (bp->b_vp == (struct vnode *) 0)
767 return;
768 vp = bp->b_vp;
769 bp->b_vp = (struct vnode *) 0;
770 vrele(vp);
7b8b5a01 771}