Commit | Line | Data |
---|---|---|
da7c5cc6 | 1 | /* |
7188ac27 KM |
2 | * Copyright (c) 1982, 1986, 1989 Regents of the University of California. |
3 | * All rights reserved. | |
da7c5cc6 | 4 | * |
7188ac27 KM |
5 | * Redistribution and use in source and binary forms are permitted |
6 | * provided that the above copyright notice and this paragraph are | |
7 | * duplicated in all such forms and that any documentation, | |
8 | * advertising materials, and other materials related to such | |
9 | * distribution and use acknowledge that the software was developed | |
10 | * by the University of California, Berkeley. The name of the | |
11 | * University may not be used to endorse or promote products derived | |
12 | * from this software without specific prior written permission. | |
13 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR | |
14 | * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED | |
15 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. | |
16 | * | |
0e980590 | 17 | * @(#)vfs_bio.c 7.13 (Berkeley) %G% |
da7c5cc6 | 18 | */ |
961945a8 | 19 | |
94368568 | 20 | #include "param.h" |
94368568 JB |
21 | #include "user.h" |
22 | #include "buf.h" | |
7188ac27 | 23 | #include "vnode.h" |
94368568 | 24 | #include "trace.h" |
a937f856 | 25 | #include "ucred.h" |
663dbc72 | 26 | |
663dbc72 BJ |
27 | /* |
28 | * Read in (if necessary) the block and return a buffer pointer. | |
29 | */ | |
a937f856 | 30 | bread(vp, blkno, size, cred, bpp) |
7188ac27 | 31 | struct vnode *vp; |
ad30fb67 KM |
32 | daddr_t blkno; |
33 | int size; | |
a937f856 | 34 | struct ucred *cred; |
7188ac27 | 35 | struct buf **bpp; |
ec67a3ce MK |
36 | #ifdef SECSIZE |
37 | long secsize; | |
38 | #endif SECSIZE | |
663dbc72 BJ |
39 | { |
40 | register struct buf *bp; | |
41 | ||
4f083fd7 SL |
42 | if (size == 0) |
43 | panic("bread: size 0"); | |
ec67a3ce MK |
44 | #ifdef SECSIZE |
45 | bp = getblk(dev, blkno, size, secsize); | |
46 | #else SECSIZE | |
7188ac27 | 47 | *bpp = bp = getblk(vp, blkno, size); |
ec67a3ce | 48 | #endif SECSIZE |
32a56bda | 49 | if (bp->b_flags&(B_DONE|B_DELWRI)) { |
7188ac27 KM |
50 | trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size), blkno); |
51 | return (0); | |
663dbc72 BJ |
52 | } |
53 | bp->b_flags |= B_READ; | |
4f083fd7 SL |
54 | if (bp->b_bcount > bp->b_bufsize) |
55 | panic("bread"); | |
a937f856 KM |
56 | if (bp->b_rcred == NOCRED && cred != NOCRED) { |
57 | crhold(cred); | |
58 | bp->b_rcred = cred; | |
59 | } | |
7188ac27 KM |
60 | VOP_STRATEGY(bp); |
61 | trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size), blkno); | |
fb99a9a1 | 62 | u.u_ru.ru_inblock++; /* pay for read */ |
7188ac27 | 63 | return (biowait(bp)); |
663dbc72 BJ |
64 | } |
65 | ||
66 | /* | |
67 | * Read in the block, like bread, but also start I/O on the | |
68 | * read-ahead block (which is not allocated to the caller) | |
69 | */ | |
a937f856 | 70 | breada(vp, blkno, size, rablkno, rabsize, cred, bpp) |
7188ac27 | 71 | struct vnode *vp; |
84baaab3 | 72 | daddr_t blkno; int size; |
ec67a3ce MK |
73 | #ifdef SECSIZE |
74 | long secsize; | |
75 | #endif SECSIZE | |
a8d3bf7f | 76 | daddr_t rablkno; int rabsize; |
a937f856 | 77 | struct ucred *cred; |
7188ac27 | 78 | struct buf **bpp; |
663dbc72 BJ |
79 | { |
80 | register struct buf *bp, *rabp; | |
81 | ||
82 | bp = NULL; | |
3efdd860 KM |
83 | /* |
84 | * If the block isn't in core, then allocate | |
85 | * a buffer and initiate i/o (getblk checks | |
86 | * for a cache hit). | |
87 | */ | |
7188ac27 KM |
88 | if (!incore(vp, blkno)) { |
89 | *bpp = bp = getblk(vp, blkno, size); | |
ec67a3ce | 90 | #endif SECSIZE |
32a56bda | 91 | if ((bp->b_flags&(B_DONE|B_DELWRI)) == 0) { |
663dbc72 | 92 | bp->b_flags |= B_READ; |
4f083fd7 SL |
93 | if (bp->b_bcount > bp->b_bufsize) |
94 | panic("breada"); | |
a937f856 KM |
95 | if (bp->b_rcred == NOCRED && cred != NOCRED) { |
96 | crhold(cred); | |
97 | bp->b_rcred = cred; | |
98 | } | |
7188ac27 KM |
99 | VOP_STRATEGY(bp); |
100 | trace(TR_BREADMISS, pack(vp->v_mount->m_fsid[0], size), | |
101 | blkno); | |
fb99a9a1 | 102 | u.u_ru.ru_inblock++; /* pay for read */ |
3efdd860 | 103 | } else |
7188ac27 KM |
104 | trace(TR_BREADHIT, pack(vp->v_mount->m_fsid[0], size), |
105 | blkno); | |
663dbc72 | 106 | } |
3efdd860 KM |
107 | |
108 | /* | |
109 | * If there's a read-ahead block, start i/o | |
110 | * on it also (as above). | |
111 | */ | |
7188ac27 KM |
112 | if (rablkno && !incore(vp, rablkno)) { |
113 | rabp = getblk(vp, rablkno, rabsize); | |
ec67a3ce | 114 | #endif SECSIZE |
32a56bda | 115 | if (rabp->b_flags & (B_DONE|B_DELWRI)) { |
663dbc72 | 116 | brelse(rabp); |
7188ac27 | 117 | trace(TR_BREADHITRA, |
5062ac4a | 118 | pack(vp->v_mount->m_fsid[0], rabsize), rablkno); |
973ecc4f | 119 | } else { |
663dbc72 | 120 | rabp->b_flags |= B_READ|B_ASYNC; |
4f083fd7 SL |
121 | if (rabp->b_bcount > rabp->b_bufsize) |
122 | panic("breadrabp"); | |
5062ac4a | 123 | if (rabp->b_rcred == NOCRED && cred != NOCRED) { |
a937f856 | 124 | crhold(cred); |
5062ac4a | 125 | rabp->b_rcred = cred; |
a937f856 | 126 | } |
7188ac27 KM |
127 | VOP_STRATEGY(rabp); |
128 | trace(TR_BREADMISSRA, | |
5062ac4a | 129 | pack(vp->v_mount->m_fsid[0], rabsize), rablkno); |
fb99a9a1 | 130 | u.u_ru.ru_inblock++; /* pay in advance */ |
663dbc72 BJ |
131 | } |
132 | } | |
3efdd860 KM |
133 | |
134 | /* | |
84baaab3 KM |
135 | * If block was in core, let bread get it. |
136 | * If block wasn't in core, then the read was started | |
137 | * above, and just wait for it. | |
3efdd860 | 138 | */ |
84baaab3 | 139 | if (bp == NULL) |
ec67a3ce MK |
140 | #ifdef SECSIZE |
141 | return (bread(dev, blkno, size, secsize)); | |
142 | #else SECSIZE | |
a937f856 | 143 | return (bread(vp, blkno, size, cred, bpp)); |
7188ac27 | 144 | return (biowait(bp)); |
663dbc72 BJ |
145 | } |
146 | ||
147 | /* | |
148 | * Write the buffer, waiting for completion. | |
149 | * Then release the buffer. | |
150 | */ | |
151 | bwrite(bp) | |
3efdd860 | 152 | register struct buf *bp; |
663dbc72 | 153 | { |
7188ac27 KM |
154 | register int flag; |
155 | int error; | |
663dbc72 BJ |
156 | |
157 | flag = bp->b_flags; | |
f844ee62 | 158 | bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); |
663dbc72 | 159 | if ((flag&B_DELWRI) == 0) |
fb99a9a1 | 160 | u.u_ru.ru_oublock++; /* noone paid yet */ |
7188ac27 KM |
161 | trace(TR_BWRITE, |
162 | pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bcount), bp->b_blkno); | |
4f083fd7 SL |
163 | if (bp->b_bcount > bp->b_bufsize) |
164 | panic("bwrite"); | |
7188ac27 | 165 | VOP_STRATEGY(bp); |
3efdd860 KM |
166 | |
167 | /* | |
168 | * If the write was synchronous, then await i/o completion. | |
169 | * If the write was "delayed", then we put the buffer on | |
170 | * the q of blocks awaiting i/o completion status. | |
3efdd860 | 171 | */ |
663dbc72 | 172 | if ((flag&B_ASYNC) == 0) { |
7188ac27 | 173 | error = biowait(bp); |
663dbc72 | 174 | brelse(bp); |
7188ac27 | 175 | } else if (flag & B_DELWRI) { |
663dbc72 | 176 | bp->b_flags |= B_AGE; |
7188ac27 KM |
177 | error = 0; |
178 | } | |
179 | return (error); | |
663dbc72 BJ |
180 | } |
181 | ||
182 | /* | |
183 | * Release the buffer, marking it so that if it is grabbed | |
184 | * for another purpose it will be written out before being | |
185 | * given up (e.g. when writing a partial block where it is | |
186 | * assumed that another write for the same block will soon follow). | |
187 | * This can't be done for magtape, since writes must be done | |
188 | * in the same order as requested. | |
189 | */ | |
190 | bdwrite(bp) | |
3efdd860 | 191 | register struct buf *bp; |
663dbc72 | 192 | { |
663dbc72 BJ |
193 | |
194 | if ((bp->b_flags&B_DELWRI) == 0) | |
fb99a9a1 | 195 | u.u_ru.ru_oublock++; /* noone paid yet */ |
7188ac27 KM |
196 | #ifdef notdef |
197 | /* | |
198 | * This does not work for buffers associated with | |
199 | * vnodes that are remote - they have no dev. | |
200 | * Besides, we don't use bio with tapes, so rather | |
201 | * than develop a fix, we just ifdef this out for now. | |
202 | */ | |
ec67a3ce | 203 | if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE) |
663dbc72 BJ |
204 | bawrite(bp); |
205 | else { | |
206 | bp->b_flags |= B_DELWRI | B_DONE; | |
207 | brelse(bp); | |
208 | } | |
7188ac27 KM |
209 | #endif |
210 | bp->b_flags |= B_DELWRI | B_DONE; | |
211 | brelse(bp); | |
663dbc72 BJ |
212 | } |
213 | ||
214 | /* | |
215 | * Release the buffer, start I/O on it, but don't wait for completion. | |
216 | */ | |
217 | bawrite(bp) | |
3efdd860 | 218 | register struct buf *bp; |
663dbc72 BJ |
219 | { |
220 | ||
221 | bp->b_flags |= B_ASYNC; | |
7188ac27 | 222 | (void) bwrite(bp); |
663dbc72 BJ |
223 | } |
224 | ||
225 | /* | |
3efdd860 | 226 | * Release the buffer, with no I/O implied. |
663dbc72 BJ |
227 | */ |
228 | brelse(bp) | |
3efdd860 | 229 | register struct buf *bp; |
663dbc72 | 230 | { |
46387ee3 | 231 | register struct buf *flist; |
663dbc72 BJ |
232 | register s; |
233 | ||
7188ac27 KM |
234 | trace(TR_BRELSE, |
235 | pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_blkno); | |
3efdd860 KM |
236 | /* |
237 | * If someone's waiting for the buffer, or | |
238 | * is waiting for a buffer wake 'em up. | |
239 | */ | |
663dbc72 BJ |
240 | if (bp->b_flags&B_WANTED) |
241 | wakeup((caddr_t)bp); | |
46387ee3 BJ |
242 | if (bfreelist[0].b_flags&B_WANTED) { |
243 | bfreelist[0].b_flags &= ~B_WANTED; | |
244 | wakeup((caddr_t)bfreelist); | |
663dbc72 | 245 | } |
7188ac27 KM |
246 | if (bp->b_flags & B_NOCACHE) { |
247 | bp->b_flags |= B_INVAL; | |
248 | } | |
60a71525 BJ |
249 | if (bp->b_flags&B_ERROR) |
250 | if (bp->b_flags & B_LOCKED) | |
251 | bp->b_flags &= ~B_ERROR; /* try again later */ | |
252 | else | |
7188ac27 | 253 | brelvp(bp); /* no assoc */ |
3efdd860 KM |
254 | |
255 | /* | |
256 | * Stick the buffer back on a free list. | |
257 | */ | |
a5e62f37 | 258 | s = splbio(); |
4f083fd7 SL |
259 | if (bp->b_bufsize <= 0) { |
260 | /* block has no buffer ... put at front of unused buffer list */ | |
261 | flist = &bfreelist[BQ_EMPTY]; | |
262 | binsheadfree(bp, flist); | |
263 | } else if (bp->b_flags & (B_ERROR|B_INVAL)) { | |
46387ee3 | 264 | /* block has no info ... put at front of most free list */ |
4f083fd7 | 265 | flist = &bfreelist[BQ_AGE]; |
3efdd860 | 266 | binsheadfree(bp, flist); |
663dbc72 | 267 | } else { |
46387ee3 BJ |
268 | if (bp->b_flags & B_LOCKED) |
269 | flist = &bfreelist[BQ_LOCKED]; | |
270 | else if (bp->b_flags & B_AGE) | |
271 | flist = &bfreelist[BQ_AGE]; | |
272 | else | |
273 | flist = &bfreelist[BQ_LRU]; | |
3efdd860 | 274 | binstailfree(bp, flist); |
663dbc72 | 275 | } |
7188ac27 | 276 | bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE|B_NOCACHE); |
663dbc72 BJ |
277 | splx(s); |
278 | } | |
279 | ||
280 | /* | |
281 | * See if the block is associated with some buffer | |
282 | * (mainly to avoid getting hung up on a wait in breada) | |
283 | */ | |
7188ac27 KM |
284 | incore(vp, blkno) |
285 | struct vnode *vp; | |
3efdd860 | 286 | daddr_t blkno; |
663dbc72 BJ |
287 | { |
288 | register struct buf *bp; | |
46387ee3 | 289 | register struct buf *dp; |
663dbc72 | 290 | |
243d4743 | 291 | dp = BUFHASH(vp, blkno); |
46387ee3 | 292 | for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) |
7188ac27 | 293 | if (bp->b_blkno == blkno && bp->b_vp == vp && |
3efdd860 | 294 | (bp->b_flags & B_INVAL) == 0) |
5603d07d | 295 | return (1); |
5603d07d | 296 | return (0); |
663dbc72 BJ |
297 | } |
298 | ||
a937f856 | 299 | baddr(vp, blkno, size, cred, bpp) |
7188ac27 | 300 | struct vnode *vp; |
ad30fb67 KM |
301 | daddr_t blkno; |
302 | int size; | |
a937f856 | 303 | struct ucred *cred; |
7188ac27 | 304 | struct buf **bpp; |
ec67a3ce MK |
305 | #ifdef SECSIZE |
306 | long secsize; | |
307 | #endif SECSIZE | |
663dbc72 BJ |
308 | { |
309 | ||
7188ac27 | 310 | if (incore(vp, blkno)) |
a937f856 | 311 | return (bread(vp, blkno, size, cred, bpp)); |
7188ac27 | 312 | *bpp = 0; |
ec67a3ce | 313 | #endif SECSIZE |
663dbc72 BJ |
314 | return (0); |
315 | } | |
316 | ||
317 | /* | |
318 | * Assign a buffer for the given block. If the appropriate | |
319 | * block is already associated, return it; otherwise search | |
320 | * for the oldest non-busy buffer and reassign it. | |
23900030 | 321 | * |
32a56bda KM |
322 | * If we find the buffer, but it is dirty (marked DELWRI) and |
323 | * its size is changing, we must write it out first. When the | |
324 | * buffer is shrinking, the write is done by brealloc to avoid | |
325 | * losing the unwritten data. When the buffer is growing, the | |
326 | * write is done by getblk, so that bread will not read stale | |
327 | * disk data over the modified data in the buffer. | |
328 | * | |
23900030 BJ |
329 | * We use splx here because this routine may be called |
330 | * on the interrupt stack during a dump, and we don't | |
331 | * want to lower the ipl back to 0. | |
663dbc72 BJ |
332 | */ |
333 | struct buf * | |
ec67a3ce MK |
334 | #ifdef SECSIZE |
335 | getblk(dev, blkno, size, secsize) | |
336 | #else SECSIZE | |
7188ac27 KM |
337 | getblk(vp, blkno, size) |
338 | register struct vnode *vp; | |
ad30fb67 KM |
339 | daddr_t blkno; |
340 | int size; | |
ec67a3ce MK |
341 | #ifdef SECSIZE |
342 | long secsize; | |
343 | #endif SECSIZE | |
663dbc72 | 344 | { |
4f083fd7 | 345 | register struct buf *bp, *dp; |
23900030 | 346 | int s; |
663dbc72 | 347 | |
00a6a148 KM |
348 | if (size > MAXBSIZE) |
349 | panic("getblk: size too big"); | |
751af33e KM |
350 | /* |
351 | * To prevent overflow of 32-bit ints when converting block | |
352 | * numbers to byte offsets, blknos > 2^32 / DEV_BSIZE are set | |
353 | * to the maximum number that can be converted to a byte offset | |
354 | * without overflow. This is historic code; what bug it fixed, | |
355 | * or whether it is still a reasonable thing to do is open to | |
356 | * dispute. mkm 9/85 | |
357 | */ | |
358 | if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-DEV_BSHIFT)) | |
359 | blkno = 1 << ((sizeof(int)*NBBY-DEV_BSHIFT) + 1); | |
3efdd860 KM |
360 | /* |
361 | * Search the cache for the block. If we hit, but | |
362 | * the buffer is in use for i/o, then we wait until | |
363 | * the i/o has completed. | |
364 | */ | |
7188ac27 | 365 | dp = BUFHASH(vp, blkno); |
3efdd860 | 366 | loop: |
46387ee3 | 367 | for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) { |
7188ac27 | 368 | if (bp->b_blkno != blkno || bp->b_vp != vp || |
46387ee3 | 369 | bp->b_flags&B_INVAL) |
663dbc72 | 370 | continue; |
a5e62f37 | 371 | s = splbio(); |
663dbc72 BJ |
372 | if (bp->b_flags&B_BUSY) { |
373 | bp->b_flags |= B_WANTED; | |
374 | sleep((caddr_t)bp, PRIBIO+1); | |
23900030 | 375 | splx(s); |
663dbc72 BJ |
376 | goto loop; |
377 | } | |
23900030 | 378 | splx(s); |
663dbc72 | 379 | notavail(bp); |
32a56bda KM |
380 | if (bp->b_bcount != size) { |
381 | if (bp->b_bcount < size && (bp->b_flags&B_DELWRI)) { | |
382 | bp->b_flags &= ~B_ASYNC; | |
7188ac27 | 383 | (void) bwrite(bp); |
32a56bda KM |
384 | goto loop; |
385 | } | |
386 | if (brealloc(bp, size) == 0) | |
387 | goto loop; | |
388 | } | |
b646a125 | 389 | if (bp->b_bcount != size && brealloc(bp, size) == 0) |
9d6d37ce | 390 | goto loop; |
663dbc72 | 391 | bp->b_flags |= B_CACHE; |
a5e62f37 | 392 | return (bp); |
663dbc72 | 393 | } |
4f083fd7 | 394 | bp = getnewbuf(); |
ad30fb67 | 395 | bfree(bp); |
3efdd860 | 396 | bremhash(bp); |
7188ac27 KM |
397 | if (bp->b_vp) |
398 | brelvp(bp); | |
8fe1c702 | 399 | VREF(vp); |
7188ac27 KM |
400 | bp->b_vp = vp; |
401 | bp->b_dev = vp->v_rdev; | |
ec67a3ce MK |
402 | #ifdef SECSIZE |
403 | bp->b_blksize = secsize; | |
404 | #endif SECSIZE | |
ad30fb67 | 405 | bp->b_blkno = blkno; |
4f083fd7 | 406 | bp->b_error = 0; |
7188ac27 KM |
407 | bp->b_resid = 0; |
408 | binshash(bp, dp); | |
9d6d37ce BJ |
409 | if (brealloc(bp, size) == 0) |
410 | goto loop; | |
a5e62f37 | 411 | return (bp); |
663dbc72 BJ |
412 | } |
413 | ||
414 | /* | |
415 | * get an empty block, | |
416 | * not assigned to any particular device | |
417 | */ | |
418 | struct buf * | |
ad30fb67 KM |
419 | geteblk(size) |
420 | int size; | |
663dbc72 | 421 | { |
4f083fd7 | 422 | register struct buf *bp, *flist; |
663dbc72 | 423 | |
00a6a148 KM |
424 | if (size > MAXBSIZE) |
425 | panic("geteblk: size too big"); | |
663dbc72 | 426 | loop: |
4f083fd7 SL |
427 | bp = getnewbuf(); |
428 | bp->b_flags |= B_INVAL; | |
3efdd860 KM |
429 | bfree(bp); |
430 | bremhash(bp); | |
4f083fd7 | 431 | flist = &bfreelist[BQ_AGE]; |
7188ac27 | 432 | brelvp(bp); |
ec67a3ce MK |
433 | #ifdef SECSIZE |
434 | bp->b_blksize = DEV_BSIZE; | |
435 | #endif SECSIZE | |
4f083fd7 | 436 | bp->b_error = 0; |
7188ac27 KM |
437 | bp->b_resid = 0; |
438 | binshash(bp, flist); | |
9d6d37ce BJ |
439 | if (brealloc(bp, size) == 0) |
440 | goto loop; | |
a5e62f37 | 441 | return (bp); |
663dbc72 BJ |
442 | } |
443 | ||
ad30fb67 KM |
444 | /* |
445 | * Allocate space associated with a buffer. | |
961945a8 | 446 | * If can't get space, buffer is released |
ad30fb67 KM |
447 | */ |
448 | brealloc(bp, size) | |
449 | register struct buf *bp; | |
450 | int size; | |
451 | { | |
452 | daddr_t start, last; | |
453 | register struct buf *ep; | |
454 | struct buf *dp; | |
455 | int s; | |
456 | ||
457 | /* | |
ec67a3ce | 458 | * First need to make sure that all overlapping previous I/O |
ad30fb67 KM |
459 | * is dispatched with. |
460 | */ | |
461 | if (size == bp->b_bcount) | |
9d6d37ce BJ |
462 | return (1); |
463 | if (size < bp->b_bcount) { | |
464 | if (bp->b_flags & B_DELWRI) { | |
7188ac27 | 465 | (void) bwrite(bp); |
9d6d37ce BJ |
466 | return (0); |
467 | } | |
468 | if (bp->b_flags & B_LOCKED) | |
469 | panic("brealloc"); | |
961945a8 | 470 | return (allocbuf(bp, size)); |
ad30fb67 | 471 | } |
9d6d37ce | 472 | bp->b_flags &= ~B_DONE; |
7188ac27 | 473 | if (bp->b_vp == (struct vnode *)0) |
961945a8 | 474 | return (allocbuf(bp, size)); |
9d6d37ce | 475 | |
7188ac27 KM |
476 | trace(TR_BREALLOC, |
477 | pack(bp->b_vp->v_mount->m_fsid[0], size), bp->b_blkno); | |
9d6d37ce BJ |
478 | /* |
479 | * Search cache for any buffers that overlap the one that we | |
480 | * are trying to allocate. Overlapping buffers must be marked | |
481 | * invalid, after being written out if they are dirty. (indicated | |
482 | * by B_DELWRI) A disk block must be mapped by at most one buffer | |
483 | * at any point in time. Care must be taken to avoid deadlocking | |
484 | * when two buffer are trying to get the same set of disk blocks. | |
485 | */ | |
486 | start = bp->b_blkno; | |
ec67a3ce MK |
487 | #ifdef SECSIZE |
488 | last = start + size/bp->b_blksize - 1; | |
489 | #else SECSIZE | |
ad891b02 | 490 | last = start + btodb(size) - 1; |
ec67a3ce | 491 | #endif SECSIZE |
7188ac27 | 492 | dp = BUFHASH(bp->b_vp, bp->b_blkno); |
ad30fb67 | 493 | loop: |
ad30fb67 | 494 | for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) { |
7188ac27 KM |
495 | if (ep == bp || ep->b_vp != bp->b_vp || |
496 | (ep->b_flags & B_INVAL)) | |
9d6d37ce BJ |
497 | continue; |
498 | /* look for overlap */ | |
499 | if (ep->b_bcount == 0 || ep->b_blkno > last || | |
ec67a3ce MK |
500 | #ifdef SECSIZE |
501 | ep->b_blkno + ep->b_bcount/ep->b_blksize <= start) | |
502 | #else SECSIZE | |
ad891b02 | 503 | ep->b_blkno + btodb(ep->b_bcount) <= start) |
ec67a3ce | 504 | #endif SECSIZE |
ad30fb67 | 505 | continue; |
a5e62f37 | 506 | s = splbio(); |
ad30fb67 KM |
507 | if (ep->b_flags&B_BUSY) { |
508 | ep->b_flags |= B_WANTED; | |
509 | sleep((caddr_t)ep, PRIBIO+1); | |
4f083fd7 | 510 | splx(s); |
ad30fb67 KM |
511 | goto loop; |
512 | } | |
4f083fd7 | 513 | splx(s); |
9d6d37ce | 514 | notavail(ep); |
ad30fb67 | 515 | if (ep->b_flags & B_DELWRI) { |
7188ac27 | 516 | (void) bwrite(ep); |
ad30fb67 KM |
517 | goto loop; |
518 | } | |
9d6d37ce BJ |
519 | ep->b_flags |= B_INVAL; |
520 | brelse(ep); | |
ad30fb67 | 521 | } |
961945a8 | 522 | return (allocbuf(bp, size)); |
4f083fd7 SL |
523 | } |
524 | ||
4f083fd7 SL |
525 | /* |
526 | * Find a buffer which is available for use. | |
527 | * Select something from a free list. | |
528 | * Preference is to AGE list, then LRU list. | |
529 | */ | |
530 | struct buf * | |
531 | getnewbuf() | |
532 | { | |
533 | register struct buf *bp, *dp; | |
a937f856 | 534 | register struct ucred *cred; |
4f083fd7 SL |
535 | int s; |
536 | ||
537 | loop: | |
a5e62f37 | 538 | s = splbio(); |
4f083fd7 SL |
539 | for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--) |
540 | if (dp->av_forw != dp) | |
541 | break; | |
542 | if (dp == bfreelist) { /* no free blocks */ | |
543 | dp->b_flags |= B_WANTED; | |
544 | sleep((caddr_t)dp, PRIBIO+1); | |
4b7d506c | 545 | splx(s); |
4f083fd7 SL |
546 | goto loop; |
547 | } | |
548 | splx(s); | |
549 | bp = dp->av_forw; | |
550 | notavail(bp); | |
551 | if (bp->b_flags & B_DELWRI) { | |
033a786e | 552 | (void) bawrite(bp); |
4f083fd7 SL |
553 | goto loop; |
554 | } | |
7188ac27 KM |
555 | trace(TR_BRELSE, |
556 | pack(bp->b_vp->v_mount->m_fsid[0], bp->b_bufsize), bp->b_blkno); | |
557 | brelvp(bp); | |
a937f856 KM |
558 | if (bp->b_rcred != NOCRED) { |
559 | cred = bp->b_rcred; | |
560 | bp->b_rcred = NOCRED; | |
561 | crfree(cred); | |
562 | } | |
563 | if (bp->b_wcred != NOCRED) { | |
564 | cred = bp->b_wcred; | |
565 | bp->b_wcred = NOCRED; | |
566 | crfree(cred); | |
567 | } | |
4f083fd7 SL |
568 | bp->b_flags = B_BUSY; |
569 | return (bp); | |
570 | } | |
571 | ||
663dbc72 BJ |
572 | /* |
573 | * Wait for I/O completion on the buffer; return errors | |
574 | * to the user. | |
575 | */ | |
3efdd860 | 576 | biowait(bp) |
ad30fb67 | 577 | register struct buf *bp; |
663dbc72 | 578 | { |
530d0032 | 579 | int s; |
663dbc72 | 580 | |
a5e62f37 | 581 | s = splbio(); |
a937f856 | 582 | while ((bp->b_flags & B_DONE) == 0) |
663dbc72 | 583 | sleep((caddr_t)bp, PRIBIO); |
530d0032 | 584 | splx(s); |
7188ac27 KM |
585 | /* |
586 | * Pick up the device's error number and pass it to the user; | |
587 | * if there is an error but the number is 0 set a generalized code. | |
588 | */ | |
589 | if ((bp->b_flags & B_ERROR) == 0) | |
590 | return (0); | |
591 | if (bp->b_error) | |
592 | return (bp->b_error); | |
593 | return (EIO); | |
663dbc72 BJ |
594 | } |
595 | ||
663dbc72 | 596 | /* |
af04ce66 SL |
597 | * Mark I/O complete on a buffer. |
598 | * If someone should be called, e.g. the pageout | |
599 | * daemon, do so. Otherwise, wake up anyone | |
600 | * waiting for it. | |
663dbc72 | 601 | */ |
3efdd860 KM |
602 | biodone(bp) |
603 | register struct buf *bp; | |
663dbc72 | 604 | { |
663dbc72 | 605 | |
80e7c811 | 606 | if (bp->b_flags & B_DONE) |
3efdd860 | 607 | panic("dup biodone"); |
663dbc72 | 608 | bp->b_flags |= B_DONE; |
a937f856 KM |
609 | if ((bp->b_flags & B_READ) == 0) |
610 | bp->b_dirtyoff = bp->b_dirtyend = 0; | |
961945a8 SL |
611 | if (bp->b_flags & B_CALL) { |
612 | bp->b_flags &= ~B_CALL; | |
613 | (*bp->b_iodone)(bp); | |
614 | return; | |
615 | } | |
663dbc72 BJ |
616 | if (bp->b_flags&B_ASYNC) |
617 | brelse(bp); | |
618 | else { | |
619 | bp->b_flags &= ~B_WANTED; | |
620 | wakeup((caddr_t)bp); | |
621 | } | |
622 | } | |
623 | ||
4f083fd7 | 624 | /* |
7188ac27 | 625 | * Ensure that no part of a specified block is in an incore buffer. |
609e7cfa MK |
626 | #ifdef SECSIZE |
627 | * "size" is given in device blocks (the units of b_blkno). | |
628 | #endif SECSIZE | |
ec67a3ce MK |
629 | #ifdef SECSIZE |
630 | * "size" is given in device blocks (the units of b_blkno). | |
631 | #endif SECSIZE | |
4f083fd7 | 632 | */ |
7188ac27 KM |
633 | blkflush(vp, blkno, size) |
634 | struct vnode *vp; | |
4f083fd7 | 635 | daddr_t blkno; |
ec67a3ce MK |
636 | #ifdef SECSIZE |
637 | int size; | |
638 | #else SECSIZE | |
4f083fd7 | 639 | long size; |
ec67a3ce | 640 | #endif SECSIZE |
4f083fd7 SL |
641 | { |
642 | register struct buf *ep; | |
643 | struct buf *dp; | |
0e980590 | 644 | daddr_t curblk, nextblk, ecurblk, lastblk; |
7188ac27 | 645 | int s, error, allerrors = 0; |
4f083fd7 | 646 | |
0e980590 KM |
647 | /* |
648 | * Iterate through each possible hash chain. | |
649 | */ | |
650 | lastblk = blkno + btodb(size) - 1; | |
651 | for (curblk = blkno; curblk <= lastblk; curblk = nextblk) { | |
652 | #if RND & (RND-1) | |
653 | nextblk = ((curblk / RND) + 1) * RND; | |
654 | #else | |
655 | nextblk = ((curblk & ~(RND-1)) + RND); | |
656 | #endif | |
657 | ecurblk = nextblk > lastblk ? lastblk : nextblk - 1; | |
658 | dp = BUFHASH(vp, curblk); | |
4f083fd7 | 659 | loop: |
0e980590 KM |
660 | for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) { |
661 | if (ep->b_vp != vp || (ep->b_flags & B_INVAL)) | |
662 | continue; | |
663 | /* look for overlap */ | |
664 | if (ep->b_bcount == 0 || ep->b_blkno > ecurblk || | |
665 | ep->b_blkno + btodb(ep->b_bcount) <= curblk) | |
666 | continue; | |
667 | s = splbio(); | |
668 | if (ep->b_flags&B_BUSY) { | |
669 | ep->b_flags |= B_WANTED; | |
670 | sleep((caddr_t)ep, PRIBIO+1); | |
671 | splx(s); | |
672 | goto loop; | |
673 | } | |
674 | if (ep->b_flags & B_DELWRI) { | |
675 | splx(s); | |
676 | notavail(ep); | |
677 | if (error = bwrite(ep)) | |
678 | allerrors = error; | |
679 | goto loop; | |
680 | } | |
4f083fd7 | 681 | splx(s); |
4f083fd7 | 682 | } |
4f083fd7 | 683 | } |
7188ac27 | 684 | return (allerrors); |
4f083fd7 SL |
685 | } |
686 | ||
663dbc72 | 687 | /* |
7188ac27 | 688 | * Make sure all write-behind blocks associated |
a937f856 | 689 | * with mount point are flushed out (from sync). |
663dbc72 | 690 | */ |
a937f856 KM |
691 | bflush(mountp) |
692 | struct mount *mountp; | |
663dbc72 BJ |
693 | { |
694 | register struct buf *bp; | |
46387ee3 | 695 | register struct buf *flist; |
530d0032 | 696 | int s; |
663dbc72 BJ |
697 | |
698 | loop: | |
a5e62f37 | 699 | s = splbio(); |
a937f856 KM |
700 | for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++) { |
701 | for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) { | |
702 | if ((bp->b_flags & B_BUSY)) | |
703 | continue; | |
704 | if ((bp->b_flags & B_DELWRI) == 0) | |
705 | continue; | |
706 | if (bp->b_vp && bp->b_vp->v_mount == mountp) { | |
5a3e32e2 | 707 | splx(s); |
7188ac27 | 708 | notavail(bp); |
033a786e | 709 | (void) bawrite(bp); |
a937f856 | 710 | goto loop; |
7188ac27 | 711 | } |
7188ac27 KM |
712 | } |
713 | } | |
a937f856 | 714 | splx(s); |
663dbc72 | 715 | } |
7b8b5a01 RE |
716 | |
717 | /* | |
718 | * Invalidate in core blocks belonging to closed or umounted filesystem | |
719 | * | |
033a786e | 720 | * We walk through the buffer pool and invalidate any buffers for the |
a937f856 | 721 | * indicated mount point. Normally this routine is preceeded by a bflush |
033a786e KM |
722 | * call, so that on a quiescent filesystem there will be no dirty |
723 | * buffers when we are done. We return the count of dirty buffers when | |
724 | * we are finished. | |
7b8b5a01 | 725 | */ |
a937f856 KM |
726 | binval(mountp) |
727 | struct mount *mountp; | |
7b8b5a01 | 728 | { |
634ebdbe RE |
729 | register struct buf *bp; |
730 | register struct bufhd *hp; | |
1a24c701 | 731 | int s, dirty = 0; |
634ebdbe | 732 | #define dp ((struct buf *)hp) |
7b8b5a01 | 733 | |
a937f856 | 734 | loop: |
033a786e KM |
735 | for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++) { |
736 | for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) { | |
a937f856 | 737 | if (bp->b_vp == NULL || bp->b_vp->v_mount != mountp) |
033a786e | 738 | continue; |
5a3e32e2 | 739 | s = splbio(); |
1a24c701 KM |
740 | if (bp->b_flags & B_BUSY) { |
741 | bp->b_flags |= B_WANTED; | |
742 | sleep((caddr_t)bp, PRIBIO+1); | |
743 | splx(s); | |
744 | goto loop; | |
745 | } | |
5a3e32e2 | 746 | splx(s); |
033a786e KM |
747 | notavail(bp); |
748 | if (bp->b_flags & B_DELWRI) { | |
749 | (void) bawrite(bp); | |
750 | dirty++; | |
751 | continue; | |
7188ac27 | 752 | } |
033a786e KM |
753 | bp->b_flags |= B_INVAL; |
754 | brelvp(bp); | |
755 | brelse(bp); | |
756 | } | |
757 | } | |
758 | return (dirty); | |
7188ac27 KM |
759 | } |
760 | ||
761 | brelvp(bp) | |
762 | struct buf *bp; | |
763 | { | |
764 | struct vnode *vp; | |
765 | ||
766 | if (bp->b_vp == (struct vnode *) 0) | |
767 | return; | |
768 | vp = bp->b_vp; | |
769 | bp->b_vp = (struct vnode *) 0; | |
770 | vrele(vp); | |
7b8b5a01 | 771 | } |