Commit | Line | Data |
---|---|---|
5dc2581e KB |
1 | /*- |
2 | * Copyright (c) 1982, 1986, 1989 The Regents of the University of California. | |
7188ac27 | 3 | * All rights reserved. |
da7c5cc6 | 4 | * |
217c3be4 KM |
5 | * This module is believed to contain source code proprietary to AT&T. |
6 | * Use and redistribution is subject to the Berkeley Software License | |
7 | * Agreement and your Software Agreement with AT&T (Western Electric). | |
7188ac27 | 8 | * |
467e7fab | 9 | * @(#)vfs_cluster.c 7.54 (Berkeley) %G% |
da7c5cc6 | 10 | */ |
961945a8 | 11 | |
251f56ba KB |
12 | #include <sys/param.h> |
13 | #include <sys/proc.h> | |
14 | #include <sys/buf.h> | |
15 | #include <sys/vnode.h> | |
251f56ba KB |
16 | #include <sys/mount.h> |
17 | #include <sys/trace.h> | |
18 | #include <sys/resourcevar.h> | |
37392cf8 KM |
19 | #include <sys/malloc.h> |
20 | #include <libkern/libkern.h> | |
21 | ||
22 | /* | |
23 | * Definitions for the buffer hash lists. | |
24 | */ | |
25 | #define BUFHASH(dvp, lbn) \ | |
26 | (&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash]) | |
27 | struct buf **bufhashtbl, *invalhash; | |
28 | u_long bufhash; | |
29 | ||
/*
 * Insq/Remq for the buffer hash lists.
 *
 * A hash chain is singly linked forward through b_forw; b_back holds the
 * address of whatever pointer currently points at the buffer (the chain
 * head or the predecessor's b_forw), so removal needs no list head.
 *
 * Both macros are wrapped in do { } while (0) so that they behave as a
 * single statement (safe in an unbraced if/else), and the scratch
 * pointer is named bq_ so that it cannot capture an argument called bq.
 *
 * bremhash(bp):	unlink bp from the chain it is on.
 * binshash(bp, dp):	insert bp at the front of the chain headed by *dp.
 */
#define	bremhash(bp) do {						\
	struct buf *bq_;						\
	if ((bq_ = (bp)->b_forw) != NULL)				\
		bq_->b_back = (bp)->b_back;				\
	*(bp)->b_back = bq_;						\
} while (0)
#define	binshash(bp, dp) do {						\
	struct buf *bq_;						\
	if ((bq_ = *(dp)) != NULL)					\
		bq_->b_back = &(bp)->b_forw;				\
	(bp)->b_forw = bq_;						\
	(bp)->b_back = (dp);						\
	*(dp) = (bp);							\
} while (0)
47 | ||
/*
 * Buffer free lists.
 *
 * Every buffer that is not busy sits on exactly one of BQUEUES queues,
 * chosen by brelse() from the buffer's size and flags.
 */
#define	BQUEUES		4		/* number of free buffer queues */

#define	BQ_LOCKED	0		/* super-blocks &c */
#define	BQ_LRU		1		/* lru, useful buffers */
#define	BQ_AGE		2		/* rubbish */
#define	BQ_EMPTY	3		/* buffer headers with no memory */

/*
 * One free queue: a forward-linked list plus the address of the last
 * forward pointer, so that appending at the tail is constant time.
 */
struct bufqueue {
	struct	buf *buffreehead;	/* head of available list */
	struct	buf **buffreetail;	/* tail of available list */
};
struct	bufqueue bufqueues[BQUEUES];
int	needbuffer;			/* set while a process sleeps for a free buffer */
63 | ||
64 | /* | |
65 | * Insq/Remq for the buffer free lists. | |
66 | */ | |
67 | void | |
68 | bremfree(bp) | |
69 | struct buf *bp; | |
70 | { | |
71 | struct buf *bq; | |
72 | struct bufqueue *dp; | |
73 | ||
74 | if (bq = bp->b_actf) { | |
75 | bq->b_actb = bp->b_actb; | |
76 | } else { | |
77 | for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) | |
78 | if (dp->buffreetail == &bp->b_actf) | |
79 | break; | |
80 | if (dp == &bufqueues[BQUEUES]) | |
81 | panic("bremfree: lost tail"); | |
82 | dp->buffreetail = bp->b_actb; | |
83 | } | |
84 | *bp->b_actb = bq; | |
85 | } | |
86 | ||
/*
 * Insert a buffer on a free list.
 *
 * Free lists are linked forward through b_actf; b_actb holds the address
 * of the pointer that points at the buffer, and the queue's buffreetail
 * holds the address of the last b_actf (or of buffreehead when the list
 * is empty).
 *
 * Both macros are wrapped in do { } while (0) so that they behave as a
 * single statement (safe in an unbraced if/else), and the scratch
 * pointer is named bq_ so that it cannot capture an argument called bq.
 *
 * binsheadfree(bp, dp):	put bp at the front of queue dp.
 * binstailfree(bp, dp):	put bp at the end of queue dp.
 */
#define	binsheadfree(bp, dp) do {					\
	struct buf *bq_;						\
	if ((bq_ = (dp)->buffreehead) != NULL)				\
		bq_->b_actb = &(bp)->b_actf;				\
	else								\
		(dp)->buffreetail = &(bp)->b_actf;			\
	(dp)->buffreehead = (bp);					\
	(bp)->b_actf = bq_;						\
	(bp)->b_actb = &(dp)->buffreehead;				\
} while (0)
#define	binstailfree(bp, dp) do {					\
	(bp)->b_actf = NULL;						\
	(bp)->b_actb = (dp)->buffreetail;				\
	*(dp)->buffreetail = (bp);					\
	(dp)->buffreetail = &(bp)->b_actf;				\
} while (0)
663dbc72 | 103 | |
e7db227e MK |
104 | /* |
105 | * Initialize buffers and hash links for buffers. | |
106 | */ | |
251f56ba | 107 | void |
e7db227e MK |
108 | bufinit() |
109 | { | |
37392cf8 KM |
110 | register struct buf *bp; |
111 | struct bufqueue *dp; | |
e7db227e | 112 | register int i; |
e7db227e MK |
113 | int base, residual; |
114 | ||
37392cf8 KM |
115 | for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) |
116 | dp->buffreetail = &dp->buffreehead; | |
117 | bufhashtbl = (struct buf **)hashinit(nbuf, M_CACHE, &bufhash); | |
e7db227e MK |
118 | base = bufpages / nbuf; |
119 | residual = bufpages % nbuf; | |
120 | for (i = 0; i < nbuf; i++) { | |
121 | bp = &buf[i]; | |
37392cf8 | 122 | bzero((char *)bp, sizeof *bp); |
e7db227e | 123 | bp->b_dev = NODEV; |
e7db227e MK |
124 | bp->b_rcred = NOCRED; |
125 | bp->b_wcred = NOCRED; | |
e7db227e MK |
126 | bp->b_un.b_addr = buffers + i * MAXBSIZE; |
127 | if (i < residual) | |
128 | bp->b_bufsize = (base + 1) * CLBYTES; | |
129 | else | |
130 | bp->b_bufsize = base * CLBYTES; | |
31222d0d | 131 | bp->b_flags = B_INVAL; |
37392cf8 | 132 | dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY]; |
31222d0d | 133 | binsheadfree(bp, dp); |
37392cf8 | 134 | binshash(bp, &invalhash); |
e7db227e MK |
135 | } |
136 | } | |
137 | ||
663dbc72 | 138 | /* |
d42a4811 KM |
139 | * Find the block in the buffer pool. |
140 | * If the buffer is not present, allocate a new buffer and load | |
141 | * its contents according to the filesystem fill routine. | |
663dbc72 | 142 | */ |
a937f856 | 143 | bread(vp, blkno, size, cred, bpp) |
7188ac27 | 144 | struct vnode *vp; |
ad30fb67 KM |
145 | daddr_t blkno; |
146 | int size; | |
a937f856 | 147 | struct ucred *cred; |
7188ac27 | 148 | struct buf **bpp; |
ec67a3ce MK |
149 | #ifdef SECSIZE |
150 | long secsize; | |
151 | #endif SECSIZE | |
663dbc72 | 152 | { |
3789a403 | 153 | struct proc *p = curproc; /* XXX */ |
663dbc72 BJ |
154 | register struct buf *bp; |
155 | ||
4f083fd7 SL |
156 | if (size == 0) |
157 | panic("bread: size 0"); | |
ec67a3ce MK |
158 | #ifdef SECSIZE |
159 | bp = getblk(dev, blkno, size, secsize); | |
160 | #else SECSIZE | |
7188ac27 | 161 | *bpp = bp = getblk(vp, blkno, size); |
ec67a3ce | 162 | #endif SECSIZE |
d42a4811 | 163 | if (bp->b_flags & (B_DONE | B_DELWRI)) { |
c5a600cf | 164 | trace(TR_BREADHIT, pack(vp, size), blkno); |
7188ac27 | 165 | return (0); |
663dbc72 BJ |
166 | } |
167 | bp->b_flags |= B_READ; | |
4f083fd7 SL |
168 | if (bp->b_bcount > bp->b_bufsize) |
169 | panic("bread"); | |
a937f856 KM |
170 | if (bp->b_rcred == NOCRED && cred != NOCRED) { |
171 | crhold(cred); | |
172 | bp->b_rcred = cred; | |
173 | } | |
7188ac27 | 174 | VOP_STRATEGY(bp); |
c5a600cf | 175 | trace(TR_BREADMISS, pack(vp, size), blkno); |
3789a403 | 176 | p->p_stats->p_ru.ru_inblock++; /* pay for read */ |
7188ac27 | 177 | return (biowait(bp)); |
663dbc72 BJ |
178 | } |
179 | ||
180 | /* | |
bb1626f7 KM |
181 | * Operates like bread, but also starts I/O on the N specified |
182 | * read-ahead blocks. | |
663dbc72 | 183 | */ |
bb1626f7 | 184 | breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp) |
7188ac27 | 185 | struct vnode *vp; |
84baaab3 | 186 | daddr_t blkno; int size; |
ec67a3ce MK |
187 | #ifdef SECSIZE |
188 | long secsize; | |
189 | #endif SECSIZE | |
bb1626f7 KM |
190 | daddr_t rablkno[]; int rabsize[]; |
191 | int num; | |
a937f856 | 192 | struct ucred *cred; |
7188ac27 | 193 | struct buf **bpp; |
663dbc72 | 194 | { |
3789a403 | 195 | struct proc *p = curproc; /* XXX */ |
663dbc72 | 196 | register struct buf *bp, *rabp; |
bb1626f7 | 197 | register int i; |
663dbc72 BJ |
198 | |
199 | bp = NULL; | |
3efdd860 | 200 | /* |
d42a4811 KM |
201 | * If the block is not memory resident, |
202 | * allocate a buffer and start I/O. | |
3efdd860 | 203 | */ |
7188ac27 KM |
204 | if (!incore(vp, blkno)) { |
205 | *bpp = bp = getblk(vp, blkno, size); | |
ec67a3ce | 206 | #endif SECSIZE |
d42a4811 | 207 | if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) { |
663dbc72 | 208 | bp->b_flags |= B_READ; |
4f083fd7 | 209 | if (bp->b_bcount > bp->b_bufsize) |
bb1626f7 | 210 | panic("breadn"); |
a937f856 KM |
211 | if (bp->b_rcred == NOCRED && cred != NOCRED) { |
212 | crhold(cred); | |
213 | bp->b_rcred = cred; | |
214 | } | |
7188ac27 | 215 | VOP_STRATEGY(bp); |
c5a600cf | 216 | trace(TR_BREADMISS, pack(vp, size), blkno); |
3789a403 | 217 | p->p_stats->p_ru.ru_inblock++; /* pay for read */ |
7d1e9cf4 | 218 | } else { |
c5a600cf | 219 | trace(TR_BREADHIT, pack(vp, size), blkno); |
7d1e9cf4 | 220 | } |
663dbc72 | 221 | } |
3efdd860 KM |
222 | |
223 | /* | |
bb1626f7 KM |
224 | * If there's read-ahead block(s), start I/O |
225 | * on them also (as above). | |
3efdd860 | 226 | */ |
bb1626f7 KM |
227 | for (i = 0; i < num; i++) { |
228 | if (incore(vp, rablkno[i])) | |
229 | continue; | |
230 | rabp = getblk(vp, rablkno[i], rabsize[i]); | |
ec67a3ce | 231 | #endif SECSIZE |
d42a4811 | 232 | if (rabp->b_flags & (B_DONE | B_DELWRI)) { |
663dbc72 | 233 | brelse(rabp); |
bb1626f7 | 234 | trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]); |
973ecc4f | 235 | } else { |
d42a4811 | 236 | rabp->b_flags |= B_ASYNC | B_READ; |
4f083fd7 SL |
237 | if (rabp->b_bcount > rabp->b_bufsize) |
238 | panic("breadrabp"); | |
5062ac4a | 239 | if (rabp->b_rcred == NOCRED && cred != NOCRED) { |
a937f856 | 240 | crhold(cred); |
5062ac4a | 241 | rabp->b_rcred = cred; |
a937f856 | 242 | } |
7188ac27 | 243 | VOP_STRATEGY(rabp); |
bb1626f7 | 244 | trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]); |
3789a403 | 245 | p->p_stats->p_ru.ru_inblock++; /* pay in advance */ |
663dbc72 BJ |
246 | } |
247 | } | |
3efdd860 KM |
248 | |
249 | /* | |
d42a4811 KM |
250 | * If block was memory resident, let bread get it. |
251 | * If block was not memory resident, the read was | |
252 | * started above, so just wait for the read to complete. | |
3efdd860 | 253 | */ |
84baaab3 | 254 | if (bp == NULL) |
ec67a3ce MK |
255 | #ifdef SECSIZE |
256 | return (bread(dev, blkno, size, secsize)); | |
257 | #else SECSIZE | |
a937f856 | 258 | return (bread(vp, blkno, size, cred, bpp)); |
7188ac27 | 259 | return (biowait(bp)); |
663dbc72 BJ |
260 | } |
261 | ||
262 | /* | |
d42a4811 KM |
263 | * Synchronous write. |
264 | * Release buffer on completion. | |
663dbc72 BJ |
265 | */ |
266 | bwrite(bp) | |
3efdd860 | 267 | register struct buf *bp; |
663dbc72 | 268 | { |
3789a403 | 269 | struct proc *p = curproc; /* XXX */ |
7188ac27 | 270 | register int flag; |
31222d0d | 271 | int s, error = 0; |
663dbc72 BJ |
272 | |
273 | flag = bp->b_flags; | |
f844ee62 | 274 | bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); |
77dc8a8c KM |
275 | if (flag & B_ASYNC) { |
276 | if ((flag & B_DELWRI) == 0) | |
277 | p->p_stats->p_ru.ru_oublock++; /* no one paid yet */ | |
278 | else | |
279 | reassignbuf(bp, bp->b_vp); | |
280 | } | |
c5a600cf | 281 | trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno); |
4f083fd7 SL |
282 | if (bp->b_bcount > bp->b_bufsize) |
283 | panic("bwrite"); | |
86e7dd3b | 284 | s = splbio(); |
c669f646 | 285 | bp->b_vp->v_numoutput++; |
86e7dd3b | 286 | splx(s); |
7188ac27 | 287 | VOP_STRATEGY(bp); |
3efdd860 KM |
288 | |
289 | /* | |
d42a4811 | 290 | * If the write was synchronous, then await I/O completion. |
3efdd860 | 291 | * If the write was "delayed", then we put the buffer on |
d42a4811 | 292 | * the queue of blocks awaiting I/O completion status. |
3efdd860 | 293 | */ |
d42a4811 | 294 | if ((flag & B_ASYNC) == 0) { |
7188ac27 | 295 | error = biowait(bp); |
77dc8a8c KM |
296 | if ((flag&B_DELWRI) == 0) |
297 | p->p_stats->p_ru.ru_oublock++; /* no one paid yet */ | |
298 | else | |
299 | reassignbuf(bp, bp->b_vp); | |
663dbc72 | 300 | brelse(bp); |
7188ac27 | 301 | } else if (flag & B_DELWRI) { |
31222d0d | 302 | s = splbio(); |
663dbc72 | 303 | bp->b_flags |= B_AGE; |
31222d0d | 304 | splx(s); |
7188ac27 KM |
305 | } |
306 | return (error); | |
663dbc72 BJ |
307 | } |
308 | ||
80746147 JH |
309 | int |
310 | vn_bwrite(ap) | |
311 | struct vop_bwrite_args *ap; | |
312 | { | |
37392cf8 | 313 | return (bwrite(ap->a_bp)); |
80746147 JH |
314 | } |
315 | ||
316 | ||
663dbc72 | 317 | /* |
d42a4811 KM |
318 | * Delayed write. |
319 | * | |
320 | * The buffer is marked dirty, but is not queued for I/O. | |
321 | * This routine should be used when the buffer is expected | |
322 | * to be modified again soon, typically a small write that | |
323 | * partially fills a buffer. | |
324 | * | |
325 | * NB: magnetic tapes cannot be delayed; they must be | |
326 | * written in the order that the writes are requested. | |
663dbc72 BJ |
327 | */ |
328 | bdwrite(bp) | |
3efdd860 | 329 | register struct buf *bp; |
663dbc72 | 330 | { |
3789a403 | 331 | struct proc *p = curproc; /* XXX */ |
663dbc72 | 332 | |
c669f646 KM |
333 | if ((bp->b_flags & B_DELWRI) == 0) { |
334 | bp->b_flags |= B_DELWRI; | |
335 | reassignbuf(bp, bp->b_vp); | |
3789a403 | 336 | p->p_stats->p_ru.ru_oublock++; /* no one paid yet */ |
c669f646 | 337 | } |
7188ac27 | 338 | /* |
edadbc2c | 339 | * If this is a tape drive, the write must be initiated. |
7188ac27 | 340 | */ |
ec67a3ce | 341 | if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE) |
663dbc72 | 342 | bawrite(bp); |
edadbc2c | 343 | } else { |
d42a4811 | 344 | bp->b_flags |= (B_DONE | B_DELWRI); |
663dbc72 BJ |
345 | brelse(bp); |
346 | } | |
347 | } | |
348 | ||
349 | /* | |
d42a4811 KM |
350 | * Asynchronous write. |
351 | * Start I/O on a buffer, but do not wait for it to complete. | |
352 | * The buffer is released when the I/O completes. | |
663dbc72 BJ |
353 | */ |
354 | bawrite(bp) | |
3efdd860 | 355 | register struct buf *bp; |
663dbc72 BJ |
356 | { |
357 | ||
d42a4811 KM |
358 | /* |
359 | * Setting the ASYNC flag causes bwrite to return | |
360 | * after starting the I/O. | |
361 | */ | |
663dbc72 | 362 | bp->b_flags |= B_ASYNC; |
7188ac27 | 363 | (void) bwrite(bp); |
663dbc72 BJ |
364 | } |
365 | ||
366 | /* | |
d42a4811 KM |
367 | * Release a buffer. |
368 | * Even if the buffer is dirty, no I/O is started. | |
663dbc72 BJ |
369 | */ |
370 | brelse(bp) | |
3efdd860 | 371 | register struct buf *bp; |
663dbc72 | 372 | { |
37392cf8 | 373 | register struct bufqueue *flist; |
d42a4811 | 374 | int s; |
663dbc72 | 375 | |
c5a600cf | 376 | trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno); |
3efdd860 | 377 | /* |
edadbc2c KM |
378 | * If a process is waiting for the buffer, or |
379 | * is waiting for a free buffer, awaken it. | |
3efdd860 | 380 | */ |
d42a4811 | 381 | if (bp->b_flags & B_WANTED) |
663dbc72 | 382 | wakeup((caddr_t)bp); |
37392cf8 KM |
383 | if (needbuffer) { |
384 | needbuffer = 0; | |
385 | wakeup((caddr_t)&needbuffer); | |
663dbc72 | 386 | } |
edadbc2c KM |
387 | /* |
388 | * Retry I/O for locked buffers rather than invalidating them. | |
389 | */ | |
31222d0d | 390 | s = splbio(); |
edadbc2c KM |
391 | if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED)) |
392 | bp->b_flags &= ~B_ERROR; | |
edadbc2c KM |
393 | /* |
394 | * Disassociate buffers that are no longer valid. | |
395 | */ | |
d42a4811 | 396 | if (bp->b_flags & (B_NOCACHE | B_ERROR)) |
7188ac27 | 397 | bp->b_flags |= B_INVAL; |
d42a4811 | 398 | if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) { |
edadbc2c KM |
399 | if (bp->b_vp) |
400 | brelvp(bp); | |
401 | bp->b_flags &= ~B_DELWRI; | |
7188ac27 | 402 | } |
3efdd860 KM |
403 | /* |
404 | * Stick the buffer back on a free list. | |
405 | */ | |
4f083fd7 SL |
406 | if (bp->b_bufsize <= 0) { |
407 | /* block has no buffer ... put at front of unused buffer list */ | |
37392cf8 | 408 | flist = &bufqueues[BQ_EMPTY]; |
4f083fd7 | 409 | binsheadfree(bp, flist); |
d42a4811 | 410 | } else if (bp->b_flags & (B_ERROR | B_INVAL)) { |
46387ee3 | 411 | /* block has no info ... put at front of most free list */ |
37392cf8 | 412 | flist = &bufqueues[BQ_AGE]; |
3efdd860 | 413 | binsheadfree(bp, flist); |
663dbc72 | 414 | } else { |
46387ee3 | 415 | if (bp->b_flags & B_LOCKED) |
37392cf8 | 416 | flist = &bufqueues[BQ_LOCKED]; |
46387ee3 | 417 | else if (bp->b_flags & B_AGE) |
37392cf8 | 418 | flist = &bufqueues[BQ_AGE]; |
46387ee3 | 419 | else |
37392cf8 | 420 | flist = &bufqueues[BQ_LRU]; |
3efdd860 | 421 | binstailfree(bp, flist); |
663dbc72 | 422 | } |
d42a4811 | 423 | bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE); |
663dbc72 BJ |
424 | splx(s); |
425 | } | |
426 | ||
427 | /* | |
d42a4811 | 428 | * Check to see if a block is currently memory resident. |
663dbc72 | 429 | */ |
7188ac27 KM |
430 | incore(vp, blkno) |
431 | struct vnode *vp; | |
3efdd860 | 432 | daddr_t blkno; |
663dbc72 BJ |
433 | { |
434 | register struct buf *bp; | |
663dbc72 | 435 | |
37392cf8 | 436 | for (bp = *BUFHASH(vp, blkno); bp; bp = bp->b_forw) |
edadbc2c | 437 | if (bp->b_lblkno == blkno && bp->b_vp == vp && |
3efdd860 | 438 | (bp->b_flags & B_INVAL) == 0) |
5603d07d | 439 | return (1); |
5603d07d | 440 | return (0); |
663dbc72 BJ |
441 | } |
442 | ||
edadbc2c | 443 | /* |
d42a4811 KM |
444 | * Check to see if a block is currently memory resident. |
445 | * If it is resident, return it. If it is not resident, | |
446 | * allocate a new buffer and assign it to the block. | |
663dbc72 BJ |
447 | */ |
448 | struct buf * | |
ec67a3ce MK |
449 | #ifdef SECSIZE |
450 | getblk(dev, blkno, size, secsize) | |
451 | #else SECSIZE | |
7188ac27 KM |
452 | getblk(vp, blkno, size) |
453 | register struct vnode *vp; | |
ad30fb67 KM |
454 | daddr_t blkno; |
455 | int size; | |
ec67a3ce MK |
456 | #ifdef SECSIZE |
457 | long secsize; | |
458 | #endif SECSIZE | |
663dbc72 | 459 | { |
37392cf8 | 460 | register struct buf *bp, **dp; |
23900030 | 461 | int s; |
663dbc72 | 462 | |
00a6a148 KM |
463 | if (size > MAXBSIZE) |
464 | panic("getblk: size too big"); | |
3efdd860 | 465 | /* |
d42a4811 KM |
466 | * Search the cache for the block. If the buffer is found, |
467 | * but it is currently locked, the we must wait for it to | |
468 | * become available. | |
3efdd860 | 469 | */ |
7188ac27 | 470 | dp = BUFHASH(vp, blkno); |
3efdd860 | 471 | loop: |
37392cf8 | 472 | for (bp = *dp; bp; bp = bp->b_forw) { |
edadbc2c | 473 | if (bp->b_lblkno != blkno || bp->b_vp != vp || |
d42a4811 | 474 | (bp->b_flags & B_INVAL)) |
663dbc72 | 475 | continue; |
a5e62f37 | 476 | s = splbio(); |
d42a4811 | 477 | if (bp->b_flags & B_BUSY) { |
663dbc72 | 478 | bp->b_flags |= B_WANTED; |
d42a4811 | 479 | sleep((caddr_t)bp, PRIBIO + 1); |
23900030 | 480 | splx(s); |
663dbc72 BJ |
481 | goto loop; |
482 | } | |
c669f646 KM |
483 | bremfree(bp); |
484 | bp->b_flags |= B_BUSY; | |
23900030 | 485 | splx(s); |
32a56bda | 486 | if (bp->b_bcount != size) { |
edadbc2c KM |
487 | printf("getblk: stray size"); |
488 | bp->b_flags |= B_INVAL; | |
489 | bwrite(bp); | |
9d6d37ce | 490 | goto loop; |
edadbc2c | 491 | } |
663dbc72 | 492 | bp->b_flags |= B_CACHE; |
a5e62f37 | 493 | return (bp); |
663dbc72 | 494 | } |
4f083fd7 | 495 | bp = getnewbuf(); |
3efdd860 | 496 | bremhash(bp); |
edadbc2c | 497 | bgetvp(vp, bp); |
521a4688 | 498 | bp->b_bcount = 0; |
edadbc2c | 499 | bp->b_lblkno = blkno; |
ec67a3ce MK |
500 | #ifdef SECSIZE |
501 | bp->b_blksize = secsize; | |
502 | #endif SECSIZE | |
ad30fb67 | 503 | bp->b_blkno = blkno; |
4f083fd7 | 504 | bp->b_error = 0; |
7188ac27 KM |
505 | bp->b_resid = 0; |
506 | binshash(bp, dp); | |
521a4688 | 507 | allocbuf(bp, size); |
a5e62f37 | 508 | return (bp); |
663dbc72 BJ |
509 | } |
510 | ||
511 | /* | |
d42a4811 KM |
512 | * Allocate a buffer. |
513 | * The caller will assign it to a block. | |
663dbc72 BJ |
514 | */ |
515 | struct buf * | |
ad30fb67 KM |
516 | geteblk(size) |
517 | int size; | |
663dbc72 | 518 | { |
37392cf8 | 519 | register struct buf *bp; |
663dbc72 | 520 | |
00a6a148 KM |
521 | if (size > MAXBSIZE) |
522 | panic("geteblk: size too big"); | |
4f083fd7 SL |
523 | bp = getnewbuf(); |
524 | bp->b_flags |= B_INVAL; | |
3efdd860 | 525 | bremhash(bp); |
37392cf8 | 526 | binshash(bp, &invalhash); |
521a4688 | 527 | bp->b_bcount = 0; |
ec67a3ce MK |
528 | #ifdef SECSIZE |
529 | bp->b_blksize = DEV_BSIZE; | |
530 | #endif SECSIZE | |
4f083fd7 | 531 | bp->b_error = 0; |
7188ac27 | 532 | bp->b_resid = 0; |
521a4688 | 533 | allocbuf(bp, size); |
a5e62f37 | 534 | return (bp); |
663dbc72 BJ |
535 | } |
536 | ||
ad30fb67 | 537 | /* |
521a4688 | 538 | * Expand or contract the actual memory allocated to a buffer. |
d42a4811 | 539 | * If no memory is available, release buffer and take error exit. |
ad30fb67 | 540 | */ |
521a4688 KM |
541 | allocbuf(tp, size) |
542 | register struct buf *tp; | |
ad30fb67 KM |
543 | int size; |
544 | { | |
521a4688 KM |
545 | register struct buf *bp, *ep; |
546 | int sizealloc, take, s; | |
ad30fb67 | 547 | |
521a4688 KM |
548 | sizealloc = roundup(size, CLBYTES); |
549 | /* | |
550 | * Buffer size does not change | |
551 | */ | |
552 | if (sizealloc == tp->b_bufsize) | |
553 | goto out; | |
554 | /* | |
555 | * Buffer size is shrinking. | |
556 | * Place excess space in a buffer header taken from the | |
557 | * BQ_EMPTY buffer list and placed on the "most free" list. | |
558 | * If no extra buffer headers are available, leave the | |
559 | * extra space in the present buffer. | |
560 | */ | |
561 | if (sizealloc < tp->b_bufsize) { | |
37392cf8 | 562 | if ((ep = bufqueues[BQ_EMPTY].buffreehead) == NULL) |
521a4688 KM |
563 | goto out; |
564 | s = splbio(); | |
565 | bremfree(ep); | |
566 | ep->b_flags |= B_BUSY; | |
567 | splx(s); | |
568 | pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr, | |
569 | (int)tp->b_bufsize - sizealloc); | |
570 | ep->b_bufsize = tp->b_bufsize - sizealloc; | |
571 | tp->b_bufsize = sizealloc; | |
572 | ep->b_flags |= B_INVAL; | |
573 | ep->b_bcount = 0; | |
574 | brelse(ep); | |
575 | goto out; | |
576 | } | |
577 | /* | |
578 | * More buffer space is needed. Get it out of buffers on | |
579 | * the "most free" list, placing the empty headers on the | |
580 | * BQ_EMPTY buffer header list. | |
581 | */ | |
582 | while (tp->b_bufsize < sizealloc) { | |
583 | take = sizealloc - tp->b_bufsize; | |
584 | bp = getnewbuf(); | |
585 | if (take >= bp->b_bufsize) | |
586 | take = bp->b_bufsize; | |
587 | pagemove(&bp->b_un.b_addr[bp->b_bufsize - take], | |
588 | &tp->b_un.b_addr[tp->b_bufsize], take); | |
589 | tp->b_bufsize += take; | |
590 | bp->b_bufsize = bp->b_bufsize - take; | |
591 | if (bp->b_bcount > bp->b_bufsize) | |
592 | bp->b_bcount = bp->b_bufsize; | |
593 | if (bp->b_bufsize <= 0) { | |
594 | bremhash(bp); | |
37392cf8 | 595 | binshash(bp, &invalhash); |
d42a4811 | 596 | bp->b_dev = NODEV; |
521a4688 KM |
597 | bp->b_error = 0; |
598 | bp->b_flags |= B_INVAL; | |
599 | } | |
600 | brelse(bp); | |
601 | } | |
602 | out: | |
603 | tp->b_bcount = size; | |
604 | return (1); | |
4f083fd7 SL |
605 | } |
606 | ||
4f083fd7 SL |
607 | /* |
608 | * Find a buffer which is available for use. | |
609 | * Select something from a free list. | |
610 | * Preference is to AGE list, then LRU list. | |
611 | */ | |
612 | struct buf * | |
613 | getnewbuf() | |
614 | { | |
37392cf8 KM |
615 | register struct buf *bp; |
616 | register struct bufqueue *dp; | |
a937f856 | 617 | register struct ucred *cred; |
4f083fd7 SL |
618 | int s; |
619 | ||
620 | loop: | |
a5e62f37 | 621 | s = splbio(); |
37392cf8 KM |
622 | for (dp = &bufqueues[BQ_AGE]; dp > bufqueues; dp--) |
623 | if (dp->buffreehead) | |
4f083fd7 | 624 | break; |
37392cf8 KM |
625 | if (dp == bufqueues) { /* no free blocks */ |
626 | needbuffer = 1; | |
627 | sleep((caddr_t)&needbuffer, PRIBIO + 1); | |
4b7d506c | 628 | splx(s); |
4f083fd7 SL |
629 | goto loop; |
630 | } | |
37392cf8 | 631 | bp = dp->buffreehead; |
c669f646 KM |
632 | bremfree(bp); |
633 | bp->b_flags |= B_BUSY; | |
634 | splx(s); | |
4f083fd7 | 635 | if (bp->b_flags & B_DELWRI) { |
033a786e | 636 | (void) bawrite(bp); |
4f083fd7 SL |
637 | goto loop; |
638 | } | |
c5a600cf | 639 | trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno); |
edadbc2c KM |
640 | if (bp->b_vp) |
641 | brelvp(bp); | |
a937f856 KM |
642 | if (bp->b_rcred != NOCRED) { |
643 | cred = bp->b_rcred; | |
644 | bp->b_rcred = NOCRED; | |
645 | crfree(cred); | |
646 | } | |
647 | if (bp->b_wcred != NOCRED) { | |
648 | cred = bp->b_wcred; | |
649 | bp->b_wcred = NOCRED; | |
650 | crfree(cred); | |
651 | } | |
4f083fd7 | 652 | bp->b_flags = B_BUSY; |
1c89915d | 653 | bp->b_dirtyoff = bp->b_dirtyend = 0; |
bb1626f7 | 654 | bp->b_validoff = bp->b_validend = 0; |
4f083fd7 SL |
655 | return (bp); |
656 | } | |
657 | ||
663dbc72 | 658 | /* |
d42a4811 KM |
659 | * Wait for I/O to complete. |
660 | * | |
661 | * Extract and return any errors associated with the I/O. | |
662 | * If the error flag is set, but no specific error is | |
663 | * given, return EIO. | |
663dbc72 | 664 | */ |
3efdd860 | 665 | biowait(bp) |
ad30fb67 | 666 | register struct buf *bp; |
663dbc72 | 667 | { |
530d0032 | 668 | int s; |
663dbc72 | 669 | |
a5e62f37 | 670 | s = splbio(); |
a937f856 | 671 | while ((bp->b_flags & B_DONE) == 0) |
663dbc72 | 672 | sleep((caddr_t)bp, PRIBIO); |
530d0032 | 673 | splx(s); |
7188ac27 KM |
674 | if ((bp->b_flags & B_ERROR) == 0) |
675 | return (0); | |
676 | if (bp->b_error) | |
677 | return (bp->b_error); | |
678 | return (EIO); | |
663dbc72 BJ |
679 | } |
680 | ||
663dbc72 | 681 | /* |
af04ce66 | 682 | * Mark I/O complete on a buffer. |
d42a4811 KM |
683 | * |
684 | * If a callback has been requested, e.g. the pageout | |
685 | * daemon, do so. Otherwise, awaken waiting processes. | |
663dbc72 | 686 | */ |
251f56ba | 687 | void |
3efdd860 KM |
688 | biodone(bp) |
689 | register struct buf *bp; | |
663dbc72 | 690 | { |
663dbc72 | 691 | |
80e7c811 | 692 | if (bp->b_flags & B_DONE) |
3efdd860 | 693 | panic("dup biodone"); |
663dbc72 | 694 | bp->b_flags |= B_DONE; |
76429560 KM |
695 | if ((bp->b_flags & B_READ) == 0) |
696 | vwakeup(bp); | |
961945a8 SL |
697 | if (bp->b_flags & B_CALL) { |
698 | bp->b_flags &= ~B_CALL; | |
699 | (*bp->b_iodone)(bp); | |
700 | return; | |
701 | } | |
d42a4811 | 702 | if (bp->b_flags & B_ASYNC) |
663dbc72 BJ |
703 | brelse(bp); |
704 | else { | |
705 | bp->b_flags &= ~B_WANTED; | |
706 | wakeup((caddr_t)bp); | |
707 | } | |
708 | } | |
aa95c6fc KM |
709 | |
#ifdef DIAGNOSTIC
/*
 * Print out statistics on the current allocation of the buffer pool.
 *
 * For each free queue one line is printed: the queue name, the number
 * of buffers on it, and for every buffer size present (in units of
 * CLBYTES) how many buffers have that size.
 */
void
vfs_bufstats()
{
	int s, q, slot, total;
	register struct buf *bp;
	register struct bufqueue *qp;
	int counts[MAXBSIZE/CLBYTES+1];
	static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" };

	for (q = 0; q < BQUEUES; q++) {
		qp = &bufqueues[q];
		total = 0;
		for (slot = 0; slot <= MAXBSIZE/CLBYTES; slot++)
			counts[slot] = 0;
		/* Walk the queue with buffer interrupts blocked. */
		s = splbio();
		bp = qp->buffreehead;
		while (bp != NULL) {
			counts[bp->b_bufsize/CLBYTES]++;
			total++;
			bp = bp->b_actf;
		}
		splx(s);
		printf("%s: total-%d", bname[q], total);
		for (slot = 0; slot <= MAXBSIZE/CLBYTES; slot++) {
			if (counts[slot] == 0)
				continue;
			printf(", %d-%d", slot * CLBYTES, counts[slot]);
		}
		printf("\n");
	}
}
#endif /* DIAGNOSTIC */