Commit | Line | Data |
---|---|---|
5dc2581e | 1 | /*- |
adb35f79 KB |
2 | * Copyright (c) 1986, 1989, 1993 |
3 | * The Regents of the University of California. All rights reserved. | |
4 | * (c) UNIX System Laboratories, Inc. | |
5 | * All or some portions of this file are derived from material licensed | |
6 | * to the University of California by American Telephone and Telegraph | |
7 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with | |
8 | * the permission of UNIX System Laboratories, Inc. | |
da7c5cc6 | 9 | * |
86bab001 KB |
10 | * This code is derived from software contributed to Berkeley by |
11 | * Berkeley Software Design Inc. | |
7188ac27 | 12 | * |
86bab001 KB |
13 | * %sccs.include.redist.c% |
14 | * | |
24096dc3 | 15 | * @(#)vfs_bio.c 8.10 (Berkeley) %G% |
da7c5cc6 | 16 | */ |
961945a8 | 17 | |
251f56ba | 18 | #include <sys/param.h> |
af50abe6 | 19 | #include <sys/systm.h> |
251f56ba KB |
20 | #include <sys/proc.h> |
21 | #include <sys/buf.h> | |
22 | #include <sys/vnode.h> | |
251f56ba KB |
23 | #include <sys/mount.h> |
24 | #include <sys/trace.h> | |
37392cf8 | 25 | #include <sys/malloc.h> |
5c8652bb | 26 | #include <sys/resourcevar.h> |
b88d365e KM |
27 | #include <ufs/ufs/quota.h> |
28 | #include <ufs/ufs/inode.h> | |
37392cf8 KM |
29 | |
30 | /* | |
31 | * Definitions for the buffer hash lists. | |
32 | */ | |
33 | #define BUFHASH(dvp, lbn) \ | |
34 | (&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash]) | |
af50abe6 | 35 | LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash; |
37392cf8 KM |
36 | u_long bufhash; |
37 | ||
38 | /* | |
39 | * Insq/Remq for the buffer hash lists. | |
40 | */ | |
af50abe6 KM |
41 | #define binshash(bp, dp) LIST_INSERT_HEAD(dp, bp, b_hash) |
42 | #define bremhash(bp) LIST_REMOVE(bp, b_hash) | |
37392cf8 KM |
43 | |
44 | /* | |
45 | * Definitions for the buffer free lists. | |
46 | */ | |
47 | #define BQUEUES 4 /* number of free buffer queues */ | |
48 | ||
49 | #define BQ_LOCKED 0 /* super-blocks &c */ | |
50 | #define BQ_LRU 1 /* lru, useful buffers */ | |
51 | #define BQ_AGE 2 /* rubbish */ | |
52 | #define BQ_EMPTY 3 /* buffer headers with no memory */ | |
53 | ||
af50abe6 | 54 | TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES]; |
37392cf8 KM |
55 | int needbuffer; |
56 | ||
57 | /* | |
58 | * Insq/Remq for the buffer free lists. | |
59 | */ | |
af50abe6 KM |
60 | #define binsheadfree(bp, dp) TAILQ_INSERT_HEAD(dp, bp, b_freelist) |
61 | #define binstailfree(bp, dp) TAILQ_INSERT_TAIL(dp, bp, b_freelist) | |
e3249ec0 | 62 | |
37392cf8 KM |
63 | void |
64 | bremfree(bp) | |
65 | struct buf *bp; | |
66 | { | |
af50abe6 | 67 | struct bqueues *dp = NULL; |
37392cf8 | 68 | |
e3249ec0 KM |
69 | /* |
70 | * We only calculate the head of the freelist when removing | |
71 | * the last element of the list as that is the only time that | |
72 | * it is needed (e.g. to reset the tail pointer). | |
af50abe6 KM |
73 | * |
74 | * NB: This makes an assumption about how tailq's are implemented. | |
e3249ec0 | 75 | */ |
af50abe6 | 76 | if (bp->b_freelist.tqe_next == NULL) { |
37392cf8 | 77 | for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) |
af50abe6 | 78 | if (dp->tqh_last == &bp->b_freelist.tqe_next) |
37392cf8 KM |
79 | break; |
80 | if (dp == &bufqueues[BQUEUES]) | |
81 | panic("bremfree: lost tail"); | |
37392cf8 | 82 | } |
af50abe6 | 83 | TAILQ_REMOVE(dp, bp, b_freelist); |
37392cf8 | 84 | } |
663dbc72 | 85 | |
e7db227e MK |
86 | /* |
87 | * Initialize buffers and hash links for buffers. | |
88 | */ | |
251f56ba | 89 | void |
e7db227e MK |
90 | bufinit() |
91 | { | |
37392cf8 | 92 | register struct buf *bp; |
af50abe6 | 93 | struct bqueues *dp; |
e7db227e | 94 | register int i; |
e7db227e MK |
95 | int base, residual; |
96 | ||
37392cf8 | 97 | for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) |
af50abe6 KM |
98 | TAILQ_INIT(dp); |
99 | bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash); | |
e7db227e MK |
100 | base = bufpages / nbuf; |
101 | residual = bufpages % nbuf; | |
102 | for (i = 0; i < nbuf; i++) { | |
103 | bp = &buf[i]; | |
37392cf8 | 104 | bzero((char *)bp, sizeof *bp); |
e7db227e | 105 | bp->b_dev = NODEV; |
e7db227e MK |
106 | bp->b_rcred = NOCRED; |
107 | bp->b_wcred = NOCRED; | |
34f384c1 | 108 | bp->b_vnbufs.le_next = NOLIST; |
cb84e0ab | 109 | bp->b_data = buffers + i * MAXBSIZE; |
e7db227e MK |
110 | if (i < residual) |
111 | bp->b_bufsize = (base + 1) * CLBYTES; | |
112 | else | |
113 | bp->b_bufsize = base * CLBYTES; | |
31222d0d | 114 | bp->b_flags = B_INVAL; |
37392cf8 | 115 | dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY]; |
31222d0d | 116 | binsheadfree(bp, dp); |
37392cf8 | 117 | binshash(bp, &invalhash); |
e7db227e MK |
118 | } |
119 | } | |
120 | ||
663dbc72 | 121 | /* |
d42a4811 KM |
122 | * Find the block in the buffer pool. |
123 | * If the buffer is not present, allocate a new buffer and load | |
124 | * its contents according to the filesystem fill routine. | |
663dbc72 | 125 | */ |
a937f856 | 126 | bread(vp, blkno, size, cred, bpp) |
7188ac27 | 127 | struct vnode *vp; |
ad30fb67 KM |
128 | daddr_t blkno; |
129 | int size; | |
a937f856 | 130 | struct ucred *cred; |
7188ac27 | 131 | struct buf **bpp; |
ec67a3ce MK |
132 | #ifdef SECSIZE |
133 | long secsize; | |
134 | #endif SECSIZE | |
663dbc72 | 135 | { |
3789a403 | 136 | struct proc *p = curproc; /* XXX */ |
663dbc72 BJ |
137 | register struct buf *bp; |
138 | ||
4f083fd7 SL |
139 | if (size == 0) |
140 | panic("bread: size 0"); | |
ec67a3ce MK |
141 | #ifdef SECSIZE |
142 | bp = getblk(dev, blkno, size, secsize); | |
143 | #else SECSIZE | |
e140149a | 144 | *bpp = bp = getblk(vp, blkno, size, 0, 0); |
ec67a3ce | 145 | #endif SECSIZE |
d42a4811 | 146 | if (bp->b_flags & (B_DONE | B_DELWRI)) { |
c5a600cf | 147 | trace(TR_BREADHIT, pack(vp, size), blkno); |
7188ac27 | 148 | return (0); |
663dbc72 BJ |
149 | } |
150 | bp->b_flags |= B_READ; | |
4f083fd7 SL |
151 | if (bp->b_bcount > bp->b_bufsize) |
152 | panic("bread"); | |
a937f856 KM |
153 | if (bp->b_rcred == NOCRED && cred != NOCRED) { |
154 | crhold(cred); | |
155 | bp->b_rcred = cred; | |
156 | } | |
7188ac27 | 157 | VOP_STRATEGY(bp); |
c5a600cf | 158 | trace(TR_BREADMISS, pack(vp, size), blkno); |
3789a403 | 159 | p->p_stats->p_ru.ru_inblock++; /* pay for read */ |
7188ac27 | 160 | return (biowait(bp)); |
663dbc72 BJ |
161 | } |
162 | ||
163 | /* | |
bb1626f7 KM |
164 | * Operates like bread, but also starts I/O on the N specified |
165 | * read-ahead blocks. | |
663dbc72 | 166 | */ |
bb1626f7 | 167 | breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp) |
7188ac27 | 168 | struct vnode *vp; |
84baaab3 | 169 | daddr_t blkno; int size; |
ec67a3ce MK |
170 | #ifdef SECSIZE |
171 | long secsize; | |
172 | #endif SECSIZE | |
bb1626f7 KM |
173 | daddr_t rablkno[]; int rabsize[]; |
174 | int num; | |
a937f856 | 175 | struct ucred *cred; |
7188ac27 | 176 | struct buf **bpp; |
663dbc72 | 177 | { |
3789a403 | 178 | struct proc *p = curproc; /* XXX */ |
663dbc72 | 179 | register struct buf *bp, *rabp; |
bb1626f7 | 180 | register int i; |
663dbc72 BJ |
181 | |
182 | bp = NULL; | |
3efdd860 | 183 | /* |
d42a4811 KM |
184 | * If the block is not memory resident, |
185 | * allocate a buffer and start I/O. | |
3efdd860 | 186 | */ |
7188ac27 | 187 | if (!incore(vp, blkno)) { |
e140149a | 188 | *bpp = bp = getblk(vp, blkno, size, 0, 0); |
ec67a3ce | 189 | #endif SECSIZE |
d42a4811 | 190 | if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) { |
663dbc72 | 191 | bp->b_flags |= B_READ; |
4f083fd7 | 192 | if (bp->b_bcount > bp->b_bufsize) |
bb1626f7 | 193 | panic("breadn"); |
a937f856 KM |
194 | if (bp->b_rcred == NOCRED && cred != NOCRED) { |
195 | crhold(cred); | |
196 | bp->b_rcred = cred; | |
197 | } | |
7188ac27 | 198 | VOP_STRATEGY(bp); |
c5a600cf | 199 | trace(TR_BREADMISS, pack(vp, size), blkno); |
3789a403 | 200 | p->p_stats->p_ru.ru_inblock++; /* pay for read */ |
7d1e9cf4 | 201 | } else { |
c5a600cf | 202 | trace(TR_BREADHIT, pack(vp, size), blkno); |
7d1e9cf4 | 203 | } |
663dbc72 | 204 | } |
3efdd860 KM |
205 | |
206 | /* | |
bb1626f7 KM |
207 | * If there's read-ahead block(s), start I/O |
208 | * on them also (as above). | |
3efdd860 | 209 | */ |
bb1626f7 KM |
210 | for (i = 0; i < num; i++) { |
211 | if (incore(vp, rablkno[i])) | |
212 | continue; | |
e140149a | 213 | rabp = getblk(vp, rablkno[i], rabsize[i], 0, 0); |
ec67a3ce | 214 | #endif SECSIZE |
d42a4811 | 215 | if (rabp->b_flags & (B_DONE | B_DELWRI)) { |
663dbc72 | 216 | brelse(rabp); |
bb1626f7 | 217 | trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]); |
973ecc4f | 218 | } else { |
d42a4811 | 219 | rabp->b_flags |= B_ASYNC | B_READ; |
4f083fd7 SL |
220 | if (rabp->b_bcount > rabp->b_bufsize) |
221 | panic("breadrabp"); | |
5062ac4a | 222 | if (rabp->b_rcred == NOCRED && cred != NOCRED) { |
a937f856 | 223 | crhold(cred); |
5062ac4a | 224 | rabp->b_rcred = cred; |
a937f856 | 225 | } |
7188ac27 | 226 | VOP_STRATEGY(rabp); |
bb1626f7 | 227 | trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]); |
3789a403 | 228 | p->p_stats->p_ru.ru_inblock++; /* pay in advance */ |
663dbc72 BJ |
229 | } |
230 | } | |
3efdd860 KM |
231 | |
232 | /* | |
d42a4811 KM |
233 | * If block was memory resident, let bread get it. |
234 | * If block was not memory resident, the read was | |
235 | * started above, so just wait for the read to complete. | |
3efdd860 | 236 | */ |
84baaab3 | 237 | if (bp == NULL) |
ec67a3ce MK |
238 | #ifdef SECSIZE |
239 | return (bread(dev, blkno, size, secsize)); | |
240 | #else SECSIZE | |
a937f856 | 241 | return (bread(vp, blkno, size, cred, bpp)); |
7188ac27 | 242 | return (biowait(bp)); |
663dbc72 BJ |
243 | } |
244 | ||
245 | /* | |
d42a4811 KM |
246 | * Synchronous write. |
247 | * Release buffer on completion. | |
663dbc72 BJ |
248 | */ |
249 | bwrite(bp) | |
3efdd860 | 250 | register struct buf *bp; |
663dbc72 | 251 | { |
3789a403 | 252 | struct proc *p = curproc; /* XXX */ |
7188ac27 | 253 | register int flag; |
31222d0d | 254 | int s, error = 0; |
663dbc72 | 255 | |
fe668e35 | 256 | if ((bp->b_flags & B_ASYNC) == 0 && |
188f11ee MH |
257 | bp->b_vp && bp->b_vp->v_mount && |
258 | (bp->b_vp->v_mount->mnt_flag & MNT_ASYNC)) { | |
fe668e35 KM |
259 | bdwrite(bp); |
260 | return (0); | |
261 | } | |
663dbc72 | 262 | flag = bp->b_flags; |
f844ee62 | 263 | bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); |
77dc8a8c KM |
264 | if (flag & B_ASYNC) { |
265 | if ((flag & B_DELWRI) == 0) | |
266 | p->p_stats->p_ru.ru_oublock++; /* no one paid yet */ | |
267 | else | |
268 | reassignbuf(bp, bp->b_vp); | |
269 | } | |
c5a600cf | 270 | trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno); |
4f083fd7 SL |
271 | if (bp->b_bcount > bp->b_bufsize) |
272 | panic("bwrite"); | |
86e7dd3b | 273 | s = splbio(); |
c669f646 | 274 | bp->b_vp->v_numoutput++; |
e140149a | 275 | bp->b_flags |= B_WRITEINPROG; |
86e7dd3b | 276 | splx(s); |
7188ac27 | 277 | VOP_STRATEGY(bp); |
3efdd860 KM |
278 | |
279 | /* | |
d42a4811 | 280 | * If the write was synchronous, then await I/O completion. |
3efdd860 | 281 | * If the write was "delayed", then we put the buffer on |
d42a4811 | 282 | * the queue of blocks awaiting I/O completion status. |
3efdd860 | 283 | */ |
d42a4811 | 284 | if ((flag & B_ASYNC) == 0) { |
7188ac27 | 285 | error = biowait(bp); |
77dc8a8c KM |
286 | if ((flag&B_DELWRI) == 0) |
287 | p->p_stats->p_ru.ru_oublock++; /* no one paid yet */ | |
288 | else | |
289 | reassignbuf(bp, bp->b_vp); | |
e140149a KM |
290 | if (bp->b_flags & B_EINTR) { |
291 | bp->b_flags &= ~B_EINTR; | |
292 | error = EINTR; | |
293 | } | |
663dbc72 | 294 | brelse(bp); |
7188ac27 | 295 | } else if (flag & B_DELWRI) { |
31222d0d | 296 | s = splbio(); |
663dbc72 | 297 | bp->b_flags |= B_AGE; |
31222d0d | 298 | splx(s); |
7188ac27 KM |
299 | } |
300 | return (error); | |
663dbc72 BJ |
301 | } |
302 | ||
80746147 JH |
303 | int |
304 | vn_bwrite(ap) | |
305 | struct vop_bwrite_args *ap; | |
306 | { | |
ac85da8a | 307 | |
37392cf8 | 308 | return (bwrite(ap->a_bp)); |
80746147 JH |
309 | } |
310 | ||
311 | ||
663dbc72 | 312 | /* |
d42a4811 KM |
313 | * Delayed write. |
314 | * | |
315 | * The buffer is marked dirty, but is not queued for I/O. | |
316 | * This routine should be used when the buffer is expected | |
317 | * to be modified again soon, typically a small write that | |
318 | * partially fills a buffer. | |
319 | * | |
320 | * NB: magnetic tapes cannot be delayed; they must be | |
321 | * written in the order that the writes are requested. | |
663dbc72 BJ |
322 | */ |
323 | bdwrite(bp) | |
3efdd860 | 324 | register struct buf *bp; |
663dbc72 | 325 | { |
3789a403 | 326 | struct proc *p = curproc; /* XXX */ |
663dbc72 | 327 | |
c669f646 KM |
328 | if ((bp->b_flags & B_DELWRI) == 0) { |
329 | bp->b_flags |= B_DELWRI; | |
330 | reassignbuf(bp, bp->b_vp); | |
3789a403 | 331 | p->p_stats->p_ru.ru_oublock++; /* no one paid yet */ |
c669f646 | 332 | } |
7188ac27 | 333 | /* |
edadbc2c | 334 | * If this is a tape drive, the write must be initiated. |
7188ac27 | 335 | */ |
ec67a3ce | 336 | if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE) |
663dbc72 | 337 | bawrite(bp); |
edadbc2c | 338 | } else { |
d42a4811 | 339 | bp->b_flags |= (B_DONE | B_DELWRI); |
663dbc72 BJ |
340 | brelse(bp); |
341 | } | |
342 | } | |
343 | ||
344 | /* | |
d42a4811 KM |
345 | * Asynchronous write. |
346 | * Start I/O on a buffer, but do not wait for it to complete. | |
347 | * The buffer is released when the I/O completes. | |
663dbc72 BJ |
348 | */ |
349 | bawrite(bp) | |
3efdd860 | 350 | register struct buf *bp; |
663dbc72 BJ |
351 | { |
352 | ||
d42a4811 KM |
353 | /* |
354 | * Setting the ASYNC flag causes bwrite to return | |
355 | * after starting the I/O. | |
356 | */ | |
663dbc72 | 357 | bp->b_flags |= B_ASYNC; |
e140149a | 358 | (void) VOP_BWRITE(bp); |
663dbc72 BJ |
359 | } |
360 | ||
361 | /* | |
d42a4811 KM |
362 | * Release a buffer. |
363 | * Even if the buffer is dirty, no I/O is started. | |
663dbc72 BJ |
364 | */ |
365 | brelse(bp) | |
3efdd860 | 366 | register struct buf *bp; |
663dbc72 | 367 | { |
af50abe6 | 368 | register struct bqueues *flist; |
d42a4811 | 369 | int s; |
663dbc72 | 370 | |
c5a600cf | 371 | trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno); |
3efdd860 | 372 | /* |
edadbc2c KM |
373 | * If a process is waiting for the buffer, or |
374 | * is waiting for a free buffer, awaken it. | |
3efdd860 | 375 | */ |
d42a4811 | 376 | if (bp->b_flags & B_WANTED) |
663dbc72 | 377 | wakeup((caddr_t)bp); |
37392cf8 KM |
378 | if (needbuffer) { |
379 | needbuffer = 0; | |
380 | wakeup((caddr_t)&needbuffer); | |
663dbc72 | 381 | } |
edadbc2c KM |
382 | /* |
383 | * Retry I/O for locked buffers rather than invalidating them. | |
384 | */ | |
31222d0d | 385 | s = splbio(); |
edadbc2c KM |
386 | if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED)) |
387 | bp->b_flags &= ~B_ERROR; | |
edadbc2c KM |
388 | /* |
389 | * Disassociate buffers that are no longer valid. | |
390 | */ | |
d42a4811 | 391 | if (bp->b_flags & (B_NOCACHE | B_ERROR)) |
7188ac27 | 392 | bp->b_flags |= B_INVAL; |
d42a4811 | 393 | if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) { |
edadbc2c KM |
394 | if (bp->b_vp) |
395 | brelvp(bp); | |
396 | bp->b_flags &= ~B_DELWRI; | |
7188ac27 | 397 | } |
3efdd860 KM |
398 | /* |
399 | * Stick the buffer back on a free list. | |
400 | */ | |
4f083fd7 SL |
401 | if (bp->b_bufsize <= 0) { |
402 | /* block has no buffer ... put at front of unused buffer list */ | |
37392cf8 | 403 | flist = &bufqueues[BQ_EMPTY]; |
4f083fd7 | 404 | binsheadfree(bp, flist); |
d42a4811 | 405 | } else if (bp->b_flags & (B_ERROR | B_INVAL)) { |
46387ee3 | 406 | /* block has no info ... put at front of most free list */ |
37392cf8 | 407 | flist = &bufqueues[BQ_AGE]; |
3efdd860 | 408 | binsheadfree(bp, flist); |
663dbc72 | 409 | } else { |
46387ee3 | 410 | if (bp->b_flags & B_LOCKED) |
37392cf8 | 411 | flist = &bufqueues[BQ_LOCKED]; |
46387ee3 | 412 | else if (bp->b_flags & B_AGE) |
37392cf8 | 413 | flist = &bufqueues[BQ_AGE]; |
46387ee3 | 414 | else |
37392cf8 | 415 | flist = &bufqueues[BQ_LRU]; |
3efdd860 | 416 | binstailfree(bp, flist); |
663dbc72 | 417 | } |
d42a4811 | 418 | bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE); |
663dbc72 BJ |
419 | splx(s); |
420 | } | |
421 | ||
422 | /* | |
d42a4811 | 423 | * Check to see if a block is currently memory resident. |
663dbc72 | 424 | */ |
e140149a | 425 | struct buf * |
7188ac27 KM |
426 | incore(vp, blkno) |
427 | struct vnode *vp; | |
3efdd860 | 428 | daddr_t blkno; |
663dbc72 BJ |
429 | { |
430 | register struct buf *bp; | |
663dbc72 | 431 | |
af50abe6 | 432 | for (bp = BUFHASH(vp, blkno)->lh_first; bp; bp = bp->b_hash.le_next) |
edadbc2c | 433 | if (bp->b_lblkno == blkno && bp->b_vp == vp && |
3efdd860 | 434 | (bp->b_flags & B_INVAL) == 0) |
e140149a KM |
435 | return (bp); |
436 | return (NULL); | |
663dbc72 BJ |
437 | } |
438 | ||
edadbc2c | 439 | /* |
d42a4811 KM |
440 | * Check to see if a block is currently memory resident. |
441 | * If it is resident, return it. If it is not resident, | |
442 | * allocate a new buffer and assign it to the block. | |
663dbc72 BJ |
443 | */ |
444 | struct buf * | |
ec67a3ce MK |
445 | #ifdef SECSIZE |
446 | getblk(dev, blkno, size, secsize) | |
447 | #else SECSIZE | |
e140149a | 448 | getblk(vp, blkno, size, slpflag, slptimeo) |
7188ac27 | 449 | register struct vnode *vp; |
ad30fb67 | 450 | daddr_t blkno; |
e140149a | 451 | int size, slpflag, slptimeo; |
ec67a3ce MK |
452 | #ifdef SECSIZE |
453 | long secsize; | |
454 | #endif SECSIZE | |
663dbc72 | 455 | { |
e3249ec0 | 456 | register struct buf *bp; |
af50abe6 | 457 | struct bufhashhdr *dp; |
e140149a | 458 | int s, error; |
663dbc72 | 459 | |
00a6a148 KM |
460 | if (size > MAXBSIZE) |
461 | panic("getblk: size too big"); | |
3efdd860 | 462 | /* |
d42a4811 KM |
463 | * Search the cache for the block. If the buffer is found, |
464 | * but it is currently locked, the we must wait for it to | |
465 | * become available. | |
3efdd860 | 466 | */ |
7188ac27 | 467 | dp = BUFHASH(vp, blkno); |
3efdd860 | 468 | loop: |
af50abe6 | 469 | for (bp = dp->lh_first; bp; bp = bp->b_hash.le_next) { |
e140149a | 470 | if (bp->b_lblkno != blkno || bp->b_vp != vp) |
663dbc72 | 471 | continue; |
a5e62f37 | 472 | s = splbio(); |
d42a4811 | 473 | if (bp->b_flags & B_BUSY) { |
663dbc72 | 474 | bp->b_flags |= B_WANTED; |
e140149a KM |
475 | error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), |
476 | "getblk", slptimeo); | |
23900030 | 477 | splx(s); |
e140149a KM |
478 | if (error) |
479 | return (NULL); | |
663dbc72 BJ |
480 | goto loop; |
481 | } | |
e140149a KM |
482 | /* |
483 | * The test for B_INVAL is moved down here, since there | |
484 | * are cases where B_INVAL is set before VOP_BWRITE() is | |
485 | * called and for NFS, the process cannot be allowed to | |
486 | * allocate a new buffer for the same block until the write | |
487 | * back to the server has been completed. (ie. B_BUSY clears) | |
488 | */ | |
489 | if (bp->b_flags & B_INVAL) { | |
490 | splx(s); | |
491 | continue; | |
492 | } | |
c669f646 KM |
493 | bremfree(bp); |
494 | bp->b_flags |= B_BUSY; | |
23900030 | 495 | splx(s); |
32a56bda | 496 | if (bp->b_bcount != size) { |
24096dc3 | 497 | printf("getblk: stray size\n"); |
edadbc2c | 498 | bp->b_flags |= B_INVAL; |
e140149a | 499 | VOP_BWRITE(bp); |
9d6d37ce | 500 | goto loop; |
edadbc2c | 501 | } |
663dbc72 | 502 | bp->b_flags |= B_CACHE; |
a5e62f37 | 503 | return (bp); |
663dbc72 | 504 | } |
e140149a KM |
505 | /* |
506 | * The loop back to the top when getnewbuf() fails is because | |
507 | * stateless filesystems like NFS have no node locks. Thus, | |
508 | * there is a slight chance that more than one process will | |
509 | * try and getnewbuf() for the same block concurrently when | |
510 | * the first sleeps in getnewbuf(). So after a sleep, go back | |
511 | * up to the top to check the hash lists again. | |
512 | */ | |
513 | if ((bp = getnewbuf(slpflag, slptimeo)) == 0) | |
514 | goto loop; | |
3efdd860 | 515 | bremhash(bp); |
edadbc2c | 516 | bgetvp(vp, bp); |
521a4688 | 517 | bp->b_bcount = 0; |
edadbc2c | 518 | bp->b_lblkno = blkno; |
ec67a3ce MK |
519 | #ifdef SECSIZE |
520 | bp->b_blksize = secsize; | |
521 | #endif SECSIZE | |
ad30fb67 | 522 | bp->b_blkno = blkno; |
4f083fd7 | 523 | bp->b_error = 0; |
7188ac27 KM |
524 | bp->b_resid = 0; |
525 | binshash(bp, dp); | |
521a4688 | 526 | allocbuf(bp, size); |
a5e62f37 | 527 | return (bp); |
663dbc72 BJ |
528 | } |
529 | ||
530 | /* | |
d42a4811 KM |
531 | * Allocate a buffer. |
532 | * The caller will assign it to a block. | |
663dbc72 BJ |
533 | */ |
534 | struct buf * | |
ad30fb67 KM |
535 | geteblk(size) |
536 | int size; | |
663dbc72 | 537 | { |
37392cf8 | 538 | register struct buf *bp; |
663dbc72 | 539 | |
00a6a148 KM |
540 | if (size > MAXBSIZE) |
541 | panic("geteblk: size too big"); | |
e140149a KM |
542 | while ((bp = getnewbuf(0, 0)) == NULL) |
543 | /* void */; | |
4f083fd7 | 544 | bp->b_flags |= B_INVAL; |
3efdd860 | 545 | bremhash(bp); |
37392cf8 | 546 | binshash(bp, &invalhash); |
521a4688 | 547 | bp->b_bcount = 0; |
ec67a3ce MK |
548 | #ifdef SECSIZE |
549 | bp->b_blksize = DEV_BSIZE; | |
550 | #endif SECSIZE | |
4f083fd7 | 551 | bp->b_error = 0; |
7188ac27 | 552 | bp->b_resid = 0; |
521a4688 | 553 | allocbuf(bp, size); |
a5e62f37 | 554 | return (bp); |
663dbc72 BJ |
555 | } |
556 | ||
ad30fb67 | 557 | /* |
521a4688 | 558 | * Expand or contract the actual memory allocated to a buffer. |
d42a4811 | 559 | * If no memory is available, release buffer and take error exit. |
ad30fb67 | 560 | */ |
521a4688 KM |
561 | allocbuf(tp, size) |
562 | register struct buf *tp; | |
ad30fb67 KM |
563 | int size; |
564 | { | |
521a4688 KM |
565 | register struct buf *bp, *ep; |
566 | int sizealloc, take, s; | |
ad30fb67 | 567 | |
521a4688 KM |
568 | sizealloc = roundup(size, CLBYTES); |
569 | /* | |
570 | * Buffer size does not change | |
571 | */ | |
572 | if (sizealloc == tp->b_bufsize) | |
573 | goto out; | |
574 | /* | |
575 | * Buffer size is shrinking. | |
576 | * Place excess space in a buffer header taken from the | |
577 | * BQ_EMPTY buffer list and placed on the "most free" list. | |
578 | * If no extra buffer headers are available, leave the | |
579 | * extra space in the present buffer. | |
580 | */ | |
581 | if (sizealloc < tp->b_bufsize) { | |
af50abe6 | 582 | if ((ep = bufqueues[BQ_EMPTY].tqh_first) == NULL) |
521a4688 KM |
583 | goto out; |
584 | s = splbio(); | |
585 | bremfree(ep); | |
586 | ep->b_flags |= B_BUSY; | |
587 | splx(s); | |
cb84e0ab | 588 | pagemove((char *)tp->b_data + sizealloc, ep->b_data, |
521a4688 KM |
589 | (int)tp->b_bufsize - sizealloc); |
590 | ep->b_bufsize = tp->b_bufsize - sizealloc; | |
591 | tp->b_bufsize = sizealloc; | |
592 | ep->b_flags |= B_INVAL; | |
593 | ep->b_bcount = 0; | |
594 | brelse(ep); | |
595 | goto out; | |
596 | } | |
597 | /* | |
598 | * More buffer space is needed. Get it out of buffers on | |
599 | * the "most free" list, placing the empty headers on the | |
600 | * BQ_EMPTY buffer header list. | |
601 | */ | |
602 | while (tp->b_bufsize < sizealloc) { | |
603 | take = sizealloc - tp->b_bufsize; | |
e140149a KM |
604 | while ((bp = getnewbuf(0, 0)) == NULL) |
605 | /* void */; | |
521a4688 KM |
606 | if (take >= bp->b_bufsize) |
607 | take = bp->b_bufsize; | |
cb84e0ab KB |
608 | pagemove(&((char *)bp->b_data)[bp->b_bufsize - take], |
609 | &((char *)tp->b_data)[tp->b_bufsize], take); | |
521a4688 KM |
610 | tp->b_bufsize += take; |
611 | bp->b_bufsize = bp->b_bufsize - take; | |
612 | if (bp->b_bcount > bp->b_bufsize) | |
613 | bp->b_bcount = bp->b_bufsize; | |
614 | if (bp->b_bufsize <= 0) { | |
615 | bremhash(bp); | |
37392cf8 | 616 | binshash(bp, &invalhash); |
d42a4811 | 617 | bp->b_dev = NODEV; |
521a4688 KM |
618 | bp->b_error = 0; |
619 | bp->b_flags |= B_INVAL; | |
620 | } | |
621 | brelse(bp); | |
622 | } | |
623 | out: | |
624 | tp->b_bcount = size; | |
625 | return (1); | |
4f083fd7 SL |
626 | } |
627 | ||
4f083fd7 SL |
628 | /* |
629 | * Find a buffer which is available for use. | |
630 | * Select something from a free list. | |
631 | * Preference is to AGE list, then LRU list. | |
632 | */ | |
633 | struct buf * | |
e140149a KM |
634 | getnewbuf(slpflag, slptimeo) |
635 | int slpflag, slptimeo; | |
4f083fd7 | 636 | { |
37392cf8 | 637 | register struct buf *bp; |
af50abe6 | 638 | register struct bqueues *dp; |
a937f856 | 639 | register struct ucred *cred; |
4f083fd7 | 640 | int s; |
b88d365e KM |
641 | struct buf *abp; |
642 | static int losecnt = 0; | |
4f083fd7 SL |
643 | |
644 | loop: | |
a5e62f37 | 645 | s = splbio(); |
b88d365e KM |
646 | abp = NULL; |
647 | for (dp = &bufqueues[BQ_AGE]; dp > bufqueues; dp--) { | |
648 | for (bp = dp->qe_next; bp; bp = bp->b_freelist.qe_next) { | |
649 | if (abp == NULL) | |
650 | abp = bp; | |
651 | if ((bp->b_flags & B_DELWRI) && | |
652 | bp->b_vp && VOP_ISLOCKED(bp->b_vp)) | |
653 | continue; | |
654 | goto found; | |
655 | } | |
656 | } | |
37392cf8 | 657 | if (dp == bufqueues) { /* no free blocks */ |
b88d365e KM |
658 | if (abp) { |
659 | bp = abp; | |
660 | bp->b_flags |= B_XXX; | |
661 | if (losecnt++ < 20) { | |
662 | vprint("skipping blkno check", bp->b_vp); | |
663 | printf("\tlblkno %d, blkno %d\n", | |
664 | bp->b_lblkno, bp->b_blkno); | |
665 | } | |
666 | goto found; | |
667 | } | |
37392cf8 | 668 | needbuffer = 1; |
e140149a KM |
669 | (void) tsleep((caddr_t)&needbuffer, slpflag | (PRIBIO + 1), |
670 | "getnewbuf", slptimeo); | |
4b7d506c | 671 | splx(s); |
e140149a | 672 | return (NULL); |
4f083fd7 | 673 | } |
b88d365e | 674 | found: |
c669f646 KM |
675 | bremfree(bp); |
676 | bp->b_flags |= B_BUSY; | |
677 | splx(s); | |
4f083fd7 | 678 | if (bp->b_flags & B_DELWRI) { |
033a786e | 679 | (void) bawrite(bp); |
4f083fd7 SL |
680 | goto loop; |
681 | } | |
c5a600cf | 682 | trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno); |
edadbc2c KM |
683 | if (bp->b_vp) |
684 | brelvp(bp); | |
a937f856 KM |
685 | if (bp->b_rcred != NOCRED) { |
686 | cred = bp->b_rcred; | |
687 | bp->b_rcred = NOCRED; | |
688 | crfree(cred); | |
689 | } | |
690 | if (bp->b_wcred != NOCRED) { | |
691 | cred = bp->b_wcred; | |
692 | bp->b_wcred = NOCRED; | |
693 | crfree(cred); | |
694 | } | |
4f083fd7 | 695 | bp->b_flags = B_BUSY; |
1c89915d | 696 | bp->b_dirtyoff = bp->b_dirtyend = 0; |
bb1626f7 | 697 | bp->b_validoff = bp->b_validend = 0; |
4f083fd7 SL |
698 | return (bp); |
699 | } | |
700 | ||
663dbc72 | 701 | /* |
d42a4811 KM |
702 | * Wait for I/O to complete. |
703 | * | |
704 | * Extract and return any errors associated with the I/O. | |
705 | * If the error flag is set, but no specific error is | |
706 | * given, return EIO. | |
663dbc72 | 707 | */ |
3efdd860 | 708 | biowait(bp) |
ad30fb67 | 709 | register struct buf *bp; |
663dbc72 | 710 | { |
530d0032 | 711 | int s; |
663dbc72 | 712 | |
a5e62f37 | 713 | s = splbio(); |
a937f856 | 714 | while ((bp->b_flags & B_DONE) == 0) |
663dbc72 | 715 | sleep((caddr_t)bp, PRIBIO); |
530d0032 | 716 | splx(s); |
7188ac27 KM |
717 | if ((bp->b_flags & B_ERROR) == 0) |
718 | return (0); | |
719 | if (bp->b_error) | |
720 | return (bp->b_error); | |
721 | return (EIO); | |
663dbc72 BJ |
722 | } |
723 | ||
663dbc72 | 724 | /* |
af04ce66 | 725 | * Mark I/O complete on a buffer. |
d42a4811 KM |
726 | * |
727 | * If a callback has been requested, e.g. the pageout | |
728 | * daemon, do so. Otherwise, awaken waiting processes. | |
663dbc72 | 729 | */ |
251f56ba | 730 | void |
3efdd860 KM |
731 | biodone(bp) |
732 | register struct buf *bp; | |
663dbc72 | 733 | { |
663dbc72 | 734 | |
80e7c811 | 735 | if (bp->b_flags & B_DONE) |
3efdd860 | 736 | panic("dup biodone"); |
663dbc72 | 737 | bp->b_flags |= B_DONE; |
76429560 KM |
738 | if ((bp->b_flags & B_READ) == 0) |
739 | vwakeup(bp); | |
961945a8 SL |
740 | if (bp->b_flags & B_CALL) { |
741 | bp->b_flags &= ~B_CALL; | |
742 | (*bp->b_iodone)(bp); | |
743 | return; | |
744 | } | |
d42a4811 | 745 | if (bp->b_flags & B_ASYNC) |
663dbc72 BJ |
746 | brelse(bp); |
747 | else { | |
748 | bp->b_flags &= ~B_WANTED; | |
749 | wakeup((caddr_t)bp); | |
750 | } | |
751 | } | |
aa95c6fc | 752 | |
b5d79df9 MS |
753 | int |
754 | count_lock_queue() | |
755 | { | |
756 | register struct buf *bp; | |
757 | register int ret; | |
758 | ||
af50abe6 KM |
759 | for (ret = 0, bp = (struct buf *)bufqueues[BQ_LOCKED].tqh_first; |
760 | bp; bp = (struct buf *)bp->b_freelist.tqe_next) | |
b5d79df9 MS |
761 | ++ret; |
762 | return(ret); | |
763 | } | |
764 | ||
aa95c6fc KM |
765 | #ifdef DIAGNOSTIC |
766 | /* | |
767 | * Print out statistics on the current allocation of the buffer pool. | |
768 | * Can be enabled to print out on every ``sync'' by setting "syncprt" | |
5c8652bb | 769 | * in vfs_syscalls.c using sysctl. |
aa95c6fc KM |
770 | */ |
771 | void | |
772 | vfs_bufstats() | |
773 | { | |
774 | int s, i, j, count; | |
37392cf8 | 775 | register struct buf *bp; |
af50abe6 | 776 | register struct bqueues *dp; |
aa95c6fc KM |
777 | int counts[MAXBSIZE/CLBYTES+1]; |
778 | static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" }; | |
779 | ||
37392cf8 | 780 | for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) { |
aa95c6fc KM |
781 | count = 0; |
782 | for (j = 0; j <= MAXBSIZE/CLBYTES; j++) | |
783 | counts[j] = 0; | |
784 | s = splbio(); | |
af50abe6 | 785 | for (bp = dp->tqh_first; bp; bp = bp->b_freelist.tqe_next) { |
aa95c6fc KM |
786 | counts[bp->b_bufsize/CLBYTES]++; |
787 | count++; | |
788 | } | |
789 | splx(s); | |
790 | printf("%s: total-%d", bname[i], count); | |
791 | for (j = 0; j <= MAXBSIZE/CLBYTES; j++) | |
792 | if (counts[j] != 0) | |
793 | printf(", %d-%d", j * CLBYTES, counts[j]); | |
794 | printf("\n"); | |
795 | } | |
796 | } | |
797 | #endif /* DIAGNOSTIC */ |