have to protect acct_process from acctwatch closing the vnode
[unix-history] / usr / src / sys / kern / vfs_bio.c
CommitLineData
/*-
 * Copyright (c) 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * This code is derived from software contributed to Berkeley by
 * Berkeley Software Design Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_bio.c	8.10 (Berkeley) %G%
 */
961945a8 17
251f56ba 18#include <sys/param.h>
af50abe6 19#include <sys/systm.h>
251f56ba
KB
20#include <sys/proc.h>
21#include <sys/buf.h>
22#include <sys/vnode.h>
251f56ba
KB
23#include <sys/mount.h>
24#include <sys/trace.h>
37392cf8 25#include <sys/malloc.h>
5c8652bb 26#include <sys/resourcevar.h>
b88d365e
KM
27#include <ufs/ufs/quota.h>
28#include <ufs/ufs/inode.h>
37392cf8
KM
29
30/*
31 * Definitions for the buffer hash lists.
32 */
33#define BUFHASH(dvp, lbn) \
34 (&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
af50abe6 35LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash;
37392cf8
KM
36u_long bufhash;
37
38/*
39 * Insq/Remq for the buffer hash lists.
40 */
af50abe6
KM
41#define binshash(bp, dp) LIST_INSERT_HEAD(dp, bp, b_hash)
42#define bremhash(bp) LIST_REMOVE(bp, b_hash)
37392cf8
KM
43
44/*
45 * Definitions for the buffer free lists.
46 */
47#define BQUEUES 4 /* number of free buffer queues */
48
49#define BQ_LOCKED 0 /* super-blocks &c */
50#define BQ_LRU 1 /* lru, useful buffers */
51#define BQ_AGE 2 /* rubbish */
52#define BQ_EMPTY 3 /* buffer headers with no memory */
53
af50abe6 54TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
37392cf8
KM
55int needbuffer;
56
57/*
58 * Insq/Remq for the buffer free lists.
59 */
af50abe6
KM
60#define binsheadfree(bp, dp) TAILQ_INSERT_HEAD(dp, bp, b_freelist)
61#define binstailfree(bp, dp) TAILQ_INSERT_TAIL(dp, bp, b_freelist)
e3249ec0 62
37392cf8
KM
63void
64bremfree(bp)
65 struct buf *bp;
66{
af50abe6 67 struct bqueues *dp = NULL;
37392cf8 68
e3249ec0
KM
69 /*
70 * We only calculate the head of the freelist when removing
71 * the last element of the list as that is the only time that
72 * it is needed (e.g. to reset the tail pointer).
af50abe6
KM
73 *
74 * NB: This makes an assumption about how tailq's are implemented.
e3249ec0 75 */
af50abe6 76 if (bp->b_freelist.tqe_next == NULL) {
37392cf8 77 for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
af50abe6 78 if (dp->tqh_last == &bp->b_freelist.tqe_next)
37392cf8
KM
79 break;
80 if (dp == &bufqueues[BQUEUES])
81 panic("bremfree: lost tail");
37392cf8 82 }
af50abe6 83 TAILQ_REMOVE(dp, bp, b_freelist);
37392cf8 84}
663dbc72 85
e7db227e
MK
86/*
87 * Initialize buffers and hash links for buffers.
88 */
251f56ba 89void
e7db227e
MK
90bufinit()
91{
37392cf8 92 register struct buf *bp;
af50abe6 93 struct bqueues *dp;
e7db227e 94 register int i;
e7db227e
MK
95 int base, residual;
96
37392cf8 97 for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
af50abe6
KM
98 TAILQ_INIT(dp);
99 bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash);
e7db227e
MK
100 base = bufpages / nbuf;
101 residual = bufpages % nbuf;
102 for (i = 0; i < nbuf; i++) {
103 bp = &buf[i];
37392cf8 104 bzero((char *)bp, sizeof *bp);
e7db227e 105 bp->b_dev = NODEV;
e7db227e
MK
106 bp->b_rcred = NOCRED;
107 bp->b_wcred = NOCRED;
34f384c1 108 bp->b_vnbufs.le_next = NOLIST;
cb84e0ab 109 bp->b_data = buffers + i * MAXBSIZE;
e7db227e
MK
110 if (i < residual)
111 bp->b_bufsize = (base + 1) * CLBYTES;
112 else
113 bp->b_bufsize = base * CLBYTES;
31222d0d 114 bp->b_flags = B_INVAL;
37392cf8 115 dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY];
31222d0d 116 binsheadfree(bp, dp);
37392cf8 117 binshash(bp, &invalhash);
e7db227e
MK
118 }
119}
120
663dbc72 121/*
d42a4811
KM
122 * Find the block in the buffer pool.
123 * If the buffer is not present, allocate a new buffer and load
124 * its contents according to the filesystem fill routine.
663dbc72 125 */
a937f856 126bread(vp, blkno, size, cred, bpp)
7188ac27 127 struct vnode *vp;
ad30fb67
KM
128 daddr_t blkno;
129 int size;
a937f856 130 struct ucred *cred;
7188ac27 131 struct buf **bpp;
ec67a3ce
MK
132#ifdef SECSIZE
133 long secsize;
134#endif SECSIZE
663dbc72 135{
3789a403 136 struct proc *p = curproc; /* XXX */
663dbc72
BJ
137 register struct buf *bp;
138
4f083fd7
SL
139 if (size == 0)
140 panic("bread: size 0");
ec67a3ce
MK
141#ifdef SECSIZE
142 bp = getblk(dev, blkno, size, secsize);
143#else SECSIZE
e140149a 144 *bpp = bp = getblk(vp, blkno, size, 0, 0);
ec67a3ce 145#endif SECSIZE
d42a4811 146 if (bp->b_flags & (B_DONE | B_DELWRI)) {
c5a600cf 147 trace(TR_BREADHIT, pack(vp, size), blkno);
7188ac27 148 return (0);
663dbc72
BJ
149 }
150 bp->b_flags |= B_READ;
4f083fd7
SL
151 if (bp->b_bcount > bp->b_bufsize)
152 panic("bread");
a937f856
KM
153 if (bp->b_rcred == NOCRED && cred != NOCRED) {
154 crhold(cred);
155 bp->b_rcred = cred;
156 }
7188ac27 157 VOP_STRATEGY(bp);
c5a600cf 158 trace(TR_BREADMISS, pack(vp, size), blkno);
3789a403 159 p->p_stats->p_ru.ru_inblock++; /* pay for read */
7188ac27 160 return (biowait(bp));
663dbc72
BJ
161}
162
163/*
bb1626f7
KM
164 * Operates like bread, but also starts I/O on the N specified
165 * read-ahead blocks.
663dbc72 166 */
bb1626f7 167breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp)
7188ac27 168 struct vnode *vp;
84baaab3 169 daddr_t blkno; int size;
ec67a3ce
MK
170#ifdef SECSIZE
171 long secsize;
172#endif SECSIZE
bb1626f7
KM
173 daddr_t rablkno[]; int rabsize[];
174 int num;
a937f856 175 struct ucred *cred;
7188ac27 176 struct buf **bpp;
663dbc72 177{
3789a403 178 struct proc *p = curproc; /* XXX */
663dbc72 179 register struct buf *bp, *rabp;
bb1626f7 180 register int i;
663dbc72
BJ
181
182 bp = NULL;
3efdd860 183 /*
d42a4811
KM
184 * If the block is not memory resident,
185 * allocate a buffer and start I/O.
3efdd860 186 */
7188ac27 187 if (!incore(vp, blkno)) {
e140149a 188 *bpp = bp = getblk(vp, blkno, size, 0, 0);
ec67a3ce 189#endif SECSIZE
d42a4811 190 if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
663dbc72 191 bp->b_flags |= B_READ;
4f083fd7 192 if (bp->b_bcount > bp->b_bufsize)
bb1626f7 193 panic("breadn");
a937f856
KM
194 if (bp->b_rcred == NOCRED && cred != NOCRED) {
195 crhold(cred);
196 bp->b_rcred = cred;
197 }
7188ac27 198 VOP_STRATEGY(bp);
c5a600cf 199 trace(TR_BREADMISS, pack(vp, size), blkno);
3789a403 200 p->p_stats->p_ru.ru_inblock++; /* pay for read */
7d1e9cf4 201 } else {
c5a600cf 202 trace(TR_BREADHIT, pack(vp, size), blkno);
7d1e9cf4 203 }
663dbc72 204 }
3efdd860
KM
205
206 /*
bb1626f7
KM
207 * If there's read-ahead block(s), start I/O
208 * on them also (as above).
3efdd860 209 */
bb1626f7
KM
210 for (i = 0; i < num; i++) {
211 if (incore(vp, rablkno[i]))
212 continue;
e140149a 213 rabp = getblk(vp, rablkno[i], rabsize[i], 0, 0);
ec67a3ce 214#endif SECSIZE
d42a4811 215 if (rabp->b_flags & (B_DONE | B_DELWRI)) {
663dbc72 216 brelse(rabp);
bb1626f7 217 trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]);
973ecc4f 218 } else {
d42a4811 219 rabp->b_flags |= B_ASYNC | B_READ;
4f083fd7
SL
220 if (rabp->b_bcount > rabp->b_bufsize)
221 panic("breadrabp");
5062ac4a 222 if (rabp->b_rcred == NOCRED && cred != NOCRED) {
a937f856 223 crhold(cred);
5062ac4a 224 rabp->b_rcred = cred;
a937f856 225 }
7188ac27 226 VOP_STRATEGY(rabp);
bb1626f7 227 trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]);
3789a403 228 p->p_stats->p_ru.ru_inblock++; /* pay in advance */
663dbc72
BJ
229 }
230 }
3efdd860
KM
231
232 /*
d42a4811
KM
233 * If block was memory resident, let bread get it.
234 * If block was not memory resident, the read was
235 * started above, so just wait for the read to complete.
3efdd860 236 */
84baaab3 237 if (bp == NULL)
ec67a3ce
MK
238#ifdef SECSIZE
239 return (bread(dev, blkno, size, secsize));
240#else SECSIZE
a937f856 241 return (bread(vp, blkno, size, cred, bpp));
7188ac27 242 return (biowait(bp));
663dbc72
BJ
243}
244
245/*
d42a4811
KM
246 * Synchronous write.
247 * Release buffer on completion.
663dbc72
BJ
248 */
249bwrite(bp)
3efdd860 250 register struct buf *bp;
663dbc72 251{
3789a403 252 struct proc *p = curproc; /* XXX */
7188ac27 253 register int flag;
31222d0d 254 int s, error = 0;
663dbc72 255
fe668e35 256 if ((bp->b_flags & B_ASYNC) == 0 &&
188f11ee
MH
257 bp->b_vp && bp->b_vp->v_mount &&
258 (bp->b_vp->v_mount->mnt_flag & MNT_ASYNC)) {
fe668e35
KM
259 bdwrite(bp);
260 return (0);
261 }
663dbc72 262 flag = bp->b_flags;
f844ee62 263 bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
77dc8a8c
KM
264 if (flag & B_ASYNC) {
265 if ((flag & B_DELWRI) == 0)
266 p->p_stats->p_ru.ru_oublock++; /* no one paid yet */
267 else
268 reassignbuf(bp, bp->b_vp);
269 }
c5a600cf 270 trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
4f083fd7
SL
271 if (bp->b_bcount > bp->b_bufsize)
272 panic("bwrite");
86e7dd3b 273 s = splbio();
c669f646 274 bp->b_vp->v_numoutput++;
e140149a 275 bp->b_flags |= B_WRITEINPROG;
86e7dd3b 276 splx(s);
7188ac27 277 VOP_STRATEGY(bp);
3efdd860
KM
278
279 /*
d42a4811 280 * If the write was synchronous, then await I/O completion.
3efdd860 281 * If the write was "delayed", then we put the buffer on
d42a4811 282 * the queue of blocks awaiting I/O completion status.
3efdd860 283 */
d42a4811 284 if ((flag & B_ASYNC) == 0) {
7188ac27 285 error = biowait(bp);
77dc8a8c
KM
286 if ((flag&B_DELWRI) == 0)
287 p->p_stats->p_ru.ru_oublock++; /* no one paid yet */
288 else
289 reassignbuf(bp, bp->b_vp);
e140149a
KM
290 if (bp->b_flags & B_EINTR) {
291 bp->b_flags &= ~B_EINTR;
292 error = EINTR;
293 }
663dbc72 294 brelse(bp);
7188ac27 295 } else if (flag & B_DELWRI) {
31222d0d 296 s = splbio();
663dbc72 297 bp->b_flags |= B_AGE;
31222d0d 298 splx(s);
7188ac27
KM
299 }
300 return (error);
663dbc72
BJ
301}
302
80746147
JH
303int
304vn_bwrite(ap)
305 struct vop_bwrite_args *ap;
306{
ac85da8a 307
37392cf8 308 return (bwrite(ap->a_bp));
80746147
JH
309}
310
311
663dbc72 312/*
d42a4811
KM
313 * Delayed write.
314 *
315 * The buffer is marked dirty, but is not queued for I/O.
316 * This routine should be used when the buffer is expected
317 * to be modified again soon, typically a small write that
318 * partially fills a buffer.
319 *
320 * NB: magnetic tapes cannot be delayed; they must be
321 * written in the order that the writes are requested.
663dbc72
BJ
322 */
323bdwrite(bp)
3efdd860 324 register struct buf *bp;
663dbc72 325{
3789a403 326 struct proc *p = curproc; /* XXX */
663dbc72 327
c669f646
KM
328 if ((bp->b_flags & B_DELWRI) == 0) {
329 bp->b_flags |= B_DELWRI;
330 reassignbuf(bp, bp->b_vp);
3789a403 331 p->p_stats->p_ru.ru_oublock++; /* no one paid yet */
c669f646 332 }
7188ac27 333 /*
edadbc2c 334 * If this is a tape drive, the write must be initiated.
7188ac27 335 */
ec67a3ce 336 if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE)
663dbc72 337 bawrite(bp);
edadbc2c 338 } else {
d42a4811 339 bp->b_flags |= (B_DONE | B_DELWRI);
663dbc72
BJ
340 brelse(bp);
341 }
342}
343
344/*
d42a4811
KM
345 * Asynchronous write.
346 * Start I/O on a buffer, but do not wait for it to complete.
347 * The buffer is released when the I/O completes.
663dbc72
BJ
348 */
349bawrite(bp)
3efdd860 350 register struct buf *bp;
663dbc72
BJ
351{
352
d42a4811
KM
353 /*
354 * Setting the ASYNC flag causes bwrite to return
355 * after starting the I/O.
356 */
663dbc72 357 bp->b_flags |= B_ASYNC;
e140149a 358 (void) VOP_BWRITE(bp);
663dbc72
BJ
359}
360
361/*
d42a4811
KM
362 * Release a buffer.
363 * Even if the buffer is dirty, no I/O is started.
663dbc72
BJ
364 */
365brelse(bp)
3efdd860 366 register struct buf *bp;
663dbc72 367{
af50abe6 368 register struct bqueues *flist;
d42a4811 369 int s;
663dbc72 370
c5a600cf 371 trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
3efdd860 372 /*
edadbc2c
KM
373 * If a process is waiting for the buffer, or
374 * is waiting for a free buffer, awaken it.
3efdd860 375 */
d42a4811 376 if (bp->b_flags & B_WANTED)
663dbc72 377 wakeup((caddr_t)bp);
37392cf8
KM
378 if (needbuffer) {
379 needbuffer = 0;
380 wakeup((caddr_t)&needbuffer);
663dbc72 381 }
edadbc2c
KM
382 /*
383 * Retry I/O for locked buffers rather than invalidating them.
384 */
31222d0d 385 s = splbio();
edadbc2c
KM
386 if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
387 bp->b_flags &= ~B_ERROR;
edadbc2c
KM
388 /*
389 * Disassociate buffers that are no longer valid.
390 */
d42a4811 391 if (bp->b_flags & (B_NOCACHE | B_ERROR))
7188ac27 392 bp->b_flags |= B_INVAL;
d42a4811 393 if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
edadbc2c
KM
394 if (bp->b_vp)
395 brelvp(bp);
396 bp->b_flags &= ~B_DELWRI;
7188ac27 397 }
3efdd860
KM
398 /*
399 * Stick the buffer back on a free list.
400 */
4f083fd7
SL
401 if (bp->b_bufsize <= 0) {
402 /* block has no buffer ... put at front of unused buffer list */
37392cf8 403 flist = &bufqueues[BQ_EMPTY];
4f083fd7 404 binsheadfree(bp, flist);
d42a4811 405 } else if (bp->b_flags & (B_ERROR | B_INVAL)) {
46387ee3 406 /* block has no info ... put at front of most free list */
37392cf8 407 flist = &bufqueues[BQ_AGE];
3efdd860 408 binsheadfree(bp, flist);
663dbc72 409 } else {
46387ee3 410 if (bp->b_flags & B_LOCKED)
37392cf8 411 flist = &bufqueues[BQ_LOCKED];
46387ee3 412 else if (bp->b_flags & B_AGE)
37392cf8 413 flist = &bufqueues[BQ_AGE];
46387ee3 414 else
37392cf8 415 flist = &bufqueues[BQ_LRU];
3efdd860 416 binstailfree(bp, flist);
663dbc72 417 }
d42a4811 418 bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
663dbc72
BJ
419 splx(s);
420}
421
422/*
d42a4811 423 * Check to see if a block is currently memory resident.
663dbc72 424 */
e140149a 425struct buf *
7188ac27
KM
426incore(vp, blkno)
427 struct vnode *vp;
3efdd860 428 daddr_t blkno;
663dbc72
BJ
429{
430 register struct buf *bp;
663dbc72 431
af50abe6 432 for (bp = BUFHASH(vp, blkno)->lh_first; bp; bp = bp->b_hash.le_next)
edadbc2c 433 if (bp->b_lblkno == blkno && bp->b_vp == vp &&
3efdd860 434 (bp->b_flags & B_INVAL) == 0)
e140149a
KM
435 return (bp);
436 return (NULL);
663dbc72
BJ
437}
438
edadbc2c 439/*
d42a4811
KM
440 * Check to see if a block is currently memory resident.
441 * If it is resident, return it. If it is not resident,
442 * allocate a new buffer and assign it to the block.
663dbc72
BJ
443 */
444struct buf *
ec67a3ce
MK
445#ifdef SECSIZE
446getblk(dev, blkno, size, secsize)
447#else SECSIZE
e140149a 448getblk(vp, blkno, size, slpflag, slptimeo)
7188ac27 449 register struct vnode *vp;
ad30fb67 450 daddr_t blkno;
e140149a 451 int size, slpflag, slptimeo;
ec67a3ce
MK
452#ifdef SECSIZE
453 long secsize;
454#endif SECSIZE
663dbc72 455{
e3249ec0 456 register struct buf *bp;
af50abe6 457 struct bufhashhdr *dp;
e140149a 458 int s, error;
663dbc72 459
00a6a148
KM
460 if (size > MAXBSIZE)
461 panic("getblk: size too big");
3efdd860 462 /*
d42a4811
KM
463 * Search the cache for the block. If the buffer is found,
464 * but it is currently locked, the we must wait for it to
465 * become available.
3efdd860 466 */
7188ac27 467 dp = BUFHASH(vp, blkno);
3efdd860 468loop:
af50abe6 469 for (bp = dp->lh_first; bp; bp = bp->b_hash.le_next) {
e140149a 470 if (bp->b_lblkno != blkno || bp->b_vp != vp)
663dbc72 471 continue;
a5e62f37 472 s = splbio();
d42a4811 473 if (bp->b_flags & B_BUSY) {
663dbc72 474 bp->b_flags |= B_WANTED;
e140149a
KM
475 error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
476 "getblk", slptimeo);
23900030 477 splx(s);
e140149a
KM
478 if (error)
479 return (NULL);
663dbc72
BJ
480 goto loop;
481 }
e140149a
KM
482 /*
483 * The test for B_INVAL is moved down here, since there
484 * are cases where B_INVAL is set before VOP_BWRITE() is
485 * called and for NFS, the process cannot be allowed to
486 * allocate a new buffer for the same block until the write
487 * back to the server has been completed. (ie. B_BUSY clears)
488 */
489 if (bp->b_flags & B_INVAL) {
490 splx(s);
491 continue;
492 }
c669f646
KM
493 bremfree(bp);
494 bp->b_flags |= B_BUSY;
23900030 495 splx(s);
32a56bda 496 if (bp->b_bcount != size) {
24096dc3 497 printf("getblk: stray size\n");
edadbc2c 498 bp->b_flags |= B_INVAL;
e140149a 499 VOP_BWRITE(bp);
9d6d37ce 500 goto loop;
edadbc2c 501 }
663dbc72 502 bp->b_flags |= B_CACHE;
a5e62f37 503 return (bp);
663dbc72 504 }
e140149a
KM
505 /*
506 * The loop back to the top when getnewbuf() fails is because
507 * stateless filesystems like NFS have no node locks. Thus,
508 * there is a slight chance that more than one process will
509 * try and getnewbuf() for the same block concurrently when
510 * the first sleeps in getnewbuf(). So after a sleep, go back
511 * up to the top to check the hash lists again.
512 */
513 if ((bp = getnewbuf(slpflag, slptimeo)) == 0)
514 goto loop;
3efdd860 515 bremhash(bp);
edadbc2c 516 bgetvp(vp, bp);
521a4688 517 bp->b_bcount = 0;
edadbc2c 518 bp->b_lblkno = blkno;
ec67a3ce
MK
519#ifdef SECSIZE
520 bp->b_blksize = secsize;
521#endif SECSIZE
ad30fb67 522 bp->b_blkno = blkno;
4f083fd7 523 bp->b_error = 0;
7188ac27
KM
524 bp->b_resid = 0;
525 binshash(bp, dp);
521a4688 526 allocbuf(bp, size);
a5e62f37 527 return (bp);
663dbc72
BJ
528}
529
530/*
d42a4811
KM
531 * Allocate a buffer.
532 * The caller will assign it to a block.
663dbc72
BJ
533 */
534struct buf *
ad30fb67
KM
535geteblk(size)
536 int size;
663dbc72 537{
37392cf8 538 register struct buf *bp;
663dbc72 539
00a6a148
KM
540 if (size > MAXBSIZE)
541 panic("geteblk: size too big");
e140149a
KM
542 while ((bp = getnewbuf(0, 0)) == NULL)
543 /* void */;
4f083fd7 544 bp->b_flags |= B_INVAL;
3efdd860 545 bremhash(bp);
37392cf8 546 binshash(bp, &invalhash);
521a4688 547 bp->b_bcount = 0;
ec67a3ce
MK
548#ifdef SECSIZE
549 bp->b_blksize = DEV_BSIZE;
550#endif SECSIZE
4f083fd7 551 bp->b_error = 0;
7188ac27 552 bp->b_resid = 0;
521a4688 553 allocbuf(bp, size);
a5e62f37 554 return (bp);
663dbc72
BJ
555}
556
ad30fb67 557/*
521a4688 558 * Expand or contract the actual memory allocated to a buffer.
d42a4811 559 * If no memory is available, release buffer and take error exit.
ad30fb67 560 */
521a4688
KM
561allocbuf(tp, size)
562 register struct buf *tp;
ad30fb67
KM
563 int size;
564{
521a4688
KM
565 register struct buf *bp, *ep;
566 int sizealloc, take, s;
ad30fb67 567
521a4688
KM
568 sizealloc = roundup(size, CLBYTES);
569 /*
570 * Buffer size does not change
571 */
572 if (sizealloc == tp->b_bufsize)
573 goto out;
574 /*
575 * Buffer size is shrinking.
576 * Place excess space in a buffer header taken from the
577 * BQ_EMPTY buffer list and placed on the "most free" list.
578 * If no extra buffer headers are available, leave the
579 * extra space in the present buffer.
580 */
581 if (sizealloc < tp->b_bufsize) {
af50abe6 582 if ((ep = bufqueues[BQ_EMPTY].tqh_first) == NULL)
521a4688
KM
583 goto out;
584 s = splbio();
585 bremfree(ep);
586 ep->b_flags |= B_BUSY;
587 splx(s);
cb84e0ab 588 pagemove((char *)tp->b_data + sizealloc, ep->b_data,
521a4688
KM
589 (int)tp->b_bufsize - sizealloc);
590 ep->b_bufsize = tp->b_bufsize - sizealloc;
591 tp->b_bufsize = sizealloc;
592 ep->b_flags |= B_INVAL;
593 ep->b_bcount = 0;
594 brelse(ep);
595 goto out;
596 }
597 /*
598 * More buffer space is needed. Get it out of buffers on
599 * the "most free" list, placing the empty headers on the
600 * BQ_EMPTY buffer header list.
601 */
602 while (tp->b_bufsize < sizealloc) {
603 take = sizealloc - tp->b_bufsize;
e140149a
KM
604 while ((bp = getnewbuf(0, 0)) == NULL)
605 /* void */;
521a4688
KM
606 if (take >= bp->b_bufsize)
607 take = bp->b_bufsize;
cb84e0ab
KB
608 pagemove(&((char *)bp->b_data)[bp->b_bufsize - take],
609 &((char *)tp->b_data)[tp->b_bufsize], take);
521a4688
KM
610 tp->b_bufsize += take;
611 bp->b_bufsize = bp->b_bufsize - take;
612 if (bp->b_bcount > bp->b_bufsize)
613 bp->b_bcount = bp->b_bufsize;
614 if (bp->b_bufsize <= 0) {
615 bremhash(bp);
37392cf8 616 binshash(bp, &invalhash);
d42a4811 617 bp->b_dev = NODEV;
521a4688
KM
618 bp->b_error = 0;
619 bp->b_flags |= B_INVAL;
620 }
621 brelse(bp);
622 }
623out:
624 tp->b_bcount = size;
625 return (1);
4f083fd7
SL
626}
627
4f083fd7
SL
628/*
629 * Find a buffer which is available for use.
630 * Select something from a free list.
631 * Preference is to AGE list, then LRU list.
632 */
633struct buf *
e140149a
KM
634getnewbuf(slpflag, slptimeo)
635 int slpflag, slptimeo;
4f083fd7 636{
37392cf8 637 register struct buf *bp;
af50abe6 638 register struct bqueues *dp;
a937f856 639 register struct ucred *cred;
4f083fd7 640 int s;
b88d365e
KM
641 struct buf *abp;
642 static int losecnt = 0;
4f083fd7
SL
643
644loop:
a5e62f37 645 s = splbio();
b88d365e
KM
646 abp = NULL;
647 for (dp = &bufqueues[BQ_AGE]; dp > bufqueues; dp--) {
648 for (bp = dp->qe_next; bp; bp = bp->b_freelist.qe_next) {
649 if (abp == NULL)
650 abp = bp;
651 if ((bp->b_flags & B_DELWRI) &&
652 bp->b_vp && VOP_ISLOCKED(bp->b_vp))
653 continue;
654 goto found;
655 }
656 }
37392cf8 657 if (dp == bufqueues) { /* no free blocks */
b88d365e
KM
658 if (abp) {
659 bp = abp;
660 bp->b_flags |= B_XXX;
661 if (losecnt++ < 20) {
662 vprint("skipping blkno check", bp->b_vp);
663 printf("\tlblkno %d, blkno %d\n",
664 bp->b_lblkno, bp->b_blkno);
665 }
666 goto found;
667 }
37392cf8 668 needbuffer = 1;
e140149a
KM
669 (void) tsleep((caddr_t)&needbuffer, slpflag | (PRIBIO + 1),
670 "getnewbuf", slptimeo);
4b7d506c 671 splx(s);
e140149a 672 return (NULL);
4f083fd7 673 }
b88d365e 674found:
c669f646
KM
675 bremfree(bp);
676 bp->b_flags |= B_BUSY;
677 splx(s);
4f083fd7 678 if (bp->b_flags & B_DELWRI) {
033a786e 679 (void) bawrite(bp);
4f083fd7
SL
680 goto loop;
681 }
c5a600cf 682 trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
edadbc2c
KM
683 if (bp->b_vp)
684 brelvp(bp);
a937f856
KM
685 if (bp->b_rcred != NOCRED) {
686 cred = bp->b_rcred;
687 bp->b_rcred = NOCRED;
688 crfree(cred);
689 }
690 if (bp->b_wcred != NOCRED) {
691 cred = bp->b_wcred;
692 bp->b_wcred = NOCRED;
693 crfree(cred);
694 }
4f083fd7 695 bp->b_flags = B_BUSY;
1c89915d 696 bp->b_dirtyoff = bp->b_dirtyend = 0;
bb1626f7 697 bp->b_validoff = bp->b_validend = 0;
4f083fd7
SL
698 return (bp);
699}
700
663dbc72 701/*
d42a4811
KM
702 * Wait for I/O to complete.
703 *
704 * Extract and return any errors associated with the I/O.
705 * If the error flag is set, but no specific error is
706 * given, return EIO.
663dbc72 707 */
3efdd860 708biowait(bp)
ad30fb67 709 register struct buf *bp;
663dbc72 710{
530d0032 711 int s;
663dbc72 712
a5e62f37 713 s = splbio();
a937f856 714 while ((bp->b_flags & B_DONE) == 0)
663dbc72 715 sleep((caddr_t)bp, PRIBIO);
530d0032 716 splx(s);
7188ac27
KM
717 if ((bp->b_flags & B_ERROR) == 0)
718 return (0);
719 if (bp->b_error)
720 return (bp->b_error);
721 return (EIO);
663dbc72
BJ
722}
723
663dbc72 724/*
af04ce66 725 * Mark I/O complete on a buffer.
d42a4811
KM
726 *
727 * If a callback has been requested, e.g. the pageout
728 * daemon, do so. Otherwise, awaken waiting processes.
663dbc72 729 */
251f56ba 730void
3efdd860
KM
731biodone(bp)
732 register struct buf *bp;
663dbc72 733{
663dbc72 734
80e7c811 735 if (bp->b_flags & B_DONE)
3efdd860 736 panic("dup biodone");
663dbc72 737 bp->b_flags |= B_DONE;
76429560
KM
738 if ((bp->b_flags & B_READ) == 0)
739 vwakeup(bp);
961945a8
SL
740 if (bp->b_flags & B_CALL) {
741 bp->b_flags &= ~B_CALL;
742 (*bp->b_iodone)(bp);
743 return;
744 }
d42a4811 745 if (bp->b_flags & B_ASYNC)
663dbc72
BJ
746 brelse(bp);
747 else {
748 bp->b_flags &= ~B_WANTED;
749 wakeup((caddr_t)bp);
750 }
751}
aa95c6fc 752
b5d79df9
MS
753int
754count_lock_queue()
755{
756 register struct buf *bp;
757 register int ret;
758
af50abe6
KM
759 for (ret = 0, bp = (struct buf *)bufqueues[BQ_LOCKED].tqh_first;
760 bp; bp = (struct buf *)bp->b_freelist.tqe_next)
b5d79df9
MS
761 ++ret;
762 return(ret);
763}
764
#ifdef DIAGNOSTIC
/*
 * Print out statistics on the current allocation of the buffer pool.
 * Can be enabled to print out on every ``sync'' by setting "syncprt"
 * in vfs_syscalls.c using sysctl.
 */
void
vfs_bufstats()
{
        int s, i, j, count;
        register struct buf *bp;
        register struct bqueues *dp;
        int counts[MAXBSIZE/CLBYTES+1];
        static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" };

        for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) {
                count = 0;
                for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
                        counts[j] = 0;
                s = splbio();
                /* histogram buffer sizes on this queue, bucketed by CLBYTES */
                for (bp = dp->tqh_first; bp; bp = bp->b_freelist.tqe_next) {
                        counts[bp->b_bufsize/CLBYTES]++;
                        count++;
                }
                splx(s);
                printf("%s: total-%d", bname[i], count);
                for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
                        if (counts[j] != 0)
                                printf(", %d-%d", j * CLBYTES, counts[j]);
                printf("\n");
        }
}
#endif /* DIAGNOSTIC */