/*-
 * Copyright (c) 1982, 1986, 1989 The Regents of the University of California.
 * All rights reserved.
 *
 * This module is believed to contain source code proprietary to AT&T.
 * Use and redistribution is subject to the Berkeley Software License
 * Agreement and your Software Agreement with AT&T (Western Electric).
 *
 *	@(#)vfs_cluster.c	7.55 (Berkeley) %G%
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/resourcevar.h>
#include <sys/malloc.h>
#include <libkern/libkern.h>

/*
 * Definitions for the buffer hash lists.
 */
#define	BUFHASH(dvp, lbn)	\
	(&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
struct	list_entry *bufhashtbl, invalhash;
u_long	bufhash;
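
/*
 * BUFHASH folds the vnode address (scaled down by the size of a vnode
 * so that the low bits vary) together with the logical block number.
 * "bufhash" is the hash-table mask (table size minus one) set up by
 * hashinit() in bufinit() below; incore() and getblk() walk the
 * selected chain through the b_hash links, skipping B_INVAL buffers.
 */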

/*
 * Insq/Remq for the buffer hash lists.
 */
#define	binshash(bp, dp)	list_enter_head(dp, bp, struct buf *, b_hash)
#define	bremhash(bp)		list_remove(bp, struct buf *, b_hash)

/*
 * Definitions for the buffer free lists.
 */
#define	BQUEUES		4		/* number of free buffer queues */

#define	BQ_LOCKED	0		/* super-blocks &c */
#define	BQ_LRU		1		/* lru, useful buffers */
#define	BQ_AGE		2		/* rubbish */
#define	BQ_EMPTY	3		/* buffer headers with no memory */

struct queue_entry bufqueues[BQUEUES];
int needbuffer;

/*
 * Insq/Remq for the buffer free lists.
 */
#define	binsheadfree(bp, dp) \
	queue_enter_head(dp, bp, struct buf *, b_freelist)
#define	binstailfree(bp, dp) \
	queue_enter_tail(dp, bp, struct buf *, b_freelist)

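/*
 * Take a buffer off of the free list it is on.
 */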
void
bremfree(bp)
	struct buf *bp;
{
	struct queue_entry *dp;

	/*
	 * We only calculate the head of the freelist when removing
	 * the last element of the list as that is the only time that
	 * it is needed (e.g. to reset the tail pointer).
	 */
	if (bp->b_freelist.qe_next == NULL) {
		for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
			if (dp->qe_prev == &bp->b_freelist.qe_next)
				break;
		if (dp == &bufqueues[BQUEUES])
			panic("bremfree: lost tail");
	}
	queue_remove(dp, bp, struct buf *, b_freelist);
}

/*
 * Initialize buffers and hash links for buffers.
 */
void
bufinit()
{
	register struct buf *bp;
	struct queue_entry *dp;
	register int i;
	int base, residual;

	for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
		queue_init(dp);
	bufhashtbl = (struct list_entry *)hashinit(nbuf, M_CACHE, &bufhash);
	base = bufpages / nbuf;
	residual = bufpages % nbuf;
	for (i = 0; i < nbuf; i++) {
		bp = &buf[i];
		bzero((char *)bp, sizeof *bp);
		bp->b_dev = NODEV;
		bp->b_rcred = NOCRED;
		bp->b_wcred = NOCRED;
		bp->b_un.b_addr = buffers + i * MAXBSIZE;
		if (i < residual)
			bp->b_bufsize = (base + 1) * CLBYTES;
		else
			bp->b_bufsize = base * CLBYTES;
		bp->b_flags = B_INVAL;
		dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY];
		binsheadfree(bp, dp);
		binshash(bp, &invalhash);
	}
}

/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
bread(vp, blkno, size, cred, bpp)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
	struct buf **bpp;
#ifdef SECSIZE
	long secsize;
#endif SECSIZE
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
#ifdef SECSIZE
	bp = getblk(dev, blkno, size, secsize);
#else SECSIZE
	*bpp = bp = getblk(vp, blkno, size);
#endif SECSIZE
	if (bp->b_flags & (B_DONE | B_DELWRI)) {
		trace(TR_BREADHIT, pack(vp, size), blkno);
		return (0);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	if (bp->b_rcred == NOCRED && cred != NOCRED) {
		crhold(cred);
		bp->b_rcred = cred;
	}
	VOP_STRATEGY(bp);
	trace(TR_BREADMISS, pack(vp, size), blkno);
	p->p_stats->p_ru.ru_inblock++;		/* pay for read */
	return (biowait(bp));
}
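
/*
 * Example (a sketch, not taken from any particular filesystem): a
 * caller reading logical block "lbn" of vnode "vp" typically does:
 *
 *	struct buf *bp;
 *	int error;
 *
 *	if (error = bread(vp, lbn, bsize, NOCRED, &bp)) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	... examine bp->b_un.b_addr ...
 *	brelse(bp);
 *
 * "lbn" and "bsize" stand for caller-supplied values.
 */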

/*
 * Operates like bread, but also starts I/O on the N specified
 * read-ahead blocks.
 */
breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp)
	struct vnode *vp;
	daddr_t blkno; int size;
#ifdef SECSIZE
	long secsize;
#endif SECSIZE
	daddr_t rablkno[]; int rabsize[];
	int num;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp, *rabp;
	register int i;

	bp = NULL;
	/*
	 * If the block is not memory resident,
	 * allocate a buffer and start I/O.
	 */
	if (!incore(vp, blkno)) {
		*bpp = bp = getblk(vp, blkno, size);
		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breadn");
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
			VOP_STRATEGY(bp);
			trace(TR_BREADMISS, pack(vp, size), blkno);
			p->p_stats->p_ru.ru_inblock++;	/* pay for read */
		} else {
			trace(TR_BREADHIT, pack(vp, size), blkno);
		}
	}

	/*
	 * If there's read-ahead block(s), start I/O
	 * on them also (as above).
	 */
	for (i = 0; i < num; i++) {
		if (incore(vp, rablkno[i]))
			continue;
		rabp = getblk(vp, rablkno[i], rabsize[i]);
		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]);
		} else {
			rabp->b_flags |= B_ASYNC | B_READ;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				rabp->b_rcred = cred;
			}
			VOP_STRATEGY(rabp);
			trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]);
			p->p_stats->p_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was memory resident, let bread get it.
	 * If block was not memory resident, the read was
	 * started above, so just wait for the read to complete.
	 */
	if (bp == NULL)
#ifdef SECSIZE
		return (bread(dev, blkno, size, secsize));
#else SECSIZE
		return (bread(vp, blkno, size, cred, bpp));
#endif SECSIZE
	return (biowait(bp));
}
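
/*
 * Example (a sketch): reading block "lbn" while starting read-ahead
 * on the block after it:
 *
 *	daddr_t rablkno = lbn + 1;
 *	int rabsize = bsize;
 *
 *	error = breadn(vp, lbn, bsize, &rablkno, &rabsize, 1, NOCRED, &bp);
 *
 * As with bread, "lbn" and "bsize" stand for caller-supplied values;
 * a real filesystem computes the read-ahead blocks from its own
 * layout information.
 */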

/*
 * Synchronous write.
 * Release buffer on completion.
 */
bwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */
	register int flag;
	int s, error = 0;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if (flag & B_ASYNC) {
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
	}
	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	s = splbio();
	bp->b_vp->v_numoutput++;
	splx(s);
	VOP_STRATEGY(bp);

	/*
	 * If the write was synchronous, then await I/O completion.
	 * If the write was "delayed", then we put the buffer on
	 * the queue of blocks awaiting I/O completion status.
	 */
	if ((flag & B_ASYNC) == 0) {
		error = biowait(bp);
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
		brelse(bp);
	} else if (flag & B_DELWRI) {
		s = splbio();
		bp->b_flags |= B_AGE;
		splx(s);
	}
	return (error);
}

int
vn_bwrite(ap)
	struct vop_bwrite_args *ap;
{
	return (bwrite(ap->a_bp));
}

/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */

	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DELWRI;
		reassignbuf(bp, bp->b_vp);
		p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
	}
	/*
	 * If this is a tape drive, the write must be initiated.
	 */
	if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE) {
		bawrite(bp);
	} else {
		bp->b_flags |= (B_DONE | B_DELWRI);
		brelse(bp);
	}
}
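
/*
 * Example (a sketch): a write that fills only part of a block is
 * typically done by reading the block, copying in the new bytes,
 * and marking the buffer for delayed write:
 *
 *	if (error = bread(vp, lbn, bsize, NOCRED, &bp)) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	bcopy(base, bp->b_un.b_addr + off, len);
 *	bdwrite(bp);
 *
 * "lbn", "bsize", "off", "len", and "base" stand for caller-supplied
 * values.
 */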

/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
bawrite(bp)
	register struct buf *bp;
{

	/*
	 * Setting the ASYNC flag causes bwrite to return
	 * after starting the I/O.
	 */
	bp->b_flags |= B_ASYNC;
	(void) bwrite(bp);
}

/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct queue_entry *flist;
	int s;

	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	/*
	 * If a process is waiting for the buffer, or
	 * is waiting for a free buffer, awaken it.
	 */
	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t)bp);
	if (needbuffer) {
		needbuffer = 0;
		wakeup((caddr_t)&needbuffer);
	}
	/*
	 * Retry I/O for locked buffers rather than invalidating them.
	 */
	s = splbio();
	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
		bp->b_flags &= ~B_ERROR;
	/*
	 * Disassociate buffers that are no longer valid.
	 */
	if (bp->b_flags & (B_NOCACHE | B_ERROR))
		bp->b_flags |= B_INVAL;
	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
		if (bp->b_vp)
			brelvp(bp);
		bp->b_flags &= ~B_DELWRI;
	}
	/*
	 * Stick the buffer back on a free list.
	 */
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bufqueues[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bufqueues[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bufqueues[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bufqueues[BQ_AGE];
		else
			flist = &bufqueues[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
	splx(s);
}

/*
 * Check to see if a block is currently memory resident.
 */
incore(vp, blkno)
	struct vnode *vp;
	daddr_t blkno;
{
	register struct buf *bp;

	for (bp = BUFHASH(vp, blkno)->le_next; bp; bp = bp->b_hash.qe_next)
		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
		    (bp->b_flags & B_INVAL) == 0)
			return (1);
	return (0);
}

/*
 * Check to see if a block is currently memory resident.
 * If it is resident, return it. If it is not resident,
 * allocate a new buffer and assign it to the block.
 */
struct buf *
#ifdef SECSIZE
getblk(dev, blkno, size, secsize)
#else SECSIZE
getblk(vp, blkno, size)
#endif SECSIZE
	register struct vnode *vp;
	daddr_t blkno;
	int size;
#ifdef SECSIZE
	long secsize;
#endif SECSIZE
{
	register struct buf *bp;
	struct list_entry *dp;
	int s;

	if (size > MAXBSIZE)
		panic("getblk: size too big");
	/*
	 * Search the cache for the block. If the buffer is found,
	 * but it is currently locked, then we must wait for it to
	 * become available.
	 */
	dp = BUFHASH(vp, blkno);
loop:
	for (bp = dp->le_next; bp; bp = bp->b_hash.qe_next) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp ||
		    (bp->b_flags & B_INVAL))
			continue;
		s = splbio();
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO + 1);
			splx(s);
			goto loop;
		}
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		if (bp->b_bcount != size) {
			printf("getblk: stray size");
			bp->b_flags |= B_INVAL;
			bwrite(bp);
			goto loop;
		}
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	bp = getnewbuf();
	bremhash(bp);
	bgetvp(vp, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = blkno;
#ifdef SECSIZE
	bp->b_blksize = secsize;
#endif SECSIZE
	bp->b_blkno = blkno;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, dp);
	allocbuf(bp, size);
	return (bp);
}
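
/*
 * Example (a sketch): when a caller intends to overwrite the entire
 * block, getblk avoids the useless read that bread would do:
 *
 *	bp = getblk(vp, lbn, bsize);
 *	bzero(bp->b_un.b_addr, bsize);	(or fill in the new contents)
 *	bwrite(bp);			(or bdwrite/bawrite)
 *
 * "lbn" and "bsize" stand for caller-supplied values.
 */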

/*
 * Allocate a buffer.
 * The caller will assign it to a block.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
	bp = getnewbuf();
	bp->b_flags |= B_INVAL;
	bremhash(bp);
	binshash(bp, &invalhash);
	bp->b_bcount = 0;
#ifdef SECSIZE
	bp->b_blksize = DEV_BSIZE;
#endif SECSIZE
	bp->b_error = 0;
	bp->b_resid = 0;
	allocbuf(bp, size);
	return (bp);
}

/*
 * Expand or contract the actual memory allocated to a buffer.
 * If no memory is available, release buffer and take error exit.
 */
allocbuf(tp, size)
	register struct buf *tp;
	int size;
{
	register struct buf *bp, *ep;
	int sizealloc, take, s;

	sizealloc = roundup(size, CLBYTES);
	/*
	 * Buffer size does not change
	 */
	if (sizealloc == tp->b_bufsize)
		goto out;
	/*
	 * Buffer size is shrinking.
	 * Place excess space in a buffer header taken from the
	 * BQ_EMPTY buffer list and placed on the "most free" list.
	 * If no extra buffer headers are available, leave the
	 * extra space in the present buffer.
	 */
	if (sizealloc < tp->b_bufsize) {
		if ((ep = bufqueues[BQ_EMPTY].qe_next) == NULL)
			goto out;
		s = splbio();
		bremfree(ep);
		ep->b_flags |= B_BUSY;
		splx(s);
		pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
		    (int)tp->b_bufsize - sizealloc);
		ep->b_bufsize = tp->b_bufsize - sizealloc;
		tp->b_bufsize = sizealloc;
		ep->b_flags |= B_INVAL;
		ep->b_bcount = 0;
		brelse(ep);
		goto out;
	}
	/*
	 * More buffer space is needed. Get it out of buffers on
	 * the "most free" list, placing the empty headers on the
	 * BQ_EMPTY buffer header list.
	 */
	while (tp->b_bufsize < sizealloc) {
		take = sizealloc - tp->b_bufsize;
		bp = getnewbuf();
		if (take >= bp->b_bufsize)
			take = bp->b_bufsize;
		pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
		    &tp->b_un.b_addr[tp->b_bufsize], take);
		tp->b_bufsize += take;
		bp->b_bufsize = bp->b_bufsize - take;
		if (bp->b_bcount > bp->b_bufsize)
			bp->b_bcount = bp->b_bufsize;
		if (bp->b_bufsize <= 0) {
			bremhash(bp);
			binshash(bp, &invalhash);
			bp->b_dev = NODEV;
			bp->b_error = 0;
			bp->b_flags |= B_INVAL;
		}
		brelse(bp);
	}
out:
	tp->b_bcount = size;
	return (1);
}

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf()
{
	register struct buf *bp;
	register struct queue_entry *dp;
	register struct ucred *cred;
	int s;

loop:
	s = splbio();
	for (dp = &bufqueues[BQ_AGE]; dp > bufqueues; dp--)
		if (dp->qe_next)
			break;
	if (dp == bufqueues) {		/* no free blocks */
		needbuffer = 1;
		sleep((caddr_t)&needbuffer, PRIBIO + 1);
		splx(s);
		goto loop;
	}
	bp = dp->qe_next;
	bremfree(bp);
	bp->b_flags |= B_BUSY;
	splx(s);
	if (bp->b_flags & B_DELWRI) {
		(void) bawrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	if (bp->b_vp)
		brelvp(bp);
	if (bp->b_rcred != NOCRED) {
		cred = bp->b_rcred;
		bp->b_rcred = NOCRED;
		crfree(cred);
	}
	if (bp->b_wcred != NOCRED) {
		cred = bp->b_wcred;
		bp->b_wcred = NOCRED;
		crfree(cred);
	}
	bp->b_flags = B_BUSY;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	bp->b_validoff = bp->b_validend = 0;
	return (bp);
}

/*
 * Wait for I/O to complete.
 *
 * Extract and return any errors associated with the I/O.
 * If the error flag is set, but no specific error is
 * given, return EIO.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	if ((bp->b_flags & B_ERROR) == 0)
		return (0);
	if (bp->b_error)
		return (bp->b_error);
	return (EIO);
}

/*
 * Mark I/O complete on a buffer.
 *
 * If a callback has been requested, e.g. the pageout
 * daemon, do so. Otherwise, awaken waiting processes.
 */
void
biodone(bp)
	register struct buf *bp;
{

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
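
/*
 * Example (a sketch): a caller that cannot sleep, such as the pageout
 * daemon, may request a callback instead of using biowait:
 *
 *	bp->b_flags |= B_CALL;
 *	bp->b_iodone = mydone;		("mydone" is a hypothetical routine)
 *	VOP_STRATEGY(bp);
 *
 * When the transfer completes, biodone clears B_CALL and calls
 * (*b_iodone)(bp), typically at interrupt level; that routine must
 * arrange for the buffer to be released.
 */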

#ifdef DIAGNOSTIC
/*
 * Print out statistics on the current allocation of the buffer pool.
 * Can be enabled to print out on every ``sync'' by setting "syncprt"
 * above.
 */
void
vfs_bufstats()
{
	int s, i, j, count;
	register struct buf *bp;
	register struct queue_entry *dp;
	int counts[MAXBSIZE/CLBYTES+1];
	static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" };

	for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) {
		count = 0;
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			counts[j] = 0;
		s = splbio();
		for (bp = dp->qe_next; bp; bp = bp->b_freelist.qe_next) {
			counts[bp->b_bufsize/CLBYTES]++;
			count++;
		}
		splx(s);
		printf("%s: total-%d", bname[i], count);
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			if (counts[j] != 0)
				printf(", %d-%d", j * CLBYTES, counts[j]);
		printf("\n");
	}
}
#endif /* DIAGNOSTIC */