/*-
 * Copyright (c) 1982, 1986, 1989 The Regents of the University of California.
 * All rights reserved.
 *
 * This module is believed to contain source code proprietary to AT&T.
 * Use and redistribution is subject to the Berkeley Software License
 * Agreement and your Software Agreement with AT&T (Western Electric).
 *
 *	@(#)vfs_cluster.c	7.58 (Berkeley) %G%
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/resourcevar.h>
#include <sys/malloc.h>
#include <libkern/libkern.h>

/*
 * Definitions for the buffer hash lists.
 */
#define	BUFHASH(dvp, lbn)	\
	(&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
struct	list_entry *bufhashtbl, invalhash;
u_long	bufhash;

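/*
 * BUFHASH spreads buffers across bufhashtbl by folding the vnode address
 * (scaled down by the size of the pointed-to structure) into the logical
 * block number and masking with bufhash, the mask that hashinit() hands
 * back in bufinit().  Buffers that hold no valid block are kept on the
 * separate invalhash list.
 */
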
/*
 * Insq/Remq for the buffer hash lists.
 */
#define	binshash(bp, dp)	list_enter_head(dp, bp, struct buf *, b_hash)
#define	bremhash(bp)		list_remove(bp, struct buf *, b_hash)

/*
 * Definitions for the buffer free lists.
 */
#define	BQUEUES		4		/* number of free buffer queues */

#define	BQ_LOCKED	0		/* super-blocks &c */
#define	BQ_LRU		1		/* lru, useful buffers */
#define	BQ_AGE		2		/* rubbish */
#define	BQ_EMPTY	3		/* buffer headers with no memory */

struct queue_entry bufqueues[BQUEUES];
int needbuffer;

/*
 * Insq/Remq for the buffer free lists.
 */
#define	binsheadfree(bp, dp) \
	queue_enter_head(dp, bp, struct buf *, b_freelist)
#define	binstailfree(bp, dp) \
	queue_enter_tail(dp, bp, struct buf *, b_freelist)

/*
 * Local declarations
 */
struct buf *cluster_newbuf __P((struct vnode *, struct buf *, long, daddr_t,
	    daddr_t, long, int));
struct buf *cluster_rbuild __P((struct vnode *, u_quad_t, struct buf *,
	    daddr_t, daddr_t, long, int, long));
void	    cluster_wbuild __P((struct vnode *, struct buf *, long size,
	    daddr_t start_lbn, int len, daddr_t lbn));

void
bremfree(bp)
	struct buf *bp;
{
	struct queue_entry *dp;

	/*
	 * We only calculate the head of the freelist when removing
	 * the last element of the list as that is the only time that
	 * it is needed (e.g. to reset the tail pointer).
	 */
	if (bp->b_freelist.qe_next == NULL) {
		for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
			if (dp->qe_prev == &bp->b_freelist.qe_next)
				break;
		if (dp == &bufqueues[BQUEUES])
			panic("bremfree: lost tail");
	}
	queue_remove(dp, bp, struct buf *, b_freelist);
}

/*
 * Initialize buffers and hash links for buffers.
 */
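/*
 * The bufpages clusters of buffer memory are divided as evenly as possible
 * among the nbuf headers below: for example, with bufpages = 205 and
 * nbuf = 100, base is 2 and residual is 5, so the first 5 buffers start
 * out with 3 clusters (3 * CLBYTES) apiece and the remaining 95 with 2.
 * A header that ends up with no memory at all goes on the BQ_EMPTY queue.
 */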
void
bufinit()
{
	register struct buf *bp;
	struct queue_entry *dp;
	register int i;
	int base, residual;

	for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
		queue_init(dp);
	bufhashtbl = (struct list_entry *)hashinit(nbuf, M_CACHE, &bufhash);
	base = bufpages / nbuf;
	residual = bufpages % nbuf;
	for (i = 0; i < nbuf; i++) {
		bp = &buf[i];
		bzero((char *)bp, sizeof *bp);
		bp->b_dev = NODEV;
		bp->b_rcred = NOCRED;
		bp->b_wcred = NOCRED;
		bp->b_un.b_addr = buffers + i * MAXBSIZE;
		if (i < residual)
			bp->b_bufsize = (base + 1) * CLBYTES;
		else
			bp->b_bufsize = base * CLBYTES;
		bp->b_flags = B_INVAL;
		dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY];
		binsheadfree(bp, dp);
		binshash(bp, &invalhash);
	}
}

/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
bread(vp, blkno, size, cred, bpp)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
	struct buf **bpp;
#ifdef SECSIZE
	long secsize;
#endif SECSIZE
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
#ifdef SECSIZE
	bp = getblk(dev, blkno, size, secsize);
#else SECSIZE
	*bpp = bp = getblk(vp, blkno, size, 0, 0);
#endif SECSIZE
	if (bp->b_flags & (B_DONE | B_DELWRI)) {
		trace(TR_BREADHIT, pack(vp, size), blkno);
		return (0);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	if (bp->b_rcred == NOCRED && cred != NOCRED) {
		crhold(cred);
		bp->b_rcred = cred;
	}
	VOP_STRATEGY(bp);
	trace(TR_BREADMISS, pack(vp, size), blkno);
	p->p_stats->p_ru.ru_inblock++;		/* pay for read */
	return (biowait(bp));
}
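
/*
 * Sketch of the usual calling convention (lbn and bsize stand in for the
 * filesystem's own block number and block size):
 *
 *	struct buf *bp;
 *	int error;
 *
 *	if (error = bread(vp, lbn, bsize, NOCRED, &bp)) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	... examine or copy bp->b_un.b_addr ...
 *	brelse(bp);		(or bdwrite()/bwrite() if modified)
 */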

/*
 * Operates like bread, but also starts I/O on the N specified
 * read-ahead blocks.
 */
breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp)
	struct vnode *vp;
	daddr_t blkno; int size;
#ifdef SECSIZE
	long secsize;
#endif SECSIZE
	daddr_t rablkno[]; int rabsize[];
	int num;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp, *rabp;
	register int i;

	bp = NULL;
	/*
	 * If the block is not memory resident,
	 * allocate a buffer and start I/O.
	 */
	if (!incore(vp, blkno)) {
		*bpp = bp = getblk(vp, blkno, size, 0, 0);
		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breadn");
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
			VOP_STRATEGY(bp);
			trace(TR_BREADMISS, pack(vp, size), blkno);
			p->p_stats->p_ru.ru_inblock++;	/* pay for read */
		} else {
			trace(TR_BREADHIT, pack(vp, size), blkno);
		}
	}

	/*
	 * If there's read-ahead block(s), start I/O
	 * on them also (as above).
	 */
	for (i = 0; i < num; i++) {
		if (incore(vp, rablkno[i]))
			continue;
		rabp = getblk(vp, rablkno[i], rabsize[i], 0, 0);
		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]);
		} else {
			rabp->b_flags |= B_ASYNC | B_READ;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				rabp->b_rcred = cred;
			}
			VOP_STRATEGY(rabp);
			trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]);
			p->p_stats->p_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was memory resident, let bread get it.
	 * If block was not memory resident, the read was
	 * started above, so just wait for the read to complete.
	 */
	if (bp == NULL)
#ifdef SECSIZE
		return (bread(dev, blkno, size, secsize));
#else SECSIZE
		return (bread(vp, blkno, size, cred, bpp));
#endif SECSIZE
	return (biowait(bp));
}

/*
 * We could optimize this by keeping track of where the last read-ahead
 * was, but it would involve adding fields to the vnode.  For now, let's
 * just get it working.
 *
 * This replaces bread.  If this is a bread at the beginning of a file and
 * lastr is 0, we assume this is the first read and we'll read up to two
 * blocks if they are sequential.  After that, we'll do regular read ahead
 * in clustered chunks.
 *
 * There are 4 or 5 cases depending on how you count:
 *	Desired block is in the cache:
 *	    1 Not sequential access (0 I/Os).
 *	    2 Access is sequential, do read-ahead (1 ASYNC).
 *	Desired block is not in cache:
 *	    3 Not sequential access (1 SYNC).
 *	    4 Sequential access, next block is contiguous (1 SYNC).
 *	    5 Sequential access, next block is not contiguous (1 SYNC, 1 ASYNC)
 *
 * There are potentially two buffers that require I/O.
 *	bp is the block requested.
 *	rbp is the read-ahead block.
 *	If either is NULL, then you don't have to do the I/O.
 */
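/*
 * Read-ahead state consulted below: v_lastr is the logical block of the
 * previous read on this vnode (used to detect sequential access) and
 * v_ralen is the current read-ahead window in logical blocks, grown or
 * shrunk as the access pattern changes.
 */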
cluster_read(vp, filesize, lblkno, size, cred, bpp)
	struct vnode *vp;
	u_quad_t filesize;
	daddr_t lblkno;
	long size;
	struct ucred *cred;
	struct buf **bpp;
{
	struct buf *bp, *rbp;
	daddr_t blkno, ioblkno;
	long flags;
	int error, num_ra, alreadyincore;

#ifdef DIAGNOSTIC
	if (size == 0)
		panic("cluster_read: size = 0");
#endif

	error = 0;
	flags = B_READ;
	*bpp = bp = getblk(vp, lblkno, size, 0, 0);
	if (bp->b_flags & (B_CACHE | B_DONE | B_DELWRI)) {
		/*
		 * Desired block is in cache; do any readahead ASYNC.
		 * Case 1, 2.
		 */
		trace(TR_BREADHIT, pack(vp, size), lblkno);
		flags |= B_ASYNC;
		ioblkno = lblkno +
		    (lblkno < vp->v_ralen ? vp->v_ralen >> 1 : vp->v_ralen);
		alreadyincore = (int)incore(vp, ioblkno);
		bp = NULL;
	} else {
		/* Block wasn't in cache, case 3, 4, 5. */
		trace(TR_BREADMISS, pack(vp, size), lblkno);
		ioblkno = lblkno;
		bp->b_flags |= flags;
		alreadyincore = 0;
		curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
	}
	/*
	 * XXX
	 * Replace 1 with a window size based on some permutation of
	 * maxcontig and rot_delay.  This will let you figure out how
	 * many blocks you should read-ahead (case 2, 4, 5).
	 *
	 * If the access isn't sequential, cut the window size in half.
	 */
	rbp = NULL;
	if (lblkno != vp->v_lastr + 1 && lblkno != 0)
		vp->v_ralen = max(vp->v_ralen >> 1, 1);
	else if ((ioblkno + 1) * size < filesize && !alreadyincore &&
	    !(error = VOP_BMAP(vp, ioblkno, NULL, &blkno, &num_ra))) {
		/*
		 * Reading sequentially, and the next block is not in the
		 * cache.  We are going to try reading ahead.  If this is
		 * the first read of a file, then limit read-ahead to a
		 * single block, else read as much as we're allowed.
		 */
		if (num_ra > vp->v_ralen) {
			num_ra = vp->v_ralen;
			vp->v_ralen = min(MAXPHYS / size, vp->v_ralen << 1);
		} else
			vp->v_ralen = num_ra + 1;

		if (num_ra)			/* case 2, 4 */
			rbp = cluster_rbuild(vp, filesize,
			    bp, ioblkno, blkno, size, num_ra, flags);
		else if (lblkno != 0 && ioblkno == lblkno) {
			/* Case 5: check how many blocks to read ahead */
			++ioblkno;
			if ((ioblkno + 1) * size > filesize ||
			    (error = VOP_BMAP(vp,
			    ioblkno, NULL, &blkno, &num_ra)))
				goto skip_readahead;
			flags |= B_ASYNC;
			if (num_ra)
				rbp = cluster_rbuild(vp, filesize,
				    NULL, ioblkno, blkno, size, num_ra, flags);
			else {
				rbp = getblk(vp, ioblkno, size, 0, 0);
				rbp->b_flags |= flags;
				rbp->b_blkno = blkno;
			}
		} else if (lblkno != 0) {
			/* case 2; read ahead single block */
			rbp = getblk(vp, ioblkno, size, 0, 0);
			rbp->b_flags |= flags;
			rbp->b_blkno = blkno;
		} else if (bp)			/* case 1, 3, block 0 */
			bp->b_blkno = blkno;
		/* Case 1 on block 0; not really doing sequential I/O */

		if (rbp == bp)			/* case 4 */
			rbp = NULL;
		else if (rbp) {			/* case 2, 5 */
			trace(TR_BREADMISSRA,
			    pack(vp, (num_ra + 1) * size), ioblkno);
			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
		}
	}

	/* XXX Kirk, do we need to make sure the bp has creds? */
skip_readahead:
	if (bp)
		if (bp->b_flags & (B_DONE | B_DELWRI))
			panic("cluster_read: DONE bp");
		else
			error = VOP_STRATEGY(bp);

	if (rbp)
		if (error || rbp->b_flags & (B_DONE | B_DELWRI)) {
			rbp->b_flags &= ~(B_ASYNC | B_READ);
			brelse(rbp);
		} else
			(void) VOP_STRATEGY(rbp);

	if (bp)
		return(biowait(bp));
	return(error);
}

/*
 * If blocks are contiguous on disk, use this to provide clustered
 * read ahead.  We will read as many blocks as possible sequentially
 * and then parcel them up into logical blocks in the buffer hash table.
 */
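/*
 * The bookkeeping for a cluster is kept in a struct cluster_save that is
 * allocated in one piece with its bs_children pointer array immediately
 * behind it and hung off the parent buffer's b_saveaddr; cluster_callback()
 * uses it to hand the data back to the component buffers when the I/O
 * completes.
 */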
struct buf *
cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags)
	struct vnode *vp;
	u_quad_t filesize;
	struct buf *bp;
	daddr_t lbn;
	daddr_t blkno;
	long size;
	int run;
	long flags;
{
	struct cluster_save *b_save;
	struct buf *tbp;
	daddr_t bn;
	int i, inc;

	if (size * (lbn + run + 1) > filesize)
		--run;
	if (run == 0) {
		if (!bp) {
			bp = getblk(vp, lbn, size, 0, 0);
			bp->b_blkno = blkno;
			bp->b_flags |= flags;
		}
		return(bp);
	}

	bp = cluster_newbuf(vp, bp, flags, blkno, lbn, size, run + 1);
	if (bp->b_flags & (B_DONE | B_DELWRI))
		return (bp);

	b_save = malloc(sizeof(struct buf *) * run + sizeof(struct cluster_save),
	    M_SEGMENT, M_WAITOK);
	b_save->bs_bufsize = b_save->bs_bcount = size;
	b_save->bs_nchildren = 0;
	b_save->bs_children = (struct buf **)(b_save + 1);
	b_save->bs_saveaddr = bp->b_saveaddr;
	bp->b_saveaddr = (caddr_t) b_save;

	inc = size / DEV_BSIZE;
	for (bn = blkno + inc, i = 1; i <= run; ++i, bn += inc) {
		if (incore(vp, lbn + i)) {
			if (i == 1) {
				bp->b_saveaddr = b_save->bs_saveaddr;
				bp->b_flags &= ~B_CALL;
				bp->b_iodone = NULL;
				allocbuf(bp, size);
				free(b_save, M_SEGMENT);
			} else
				allocbuf(bp, size * i);
			break;
		}
		tbp = getblk(vp, lbn + i, 0, 0, 0);
		tbp->b_bcount = tbp->b_bufsize = size;
		tbp->b_blkno = bn;
		tbp->b_flags |= flags | B_READ | B_ASYNC;
		++b_save->bs_nchildren;
		b_save->bs_children[i - 1] = tbp;
	}
	if (!(bp->b_flags & B_ASYNC))
		vp->v_ralen = max(vp->v_ralen - 1, 1);
	return(bp);
}

/*
 * Either get a new buffer or grow the existing one.
 */
struct buf *
cluster_newbuf(vp, bp, flags, blkno, lblkno, size, run)
	struct vnode *vp;
	struct buf *bp;
	long flags;
	daddr_t blkno;
	daddr_t lblkno;
	long size;
	int run;
{
	if (!bp) {
		bp = getblk(vp, lblkno, size, 0, 0);
		if (bp->b_flags & (B_DONE | B_DELWRI)) {
			bp->b_blkno = blkno;
			return(bp);
		}
	}
	allocbuf(bp, run * size);
	bp->b_blkno = blkno;
	bp->b_iodone = cluster_callback;
	bp->b_flags |= flags | B_CALL;
	return(bp);
}

/*
 * Cleanup after a clustered read or write.
 */
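/*
 * The parent buffer's data area was grown to span the entire cluster, so
 * the loop below uses pagemove() to give each child buffer back the pages
 * holding its portion of the transfer before marking it done with
 * biodone(); the parent is left holding only its original bs_bufsize.
 */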
void
cluster_callback(bp)
	struct buf *bp;
{
	struct cluster_save *b_save;
	struct buf **tbp;
	long bsize;
	caddr_t cp;

	b_save = (struct cluster_save *)(bp->b_saveaddr);
	bp->b_saveaddr = b_save->bs_saveaddr;

	cp = bp->b_un.b_addr + b_save->bs_bufsize;
	for (tbp = b_save->bs_children; b_save->bs_nchildren--; ++tbp) {
		pagemove(cp, (*tbp)->b_un.b_addr, (*tbp)->b_bufsize);
		cp += (*tbp)->b_bufsize;
		bp->b_bufsize -= (*tbp)->b_bufsize;
		biodone(*tbp);
	}
#ifdef DIAGNOSTIC
	if (bp->b_bufsize != b_save->bs_bufsize)
		panic("cluster_callback: more space to reclaim");
#endif
	bp->b_bcount = bp->b_bufsize;
	bp->b_iodone = NULL;
	free(b_save, M_SEGMENT);
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else
		wakeup((caddr_t)bp);
}

/*
 * Synchronous write.
 * Release buffer on completion.
 */
bwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */
	register int flag;
	int s, error = 0;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if (flag & B_ASYNC) {
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
	}
	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	s = splbio();
	bp->b_vp->v_numoutput++;
	bp->b_flags |= B_WRITEINPROG;
	splx(s);
	VOP_STRATEGY(bp);

	/*
	 * If the write was synchronous, then await I/O completion.
	 * If the write was "delayed", then we put the buffer on
	 * the queue of blocks awaiting I/O completion status.
	 */
	if ((flag & B_ASYNC) == 0) {
		error = biowait(bp);
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
		if (bp->b_flags & B_EINTR) {
			bp->b_flags &= ~B_EINTR;
			error = EINTR;
		}
		brelse(bp);
	} else if (flag & B_DELWRI) {
		s = splbio();
		bp->b_flags |= B_AGE;
		splx(s);
	}
	return (error);
}

int
vn_bwrite(ap)
	struct vop_bwrite_args *ap;
{
	return (bwrite(ap->a_bp));
}

/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */

	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DELWRI;
		reassignbuf(bp, bp->b_vp);
		p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
	}
	/*
	 * If this is a tape drive, the write must be initiated.
	 */
	if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE) {
		bawrite(bp);
	} else {
		bp->b_flags |= (B_DONE | B_DELWRI);
		brelse(bp);
	}
}

/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
bawrite(bp)
	register struct buf *bp;
{

	/*
	 * Setting the ASYNC flag causes bwrite to return
	 * after starting the I/O.
	 */
	bp->b_flags |= B_ASYNC;
	(void) VOP_BWRITE(bp);
}

/*
 * Do clustered write for FFS.
 *
 * Four cases:
 *	1. Write is not sequential (write asynchronously)
 *	Write is sequential:
 *	2.	beginning of cluster - begin cluster
 *	3.	middle of a cluster - add to cluster
 *	4.	end of a cluster - asynchronously write cluster
 */
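/*
 * The write-cluster state lives in the vnode: v_cstart is the first block
 * of the cluster being collected, v_lastw the last block written, v_clen
 * the length of the run that may still be clustered beyond v_cstart, and
 * v_lasta the disk address of the last block allocated, used to check
 * that an append remains contiguous.
 */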
void
cluster_write(bp, filesize)
	struct buf *bp;
	u_quad_t filesize;
{
	struct vnode *vp;
	daddr_t lbn;
	int clen, error, maxrun;

	vp = bp->b_vp;
	lbn = bp->b_lblkno;
	clen = 0;

	/*
	 * Handle end of file first.  If we are appending, we need to check
	 * if the current block was allocated contiguously.  If it wasn't,
	 * then we need to fire off a previous cluster if it existed.
	 * Additionally, when we're appending, we need to figure out how
	 * to initialize vp->v_clen.
	 */
	if ((lbn + 1) * bp->b_bcount == filesize) {
		if (bp->b_blkno != vp->v_lasta + bp->b_bcount / DEV_BSIZE) {
			/* This block was not allocated contiguously */
			if (vp->v_clen)
				cluster_wbuild(vp, NULL, bp->b_bcount,
				    vp->v_cstart, vp->v_lastw - vp->v_cstart + 1,
				    lbn);
			vp->v_cstart = lbn;
			clen = vp->v_clen =
			    MAXBSIZE / vp->v_mount->mnt_stat.f_iosize - 1;
			/*
			 * Next cluster started. Write this buffer and return.
			 */
			vp->v_lastw = lbn;
			vp->v_lasta = bp->b_blkno;
			bdwrite(bp);
			return;
		}
		vp->v_lasta = bp->b_blkno;
	} else if (lbn == 0) {
		vp->v_clen = vp->v_cstart = vp->v_lastw = 0;
	}
	if (vp->v_clen == 0 || lbn != vp->v_lastw + 1) {
		if (vp->v_clen != 0)
			/*
			 * Write is not sequential.
			 */
			cluster_wbuild(vp, NULL, bp->b_bcount, vp->v_cstart,
			    vp->v_lastw - vp->v_cstart + 1, lbn);
		/*
		 * Consider beginning a cluster.
		 */
		if (error = VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &clen)) {
			bawrite(bp);
			vp->v_cstart = lbn + 1;
			vp->v_lastw = lbn;
			return;
		}
		vp->v_clen = clen;
		if (clen == 0) {		/* I/O not contiguous */
			vp->v_cstart = lbn + 1;
			bawrite(bp);
		} else {			/* Wait for rest of cluster */
			vp->v_cstart = lbn;
			bdwrite(bp);
		}
	} else if (lbn == vp->v_cstart + vp->v_clen) {
		/*
		 * At end of cluster, write it out.
		 */
		cluster_wbuild(vp, bp, bp->b_bcount, vp->v_cstart,
		    vp->v_clen + 1, lbn);
		vp->v_clen = 0;
		vp->v_cstart = lbn + 1;
	} else
		/*
		 * In the middle of a cluster, so just delay the
		 * I/O for now.
		 */
		bdwrite(bp);
	vp->v_lastw = lbn;
}

/*
 * This is an awful lot like cluster_rbuild...wish they could be combined.
 * The last lbn argument is the current block on which I/O is being
 * performed.  Check to see that it doesn't fall in the middle of
 * the current block.
 */
void
cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn)
	struct vnode *vp;
	struct buf *last_bp;
	long size;
	daddr_t start_lbn;
	int len;
	daddr_t lbn;
{
	struct cluster_save *b_save;
	struct buf *bp, *tbp;
	caddr_t cp;
	int i, s;

redo:
	while ((!incore(vp, start_lbn) || start_lbn == lbn) && len) {
		++start_lbn;
		--len;
	}

	/* Get more memory for current buffer */
	if (len <= 1) {
		if (last_bp)
			bawrite(last_bp);
		return;
	}

	bp = getblk(vp, start_lbn, size, 0, 0);
	if (!(bp->b_flags & B_DELWRI)) {
		++start_lbn;
		--len;
		brelse(bp);
		goto redo;
	}

	--len;
	b_save = malloc(sizeof(struct buf *) * len + sizeof(struct cluster_save),
	    M_SEGMENT, M_WAITOK);
	b_save->bs_bcount = bp->b_bcount;
	b_save->bs_bufsize = bp->b_bufsize;
	b_save->bs_nchildren = 0;
	b_save->bs_children = (struct buf **)(b_save + 1);
	b_save->bs_saveaddr = bp->b_saveaddr;
	bp->b_saveaddr = (caddr_t) b_save;

	bp->b_flags |= B_CALL;
	bp->b_iodone = cluster_callback;
	cp = bp->b_un.b_addr + bp->b_bufsize;
	for (++start_lbn, i = 0; i < len; ++i, ++start_lbn) {
		if (!incore(vp, start_lbn) || start_lbn == lbn)
			break;

		if (last_bp == NULL || start_lbn != last_bp->b_lblkno) {
			tbp = getblk(vp, start_lbn, size, 0, 0);
#ifdef DIAGNOSTIC
			if (tbp->b_bcount != tbp->b_bufsize)
				panic("cluster_wbuild: Buffer too big");
#endif
			if (!(tbp->b_flags & B_DELWRI)) {
				brelse(tbp);
				break;
			}
		} else
			tbp = last_bp;

		++b_save->bs_nchildren;

		/* Move memory from children to parent */
		pagemove(tbp->b_un.b_daddr, cp, size);
		bp->b_bcount += size;
		bp->b_bufsize += size;

		tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
		tbp->b_flags |= B_ASYNC;
		s = splbio();
		reassignbuf(tbp, tbp->b_vp);	/* put on clean list */
		++tbp->b_vp->v_numoutput;
		splx(s);
		b_save->bs_children[i] = tbp;

		cp += tbp->b_bufsize;
	}

	if (i == 0) {
		/* None to cluster */
		bp->b_saveaddr = b_save->bs_saveaddr;
		bp->b_flags &= ~B_CALL;
		bp->b_iodone = NULL;
		free(b_save, M_SEGMENT);
	}
	bawrite(bp);
	if (i < len) {
		len -= i + 1;
		start_lbn += 1;
		goto redo;
	}
}

/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct queue_entry *flist;
	int s;

	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	/*
	 * If a process is waiting for the buffer, or
	 * is waiting for a free buffer, awaken it.
	 */
	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t)bp);
	if (needbuffer) {
		needbuffer = 0;
		wakeup((caddr_t)&needbuffer);
	}
	/*
	 * Retry I/O for locked buffers rather than invalidating them.
	 */
	s = splbio();
	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
		bp->b_flags &= ~B_ERROR;
	/*
	 * Disassociate buffers that are no longer valid.
	 */
	if (bp->b_flags & (B_NOCACHE | B_ERROR))
		bp->b_flags |= B_INVAL;
	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
		if (bp->b_vp)
			brelvp(bp);
		bp->b_flags &= ~B_DELWRI;
	}
	/*
	 * Stick the buffer back on a free list.
	 */
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bufqueues[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bufqueues[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bufqueues[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bufqueues[BQ_AGE];
		else
			flist = &bufqueues[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
	splx(s);
}

/*
 * Check to see if a block is currently memory resident.
 */
struct buf *
incore(vp, blkno)
	struct vnode *vp;
	daddr_t blkno;
{
	register struct buf *bp;

	for (bp = BUFHASH(vp, blkno)->le_next; bp; bp = bp->b_hash.qe_next)
		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
		    (bp->b_flags & B_INVAL) == 0)
			return (bp);
	return (NULL);
}

/*
 * Check to see if a block is currently memory resident.
 * If it is resident, return it.  If it is not resident,
 * allocate a new buffer and assign it to the block.
 */
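/*
 * The buffer is returned with B_BUSY set (off its free list and entered
 * in the hash), so it stays private to the caller until it is released
 * again with brelse(), bdwrite(), or bwrite().
 */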
struct buf *
#ifdef SECSIZE
getblk(dev, blkno, size, secsize)
#else SECSIZE
getblk(vp, blkno, size, slpflag, slptimeo)
#endif SECSIZE
	register struct vnode *vp;
	daddr_t blkno;
	int size, slpflag, slptimeo;
#ifdef SECSIZE
	long secsize;
#endif SECSIZE
{
	register struct buf *bp;
	struct list_entry *dp;
	int s, error;

	if (size > MAXBSIZE)
		panic("getblk: size too big");
	/*
	 * Search the cache for the block.  If the buffer is found,
	 * but it is currently locked, then we must wait for it to
	 * become available.
	 */
	dp = BUFHASH(vp, blkno);
loop:
	for (bp = dp->le_next; bp; bp = bp->b_hash.qe_next) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp)
			continue;
		s = splbio();
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
				"getblk", slptimeo);
			splx(s);
			if (error)
				return (NULL);
			goto loop;
		}
		/*
		 * The test for B_INVAL is moved down here, since there
		 * are cases where B_INVAL is set before VOP_BWRITE() is
		 * called and for NFS, the process cannot be allowed to
		 * allocate a new buffer for the same block until the write
		 * back to the server has been completed. (ie. B_BUSY clears)
		 */
		if (bp->b_flags & B_INVAL) {
			splx(s);
			continue;
		}
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		if (bp->b_bcount != size) {
			printf("getblk: stray size");
			bp->b_flags |= B_INVAL;
			VOP_BWRITE(bp);
			goto loop;
		}
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	/*
	 * The loop back to the top when getnewbuf() fails is because
	 * stateless filesystems like NFS have no node locks.  Thus,
	 * there is a slight chance that more than one process will
	 * try and getnewbuf() for the same block concurrently when
	 * the first sleeps in getnewbuf().  So after a sleep, go back
	 * up to the top to check the hash lists again.
	 */
	if ((bp = getnewbuf(slpflag, slptimeo)) == 0)
		goto loop;
	bremhash(bp);
	bgetvp(vp, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = blkno;
#ifdef SECSIZE
	bp->b_blksize = secsize;
#endif SECSIZE
	bp->b_blkno = blkno;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, dp);
	allocbuf(bp, size);
	return (bp);
}

/*
 * Allocate a buffer.
 * The caller will assign it to a block.
 */
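/*
 * The buffer handed back is marked B_INVAL and hashed on invalhash, so
 * incore() and getblk() will never return it; it is simply scratch memory
 * that the caller eventually gives back with brelse().
 */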
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
	while ((bp = getnewbuf(0, 0)) == NULL)
		/* void */;
	bp->b_flags |= B_INVAL;
	bremhash(bp);
	binshash(bp, &invalhash);
	bp->b_bcount = 0;
#ifdef SECSIZE
	bp->b_blksize = DEV_BSIZE;
#endif SECSIZE
	bp->b_error = 0;
	bp->b_resid = 0;
	allocbuf(bp, size);
	return (bp);
}

/*
 * Expand or contract the actual memory allocated to a buffer.
 * If no memory is available, release buffer and take error exit.
 */
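/*
 * Buffer memory is managed in CLBYTES-sized clusters.  As an illustration
 * only (CLBYTES is machine dependent), with 1024-byte clusters a buffer
 * growing from 4096 to 8192 bytes needs another 4096 bytes, which the loop
 * below steals from buffers handed back by getnewbuf(), one pagemove() per
 * victim; a victim stripped of all its memory becomes an empty header and
 * ends up on the BQ_EMPTY queue via brelse().
 */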
allocbuf(tp, size)
	register struct buf *tp;
	int size;
{
	register struct buf *bp, *ep;
	int sizealloc, take, s;

	sizealloc = roundup(size, CLBYTES);
	/*
	 * Buffer size does not change
	 */
	if (sizealloc == tp->b_bufsize)
		goto out;
	/*
	 * Buffer size is shrinking.
	 * Place excess space in a buffer header taken from the
	 * BQ_EMPTY buffer list and placed on the "most free" list.
	 * If no extra buffer headers are available, leave the
	 * extra space in the present buffer.
	 */
	if (sizealloc < tp->b_bufsize) {
		if ((ep = bufqueues[BQ_EMPTY].qe_next) == NULL)
			goto out;
		s = splbio();
		bremfree(ep);
		ep->b_flags |= B_BUSY;
		splx(s);
		pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
		    (int)tp->b_bufsize - sizealloc);
		ep->b_bufsize = tp->b_bufsize - sizealloc;
		tp->b_bufsize = sizealloc;
		ep->b_flags |= B_INVAL;
		ep->b_bcount = 0;
		brelse(ep);
		goto out;
	}
	/*
	 * More buffer space is needed. Get it out of buffers on
	 * the "most free" list, placing the empty headers on the
	 * BQ_EMPTY buffer header list.
	 */
	while (tp->b_bufsize < sizealloc) {
		take = sizealloc - tp->b_bufsize;
		while ((bp = getnewbuf(0, 0)) == NULL)
			/* void */;
		if (take >= bp->b_bufsize)
			take = bp->b_bufsize;
		pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
		    &tp->b_un.b_addr[tp->b_bufsize], take);
		tp->b_bufsize += take;
		bp->b_bufsize = bp->b_bufsize - take;
		if (bp->b_bcount > bp->b_bufsize)
			bp->b_bcount = bp->b_bufsize;
		if (bp->b_bufsize <= 0) {
			bremhash(bp);
			binshash(bp, &invalhash);
			bp->b_dev = NODEV;
			bp->b_error = 0;
			bp->b_flags |= B_INVAL;
		}
		brelse(bp);
	}
out:
	tp->b_bcount = size;
	return (1);
}

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
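/*
 * Only the AGE and LRU queues are scanned; locked buffers and the empty
 * headers are never handed out.  A dirty (B_DELWRI) victim is first pushed
 * to disk with bawrite() and the scan is restarted.
 */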
struct buf *
getnewbuf(slpflag, slptimeo)
	int slpflag, slptimeo;
{
	register struct buf *bp;
	register struct queue_entry *dp;
	register struct ucred *cred;
	int s;

loop:
	s = splbio();
	for (dp = &bufqueues[BQ_AGE]; dp > bufqueues; dp--)
		if (dp->qe_next)
			break;
	if (dp == bufqueues) {		/* no free blocks */
		needbuffer = 1;
		(void) tsleep((caddr_t)&needbuffer, slpflag | (PRIBIO + 1),
			"getnewbuf", slptimeo);
		splx(s);
		return (NULL);
	}
	bp = dp->qe_next;
	bremfree(bp);
	bp->b_flags |= B_BUSY;
	splx(s);
	if (bp->b_flags & B_DELWRI) {
		(void) bawrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	if (bp->b_vp)
		brelvp(bp);
	if (bp->b_rcred != NOCRED) {
		cred = bp->b_rcred;
		bp->b_rcred = NOCRED;
		crfree(cred);
	}
	if (bp->b_wcred != NOCRED) {
		cred = bp->b_wcred;
		bp->b_wcred = NOCRED;
		crfree(cred);
	}
	bp->b_flags = B_BUSY;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	bp->b_validoff = bp->b_validend = 0;
	return (bp);
}

/*
 * Wait for I/O to complete.
 *
 * Extract and return any errors associated with the I/O.
 * If the error flag is set, but no specific error is
 * given, return EIO.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	if ((bp->b_flags & B_ERROR) == 0)
		return (0);
	if (bp->b_error)
		return (bp->b_error);
	return (EIO);
}

/*
 * Mark I/O complete on a buffer.
 *
 * If a callback has been requested, e.g. the pageout
 * daemon, do so. Otherwise, awaken waiting processes.
 */
void
biodone(bp)
	register struct buf *bp;
{

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
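
/*
 * Return the number of buffers on the locked queue.
 */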
int
count_lock_queue()
{
	register struct buf *bp;
	register int ret;

	for (ret = 0, bp = (struct buf *)bufqueues[BQ_LOCKED].qe_next;
	    bp; bp = (struct buf *)bp->b_freelist.qe_next)
		++ret;
	return(ret);
}

#ifdef DIAGNOSTIC
/*
 * Print out statistics on the current allocation of the buffer pool.
 * Can be enabled to print out on every ``sync'' by setting "syncprt"
 * above.
 */
void
vfs_bufstats()
{
	int s, i, j, count;
	register struct buf *bp;
	register struct queue_entry *dp;
	int counts[MAXBSIZE/CLBYTES+1];
	static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" };

	for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) {
		count = 0;
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			counts[j] = 0;
		s = splbio();
		for (bp = dp->qe_next; bp; bp = bp->b_freelist.qe_next) {
			counts[bp->b_bufsize/CLBYTES]++;
			count++;
		}
		splx(s);
		printf("%s: total-%d", bname[i], count);
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			if (counts[j] != 0)
				printf(", %d-%d", j * CLBYTES, counts[j]);
		printf("\n");
	}
}
#endif /* DIAGNOSTIC */