use definitions of user and super-user changeable flags
[unix-history] usr/src/sys/kern/vfs_cluster.c
/*-
 * Copyright (c) 1982, 1986, 1989 The Regents of the University of California.
 * All rights reserved.
 *
 * This module is believed to contain source code proprietary to AT&T.
 * Use and redistribution is subject to the Berkeley Software License
 * Agreement and your Software Agreement with AT&T (Western Electric).
 *
 *	@(#)vfs_cluster.c	7.59 (Berkeley) %G%
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/resourcevar.h>
#include <sys/malloc.h>
#include <libkern/libkern.h>

/*
 * Definitions for the buffer hash lists.
 */
#define	BUFHASH(dvp, lbn)	\
	(&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
struct	list_entry *bufhashtbl, invalhash;
u_long	bufhash;

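/*
 * Illustrative example (added for clarity, not part of the original file):
 * BUFHASH folds the vnode address and the logical block number into an
 * index into bufhashtbl[].  hashinit() allocates the table with a
 * power-of-two number of buckets and leaves (nbuckets - 1) in bufhash, so
 * the "& bufhash" masks the sum down to a valid slot.  A stand-alone
 * sketch of the same arithmetic, with an assumed 64-bucket table:
 */
#ifdef notdef
#define	EX_NBUCKET	64			/* assumed power of two */
static u_long ex_mask = EX_NBUCKET - 1;		/* plays the role of bufhash */

static int
ex_bufhash(dvp, lbn)
	struct vnode *dvp;
	daddr_t lbn;
{
	/* same fold as BUFHASH: scale the pointer, add the block, mask */
	return (((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & ex_mask);
}
#endif /* notdef */
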
/*
 * Insq/Remq for the buffer hash lists.
 */
#define	binshash(bp, dp)	list_enter_head(dp, bp, struct buf *, b_hash)
#define	bremhash(bp)		list_remove(bp, struct buf *, b_hash)

/*
 * Definitions for the buffer free lists.
 */
#define	BQUEUES		4		/* number of free buffer queues */

#define	BQ_LOCKED	0		/* super-blocks &c */
#define	BQ_LRU		1		/* lru, useful buffers */
#define	BQ_AGE		2		/* rubbish */
#define	BQ_EMPTY	3		/* buffer headers with no memory */

struct queue_entry bufqueues[BQUEUES];
int needbuffer;

/*
 * Insq/Remq for the buffer free lists.
 */
#define	binsheadfree(bp, dp) \
	queue_enter_head(dp, bp, struct buf *, b_freelist)
#define	binstailfree(bp, dp) \
	queue_enter_tail(dp, bp, struct buf *, b_freelist)

/*
 * Local declarations
 */
struct buf *cluster_newbuf __P((struct vnode *, struct buf *, long, daddr_t,
	    daddr_t, long, int));
struct buf *cluster_rbuild __P((struct vnode *, u_quad_t, struct buf *,
	    daddr_t, daddr_t, long, int, long));
void	    cluster_wbuild __P((struct vnode *, struct buf *, long size,
	    daddr_t start_lbn, int len, daddr_t lbn));

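/*
 * Note added for clarity: the __P() wrapper lets the prototypes above be
 * compiled by both ANSI and pre-ANSI compilers.  <sys/cdefs.h> defines it
 * roughly as sketched below (shown only as an illustration):
 */
#ifdef notdef
#if defined(__STDC__) || defined(__cplusplus)
#define	__P(protos)	protos		/* ANSI C: keep the parameter list */
#else
#define	__P(protos)	()		/* traditional C: drop it */
#endif
#endif /* notdef */
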
void
bremfree(bp)
	struct buf *bp;
{
	struct queue_entry *dp;

	/*
	 * We only calculate the head of the freelist when removing
	 * the last element of the list as that is the only time that
	 * it is needed (e.g. to reset the tail pointer).
	 */
	if (bp->b_freelist.qe_next == NULL) {
		for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
			if (dp->qe_prev == &bp->b_freelist.qe_next)
				break;
		if (dp == &bufqueues[BQUEUES])
			panic("bremfree: lost tail");
	}
	queue_remove(dp, bp, struct buf *, b_freelist);
}
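
/*
 * Illustrative note (added for clarity, not part of the original file):
 * the free-list queue convention keeps the head's qe_prev pointing at the
 * last element's qe_next field, which is why bremfree() above only has to
 * hunt through bufqueues[] when it removes the final element (to repair
 * that tail pointer).  A hypothetical rendering of the link structure the
 * queue macros assume:
 */
#ifdef notdef
struct queue_entry {
	struct buf *qe_next;	/* next buffer, NULL at the tail */
	struct buf **qe_prev;	/* address of the pointer that names us */
};
#endif /* notdef */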
663dbc72 87
e7db227e
MK
88/*
89 * Initialize buffers and hash links for buffers.
90 */
251f56ba 91void
e7db227e
MK
92bufinit()
93{
37392cf8 94 register struct buf *bp;
e3249ec0 95 struct queue_entry *dp;
e7db227e 96 register int i;
e7db227e
MK
97 int base, residual;
98
37392cf8 99 for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
e3249ec0
KM
100 queue_init(dp);
101 bufhashtbl = (struct list_entry *)hashinit(nbuf, M_CACHE, &bufhash);
e7db227e
MK
102 base = bufpages / nbuf;
103 residual = bufpages % nbuf;
104 for (i = 0; i < nbuf; i++) {
105 bp = &buf[i];
37392cf8 106 bzero((char *)bp, sizeof *bp);
e7db227e 107 bp->b_dev = NODEV;
e7db227e
MK
108 bp->b_rcred = NOCRED;
109 bp->b_wcred = NOCRED;
e7db227e
MK
110 bp->b_un.b_addr = buffers + i * MAXBSIZE;
111 if (i < residual)
112 bp->b_bufsize = (base + 1) * CLBYTES;
113 else
114 bp->b_bufsize = base * CLBYTES;
31222d0d 115 bp->b_flags = B_INVAL;
37392cf8 116 dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY];
31222d0d 117 binsheadfree(bp, dp);
37392cf8 118 binshash(bp, &invalhash);
e7db227e
MK
119 }
120}
121
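/*
 * Illustrative example (added for clarity, not part of the original file):
 * bufinit() spreads bufpages CLBYTES-sized pages over nbuf headers by
 * giving the first "residual" buffers one extra page.  For instance, with
 * an assumed bufpages of 103 and nbuf of 20, base = 5 and residual = 3,
 * so buffers 0-2 get 6 pages and buffers 3-19 get 5, accounting for all
 * 103 pages:
 *
 *	3 * (5 + 1) + 17 * 5 = 18 + 85 = 103
 */
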
663dbc72 122/*
d42a4811
KM
123 * Find the block in the buffer pool.
124 * If the buffer is not present, allocate a new buffer and load
125 * its contents according to the filesystem fill routine.
663dbc72 126 */
a937f856 127bread(vp, blkno, size, cred, bpp)
7188ac27 128 struct vnode *vp;
ad30fb67
KM
129 daddr_t blkno;
130 int size;
a937f856 131 struct ucred *cred;
7188ac27 132 struct buf **bpp;
ec67a3ce
MK
133#ifdef SECSIZE
134 long secsize;
135#endif SECSIZE
663dbc72 136{
3789a403 137 struct proc *p = curproc; /* XXX */
663dbc72
BJ
138 register struct buf *bp;
139
4f083fd7
SL
140 if (size == 0)
141 panic("bread: size 0");
ec67a3ce
MK
142#ifdef SECSIZE
143 bp = getblk(dev, blkno, size, secsize);
144#else SECSIZE
e140149a 145 *bpp = bp = getblk(vp, blkno, size, 0, 0);
ec67a3ce 146#endif SECSIZE
d42a4811 147 if (bp->b_flags & (B_DONE | B_DELWRI)) {
c5a600cf 148 trace(TR_BREADHIT, pack(vp, size), blkno);
7188ac27 149 return (0);
663dbc72
BJ
150 }
151 bp->b_flags |= B_READ;
4f083fd7
SL
152 if (bp->b_bcount > bp->b_bufsize)
153 panic("bread");
a937f856
KM
154 if (bp->b_rcred == NOCRED && cred != NOCRED) {
155 crhold(cred);
156 bp->b_rcred = cred;
157 }
7188ac27 158 VOP_STRATEGY(bp);
c5a600cf 159 trace(TR_BREADMISS, pack(vp, size), blkno);
3789a403 160 p->p_stats->p_ru.ru_inblock++; /* pay for read */
7188ac27 161 return (biowait(bp));
663dbc72
BJ
162}
163
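/*
 * Usage sketch (added for illustration, not part of the original file):
 * a filesystem read path calls bread() for the block covering the current
 * offset and releases the buffer with brelse() after copying the data out.
 * Hypothetical caller; vp, lbn and bsize are assumed to be in scope:
 */
#ifdef notdef
	struct buf *bp;
	int error;

	if (error = bread(vp, lbn, bsize, NOCRED, &bp)) {
		brelse(bp);
		return (error);
	}
	/* ... copy from bp->b_un.b_addr ... */
	brelse(bp);
#endif /* notdef */
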
164/*
bb1626f7
KM
165 * Operates like bread, but also starts I/O on the N specified
166 * read-ahead blocks.
663dbc72 167 */
bb1626f7 168breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp)
7188ac27 169 struct vnode *vp;
84baaab3 170 daddr_t blkno; int size;
ec67a3ce
MK
171#ifdef SECSIZE
172 long secsize;
173#endif SECSIZE
bb1626f7
KM
174 daddr_t rablkno[]; int rabsize[];
175 int num;
a937f856 176 struct ucred *cred;
7188ac27 177 struct buf **bpp;
663dbc72 178{
3789a403 179 struct proc *p = curproc; /* XXX */
663dbc72 180 register struct buf *bp, *rabp;
bb1626f7 181 register int i;
663dbc72
BJ
182
183 bp = NULL;
3efdd860 184 /*
d42a4811
KM
185 * If the block is not memory resident,
186 * allocate a buffer and start I/O.
3efdd860 187 */
7188ac27 188 if (!incore(vp, blkno)) {
e140149a 189 *bpp = bp = getblk(vp, blkno, size, 0, 0);
d42a4811 191 if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
663dbc72 192 bp->b_flags |= B_READ;
4f083fd7 193 if (bp->b_bcount > bp->b_bufsize)
bb1626f7 194 panic("breadn");
a937f856
KM
195 if (bp->b_rcred == NOCRED && cred != NOCRED) {
196 crhold(cred);
197 bp->b_rcred = cred;
198 }
7188ac27 199 VOP_STRATEGY(bp);
c5a600cf 200 trace(TR_BREADMISS, pack(vp, size), blkno);
3789a403 201 p->p_stats->p_ru.ru_inblock++; /* pay for read */
7d1e9cf4 202 } else {
c5a600cf 203 trace(TR_BREADHIT, pack(vp, size), blkno);
7d1e9cf4 204 }
663dbc72 205 }
3efdd860
KM
206
207 /*
bb1626f7
KM
208 * If there's read-ahead block(s), start I/O
209 * on them also (as above).
3efdd860 210 */
bb1626f7
KM
211 for (i = 0; i < num; i++) {
212 if (incore(vp, rablkno[i]))
213 continue;
e140149a 214 rabp = getblk(vp, rablkno[i], rabsize[i], 0, 0);
d42a4811 216 if (rabp->b_flags & (B_DONE | B_DELWRI)) {
663dbc72 217 brelse(rabp);
bb1626f7 218 trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]);
973ecc4f 219 } else {
d42a4811 220 rabp->b_flags |= B_ASYNC | B_READ;
4f083fd7
SL
221 if (rabp->b_bcount > rabp->b_bufsize)
222 panic("breadrabp");
5062ac4a 223 if (rabp->b_rcred == NOCRED && cred != NOCRED) {
a937f856 224 crhold(cred);
5062ac4a 225 rabp->b_rcred = cred;
a937f856 226 }
7188ac27 227 VOP_STRATEGY(rabp);
bb1626f7 228 trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]);
3789a403 229 p->p_stats->p_ru.ru_inblock++; /* pay in advance */
663dbc72
BJ
230 }
231 }
3efdd860
KM
232
233 /*
d42a4811
KM
234 * If block was memory resident, let bread get it.
235 * If block was not memory resident, the read was
236 * started above, so just wait for the read to complete.
3efdd860 237 */
	if (bp == NULL)
#ifdef SECSIZE
		return (bread(dev, blkno, size, secsize));
#else SECSIZE
		return (bread(vp, blkno, size, cred, bpp));
#endif SECSIZE
	return (biowait(bp));
}
245
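/*
 * Usage sketch (added for illustration, not part of the original file):
 * breadn() behaves like bread() for the named block, but also starts
 * asynchronous reads on the blocks the caller expects to need next.  A
 * hypothetical call asking for one block of read-ahead:
 */
#ifdef notdef
	daddr_t ra_lbn = lbn + 1;	/* predicted next logical block */
	int ra_size = bsize;		/* vp, lbn, bsize assumed in scope */

	error = breadn(vp, lbn, bsize, &ra_lbn, &ra_size, 1, NOCRED, &bp);
#endif /* notdef */
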
888c761e
MS
246/*
247 * We could optimize this by keeping track of where the last read-ahead
248 * was, but it would involve adding fields to the vnode. For now, let's
249 * just get it working.
250 *
251 * This replaces bread. If this is a bread at the beginning of a file and
252 * lastr is 0, we assume this is the first read and we'll read up to two
253 * blocks if they are sequential. After that, we'll do regular read ahead
254 * in clustered chunks.
255 *
256 * There are 4 or 5 cases depending on how you count:
257 * Desired block is in the cache:
258 * 1 Not sequential access (0 I/Os).
259 * 2 Access is sequential, do read-ahead (1 ASYNC).
260 * Desired block is not in cache:
261 * 3 Not sequential access (1 SYNC).
262 * 4 Sequential access, next block is contiguous (1 SYNC).
263 * 5 Sequential access, next block is not contiguous (1 SYNC, 1 ASYNC)
264 *
265 * There are potentially two buffers that require I/O.
266 * bp is the block requested.
267 * rbp is the read-ahead block.
268 * If either is NULL, then you don't have to do the I/O.
269 */
270cluster_read(vp, filesize, lblkno, size, cred, bpp)
271 struct vnode *vp;
272 u_quad_t filesize;
273 daddr_t lblkno;
274 long size;
275 struct ucred *cred;
276 struct buf **bpp;
277{
278 struct buf *bp, *rbp;
279 daddr_t blkno, ioblkno;
280 long flags;
281 int error, num_ra, alreadyincore;
282
283#ifdef DIAGNOSTIC
284 if (size == 0)
285 panic("cluster_read: size = 0");
286#endif
287
288 error = 0;
289 flags = B_READ;
e140149a 290 *bpp = bp = getblk(vp, lblkno, size, 0, 0);
888c761e
MS
291 if (bp->b_flags & (B_CACHE | B_DONE | B_DELWRI)) {
292 /*
293 * Desired block is in cache; do any readahead ASYNC.
294 * Case 1, 2.
295 */
296 trace(TR_BREADHIT, pack(vp, size), lblkno);
297 flags |= B_ASYNC;
298 ioblkno = lblkno +
299 (lblkno < vp->v_ralen ? vp->v_ralen >> 1 : vp->v_ralen);
e140149a 300 alreadyincore = (int)incore(vp, ioblkno);
888c761e
MS
301 bp = NULL;
302 } else {
303 /* Block wasn't in cache, case 3, 4, 5. */
304 trace(TR_BREADMISS, pack(vp, size), lblkno);
305 ioblkno = lblkno;
306 bp->b_flags |= flags;
307 alreadyincore = 0;
308 curproc->p_stats->p_ru.ru_inblock++; /* XXX */
309 }
310 /*
311 * XXX
312 * Replace 1 with a window size based on some permutation of
313 * maxcontig and rot_delay. This will let you figure out how
314 * many blocks you should read-ahead (case 2, 4, 5).
315 *
316 * If the access isn't sequential, cut the window size in half.
317 */
318 rbp = NULL;
319 if (lblkno != vp->v_lastr + 1 && lblkno != 0)
320 vp->v_ralen = max(vp->v_ralen >> 1, 1);
321 else if ((ioblkno + 1) * size < filesize && !alreadyincore &&
322 !(error = VOP_BMAP(vp, ioblkno, NULL, &blkno, &num_ra))) {
323 /*
324 * Reading sequentially, and the next block is not in the
325 * cache. We are going to try reading ahead. If this is
326 * the first read of a file, then limit read-ahead to a
327 * single block, else read as much as we're allowed.
328 */
329 if (num_ra > vp->v_ralen) {
330 num_ra = vp->v_ralen;
331 vp->v_ralen = min(MAXPHYS / size, vp->v_ralen << 1);
332 } else
333 vp->v_ralen = num_ra + 1;
334
335
336 if (num_ra) /* case 2, 4 */
337 rbp = cluster_rbuild(vp, filesize,
338 bp, ioblkno, blkno, size, num_ra, flags);
339 else if (lblkno != 0 && ioblkno == lblkno) {
340 /* Case 5: check how many blocks to read ahead */
341 ++ioblkno;
342 if ((ioblkno + 1) * size > filesize ||
343 (error = VOP_BMAP(vp,
344 ioblkno, NULL, &blkno, &num_ra)))
345 goto skip_readahead;
346 flags |= B_ASYNC;
347 if (num_ra)
348 rbp = cluster_rbuild(vp, filesize,
349 NULL, ioblkno, blkno, size, num_ra, flags);
350 else {
e140149a 351 rbp = getblk(vp, ioblkno, size, 0, 0);
888c761e
MS
352 rbp->b_flags |= flags;
353 rbp->b_blkno = blkno;
354 }
355 } else if (lblkno != 0) {
356 /* case 2; read ahead single block */
e140149a 357 rbp = getblk(vp, ioblkno, size, 0, 0);
888c761e
MS
358 rbp->b_flags |= flags;
359 rbp->b_blkno = blkno;
360 } else if (bp) /* case 1, 3, block 0 */
361 bp->b_blkno = blkno;
362 /* Case 1 on block 0; not really doing sequential I/O */
363
364 if (rbp == bp) /* case 4 */
365 rbp = NULL;
366 else if (rbp) { /* case 2, 5 */
367 trace(TR_BREADMISSRA,
368 pack(vp, (num_ra + 1) * size), ioblkno);
369 curproc->p_stats->p_ru.ru_inblock++; /* XXX */
370 }
371 }
372
373 /* XXX Kirk, do we need to make sure the bp has creds? */
374skip_readahead:
375 if (bp)
376 if (bp->b_flags & (B_DONE | B_DELWRI))
377 panic("cluster_read: DONE bp");
378 else
379 error = VOP_STRATEGY(bp);
380
381 if (rbp)
382 if (error || rbp->b_flags & (B_DONE | B_DELWRI)) {
383 rbp->b_flags &= ~(B_ASYNC | B_READ);
384 brelse(rbp);
385 } else
386 (void) VOP_STRATEGY(rbp);
387
388 if (bp)
389 return(biowait(bp));
390 return(error);
391}
392
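/*
 * Illustrative note (added for clarity, not part of the original file):
 * cluster_read() grows the per-vnode read-ahead window v_ralen while
 * access stays sequential and halves it on a seek.  With an assumed 8K
 * block size and a MAXPHYS of 64K, the window grows 1, 2, 4, 8 and is
 * then clamped at MAXPHYS / size = 8 blocks; a non-sequential read cuts
 * it back to 4, then 2, and so on, but never below 1.
 */
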
393/*
394 * If blocks are contiguous on disk, use this to provide clustered
395 * read ahead. We will read as many blocks as possible sequentially
396 * and then parcel them up into logical blocks in the buffer hash table.
397 */
398struct buf *
399cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags)
400 struct vnode *vp;
401 u_quad_t filesize;
402 struct buf *bp;
403 daddr_t lbn;
404 daddr_t blkno;
405 long size;
406 int run;
407 long flags;
408{
409 struct cluster_save *b_save;
410 struct buf *tbp;
411 daddr_t bn;
412 int i, inc;
413
c5e0ddad
MS
414#ifdef DIAGNOSTIC
415 if (size != vp->v_mount->mnt_stat.f_iosize)
416 panic("cluster_rbuild: size %d != filesize %d\n",
417 size, vp->v_mount->mnt_stat.f_iosize);
418#endif
888c761e
MS
419 if (size * (lbn + run + 1) > filesize)
420 --run;
421 if (run == 0) {
422 if (!bp) {
e140149a 423 bp = getblk(vp, lbn, size, 0, 0);
888c761e
MS
424 bp->b_blkno = blkno;
425 bp->b_flags |= flags;
426 }
427 return(bp);
428 }
429
430 bp = cluster_newbuf(vp, bp, flags, blkno, lbn, size, run + 1);
431 if (bp->b_flags & (B_DONE | B_DELWRI))
432 return (bp);
433
434 b_save = malloc(sizeof(struct buf *) * run + sizeof(struct cluster_save),
435 M_SEGMENT, M_WAITOK);
436 b_save->bs_bufsize = b_save->bs_bcount = size;
437 b_save->bs_nchildren = 0;
438 b_save->bs_children = (struct buf **)(b_save + 1);
439 b_save->bs_saveaddr = bp->b_saveaddr;
440 bp->b_saveaddr = (caddr_t) b_save;
441
442 inc = size / DEV_BSIZE;
443 for (bn = blkno + inc, i = 1; i <= run; ++i, bn += inc) {
444 if (incore(vp, lbn + i)) {
445 if (i == 1) {
446 bp->b_saveaddr = b_save->bs_saveaddr;
447 bp->b_flags &= ~B_CALL;
448 bp->b_iodone = NULL;
449 allocbuf(bp, size);
450 free(b_save, M_SEGMENT);
451 } else
452 allocbuf(bp, size * i);
453 break;
454 }
e140149a 455 tbp = getblk(vp, lbn + i, 0, 0, 0);
888c761e
MS
456 tbp->b_bcount = tbp->b_bufsize = size;
457 tbp->b_blkno = bn;
458 tbp->b_flags |= flags | B_READ | B_ASYNC;
459 ++b_save->bs_nchildren;
460 b_save->bs_children[i - 1] = tbp;
461 }
462 if (!(bp->b_flags & B_ASYNC))
463 vp->v_ralen = max(vp->v_ralen - 1, 1);
464 return(bp);
465}
466
467/*
468 * Either get a new buffer or grow the existing one.
469 */
470struct buf *
471cluster_newbuf(vp, bp, flags, blkno, lblkno, size, run)
472 struct vnode *vp;
473 struct buf *bp;
474 long flags;
475 daddr_t blkno;
476 daddr_t lblkno;
477 long size;
478 int run;
479{
480 if (!bp) {
e140149a 481 bp = getblk(vp, lblkno, size, 0, 0);
888c761e
MS
482 if (bp->b_flags & (B_DONE | B_DELWRI)) {
483 bp->b_blkno = blkno;
484 return(bp);
485 }
486 }
487 allocbuf(bp, run * size);
488 bp->b_blkno = blkno;
489 bp->b_iodone = cluster_callback;
490 bp->b_flags |= flags | B_CALL;
491 return(bp);
492}
493
494/*
495 * Cleanup after a clustered read or write.
496 */
497void
498cluster_callback(bp)
499 struct buf *bp;
500{
501 struct cluster_save *b_save;
502 struct buf **tbp;
503 long bsize;
504 caddr_t cp;
888c761e
MS
505 b_save = (struct cluster_save *)(bp->b_saveaddr);
506 bp->b_saveaddr = b_save->bs_saveaddr;
507
508 cp = bp->b_un.b_addr + b_save->bs_bufsize;
509 for (tbp = b_save->bs_children; b_save->bs_nchildren--; ++tbp) {
510 pagemove(cp, (*tbp)->b_un.b_addr, (*tbp)->b_bufsize);
511 cp += (*tbp)->b_bufsize;
512 bp->b_bufsize -= (*tbp)->b_bufsize;
513 biodone(*tbp);
514 }
515#ifdef DIAGNOSTIC
516 if (bp->b_bufsize != b_save->bs_bufsize)
517 panic ("cluster_callback: more space to reclaim");
518#endif
519 bp->b_bcount = bp->b_bufsize;
520 bp->b_iodone = NULL;
521 free(b_save, M_SEGMENT);
522 if (bp->b_flags & B_ASYNC)
523 brelse(bp);
524 else
525 wakeup((caddr_t)bp);
526}
527
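/*
 * Illustrative example (added for clarity, not part of the original file):
 * cluster_callback() hands the tail of the parent buffer's memory back to
 * the component buffers.  With an assumed 8K block size and a cluster of
 * one parent plus two children, the parent finishes the I/O holding 24K;
 * bs_bufsize is 8K, so cp starts 8K into the data, each pagemove() returns
 * 8K to a child, and the parent is left with its original 8K
 * (bp->b_bufsize drops 24K -> 16K -> 8K, matching bs_bufsize).
 */
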
663dbc72 528/*
d42a4811
KM
529 * Synchronous write.
530 * Release buffer on completion.
663dbc72
BJ
531 */
532bwrite(bp)
3efdd860 533 register struct buf *bp;
663dbc72 534{
3789a403 535 struct proc *p = curproc; /* XXX */
7188ac27 536 register int flag;
31222d0d 537 int s, error = 0;
663dbc72
BJ
538
539 flag = bp->b_flags;
f844ee62 540 bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
77dc8a8c
KM
541 if (flag & B_ASYNC) {
542 if ((flag & B_DELWRI) == 0)
543 p->p_stats->p_ru.ru_oublock++; /* no one paid yet */
544 else
545 reassignbuf(bp, bp->b_vp);
546 }
c5a600cf 547 trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
4f083fd7
SL
548 if (bp->b_bcount > bp->b_bufsize)
549 panic("bwrite");
86e7dd3b 550 s = splbio();
c669f646 551 bp->b_vp->v_numoutput++;
e140149a 552 bp->b_flags |= B_WRITEINPROG;
86e7dd3b 553 splx(s);
7188ac27 554 VOP_STRATEGY(bp);
3efdd860
KM
555
556 /*
d42a4811 557 * If the write was synchronous, then await I/O completion.
3efdd860 558 * If the write was "delayed", then we put the buffer on
d42a4811 559 * the queue of blocks awaiting I/O completion status.
3efdd860 560 */
d42a4811 561 if ((flag & B_ASYNC) == 0) {
7188ac27 562 error = biowait(bp);
77dc8a8c
KM
563 if ((flag&B_DELWRI) == 0)
564 p->p_stats->p_ru.ru_oublock++; /* no one paid yet */
565 else
566 reassignbuf(bp, bp->b_vp);
e140149a
KM
567 if (bp->b_flags & B_EINTR) {
568 bp->b_flags &= ~B_EINTR;
569 error = EINTR;
570 }
663dbc72 571 brelse(bp);
7188ac27 572 } else if (flag & B_DELWRI) {
31222d0d 573 s = splbio();
663dbc72 574 bp->b_flags |= B_AGE;
31222d0d 575 splx(s);
7188ac27
KM
576 }
577 return (error);
663dbc72
BJ
578}
579
80746147
JH
580int
581vn_bwrite(ap)
582 struct vop_bwrite_args *ap;
583{
37392cf8 584 return (bwrite(ap->a_bp));
80746147
JH
585}
586
587
663dbc72 588/*
d42a4811
KM
589 * Delayed write.
590 *
591 * The buffer is marked dirty, but is not queued for I/O.
592 * This routine should be used when the buffer is expected
593 * to be modified again soon, typically a small write that
594 * partially fills a buffer.
595 *
596 * NB: magnetic tapes cannot be delayed; they must be
597 * written in the order that the writes are requested.
663dbc72
BJ
598 */
599bdwrite(bp)
3efdd860 600 register struct buf *bp;
663dbc72 601{
3789a403 602 struct proc *p = curproc; /* XXX */
663dbc72 603
c669f646
KM
604 if ((bp->b_flags & B_DELWRI) == 0) {
605 bp->b_flags |= B_DELWRI;
606 reassignbuf(bp, bp->b_vp);
3789a403 607 p->p_stats->p_ru.ru_oublock++; /* no one paid yet */
c669f646 608 }
	/*
	 * If this is a tape drive, the write must be initiated.
	 */
	if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE) {
		bawrite(bp);
	} else {
		bp->b_flags |= (B_DONE | B_DELWRI);
		brelse(bp);
	}
}
619
620/*
d42a4811
KM
621 * Asynchronous write.
622 * Start I/O on a buffer, but do not wait for it to complete.
623 * The buffer is released when the I/O completes.
663dbc72
BJ
624 */
625bawrite(bp)
3efdd860 626 register struct buf *bp;
663dbc72
BJ
627{
628
d42a4811
KM
629 /*
630 * Setting the ASYNC flag causes bwrite to return
631 * after starting the I/O.
632 */
663dbc72 633 bp->b_flags |= B_ASYNC;
e140149a 634 (void) VOP_BWRITE(bp);
663dbc72
BJ
635}
636
888c761e
MS
637/*
638 * Do clustered write for FFS.
639 *
640 * Three cases:
641 * 1. Write is not sequential (write asynchronously)
642 * Write is sequential:
643 * 2. beginning of cluster - begin cluster
644 * 3. middle of a cluster - add to cluster
645 * 4. end of a cluster - asynchronously write cluster
646 */
647void
648cluster_write(bp, filesize)
649 struct buf *bp;
650 u_quad_t filesize;
651{
652 struct vnode *vp;
653 daddr_t lbn;
c5e0ddad 654 int clen;
888c761e
MS
655
656 vp = bp->b_vp;
657 lbn = bp->b_lblkno;
888c761e 658
c5e0ddad
MS
659 /* Initialize vnode to beginning of file. */
660 if (lbn == 0)
661 vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;
662
663 if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 ||
664 (bp->b_blkno != vp->v_lasta + bp->b_bcount / DEV_BSIZE)) {
888c761e
MS
665 if (vp->v_clen != 0)
666 /*
667 * Write is not sequential.
668 */
669 cluster_wbuild(vp, NULL, bp->b_bcount, vp->v_cstart,
670 vp->v_lastw - vp->v_cstart + 1, lbn);
671 /*
672 * Consider beginning a cluster.
673 */
c5e0ddad
MS
674 if ((lbn + 1) * bp->b_bcount == filesize)
675 /* End of file, make cluster as large as possible */
676 clen = MAXBSIZE / vp->v_mount->mnt_stat.f_iosize - 1;
677 else if (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &clen)) {
888c761e 678 bawrite(bp);
c5e0ddad
MS
679 vp->v_clen = 0;
680 vp->v_lasta = bp->b_blkno;
888c761e
MS
681 vp->v_cstart = lbn + 1;
682 vp->v_lastw = lbn;
683 return;
c5e0ddad
MS
684 } else
685 clen = 0;
888c761e
MS
686 vp->v_clen = clen;
687 if (clen == 0) { /* I/O not contiguous */
688 vp->v_cstart = lbn + 1;
689 bawrite(bp);
690 } else { /* Wait for rest of cluster */
691 vp->v_cstart = lbn;
692 bdwrite(bp);
693 }
694 } else if (lbn == vp->v_cstart + vp->v_clen) {
695 /*
696 * At end of cluster, write it out.
697 */
698 cluster_wbuild(vp, bp, bp->b_bcount, vp->v_cstart,
699 vp->v_clen + 1, lbn);
700 vp->v_clen = 0;
701 vp->v_cstart = lbn + 1;
702 } else
703 /*
704 * In the middle of a cluster, so just delay the
705 * I/O for now.
706 */
707 bdwrite(bp);
708 vp->v_lastw = lbn;
c5e0ddad 709 vp->v_lasta = bp->b_blkno;
888c761e
MS
710}
711
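/*
 * Illustrative example (added for clarity, not part of the original file):
 * the sequentiality test above requires both the logical and the physical
 * blocks to follow on from the previous write.  With an assumed 8K block
 * (b_bcount = 8192) and DEV_BSIZE of 512, a write joins the current
 * cluster only if lbn == v_lastw + 1 and
 * b_blkno == v_lasta + 8192 / 512, i.e. exactly 16 sectors beyond the
 * previous buffer's disk address.
 */
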
712
713/*
714 * This is an awful lot like cluster_rbuild...wish they could be combined.
715 * The last lbn argument is the current block on which I/O is being
716 * performed. Check to see that it doesn't fall in the middle of
717 * the current block.
718 */
719void
720cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn)
721 struct vnode *vp;
722 struct buf *last_bp;
723 long size;
724 daddr_t start_lbn;
725 int len;
726 daddr_t lbn;
727{
728 struct cluster_save *b_save;
729 struct buf *bp, *tbp;
730 caddr_t cp;
731 int i, s;
732
c5e0ddad
MS
733#ifdef DIAGNOSTIC
734 if (size != vp->v_mount->mnt_stat.f_iosize)
735 panic("cluster_wbuild: size %d != filesize %d\n",
736 size, vp->v_mount->mnt_stat.f_iosize);
737#endif
888c761e
MS
738redo:
739 while ((!incore(vp, start_lbn) || start_lbn == lbn) && len) {
740 ++start_lbn;
741 --len;
742 }
743
744 /* Get more memory for current buffer */
745 if (len <= 1) {
c5e0ddad 746 if (last_bp) {
888c761e 747 bawrite(last_bp);
c5e0ddad
MS
748 } else if (len) {
749 bp = getblk(vp, start_lbn, size, 0, 0);
750 bawrite(bp);
751 }
888c761e
MS
752 return;
753 }
754
e140149a 755 bp = getblk(vp, start_lbn, size, 0, 0);
888c761e
MS
756 if (!(bp->b_flags & B_DELWRI)) {
757 ++start_lbn;
758 --len;
759 brelse(bp);
760 goto redo;
761 }
762
763 --len;
764 b_save = malloc(sizeof(struct buf *) * len + sizeof(struct cluster_save),
765 M_SEGMENT, M_WAITOK);
766 b_save->bs_bcount = bp->b_bcount;
767 b_save->bs_bufsize = bp->b_bufsize;
768 b_save->bs_nchildren = 0;
769 b_save->bs_children = (struct buf **)(b_save + 1);
770 b_save->bs_saveaddr = bp->b_saveaddr;
771 bp->b_saveaddr = (caddr_t) b_save;
772
773
774 bp->b_flags |= B_CALL;
775 bp->b_iodone = cluster_callback;
776 cp = bp->b_un.b_addr + bp->b_bufsize;
777 for (++start_lbn, i = 0; i < len; ++i, ++start_lbn) {
778 if (!incore(vp, start_lbn) || start_lbn == lbn)
779 break;
780
781 if (last_bp == NULL || start_lbn != last_bp->b_lblkno) {
e140149a 782 tbp = getblk(vp, start_lbn, size, 0, 0);
888c761e
MS
783#ifdef DIAGNOSTIC
784 if (tbp->b_bcount != tbp->b_bufsize)
785 panic("cluster_wbuild: Buffer too big");
786#endif
787 if (!(tbp->b_flags & B_DELWRI)) {
788 brelse(tbp);
789 break;
790 }
791 } else
792 tbp = last_bp;
793
794 ++b_save->bs_nchildren;
795
796 /* Move memory from children to parent */
c5e0ddad
MS
797 if (tbp->b_blkno != (bp->b_blkno + bp->b_bufsize / DEV_BSIZE)) {
798 printf("Clustered Block: %d addr %x bufsize: %d\n",
799 bp->b_lblkno, bp->b_blkno, bp->b_bufsize);
800 printf("Child Block: %d addr: %x\n", tbp->b_lblkno,
801 tbp->b_blkno);
802 panic("Clustered write to wrong blocks");
803 }
804
888c761e
MS
805 pagemove(tbp->b_un.b_daddr, cp, size);
806 bp->b_bcount += size;
807 bp->b_bufsize += size;
808
809 tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
810 tbp->b_flags |= B_ASYNC;
811 s = splbio();
812 reassignbuf(tbp, tbp->b_vp); /* put on clean list */
813 ++tbp->b_vp->v_numoutput;
814 splx(s);
815 b_save->bs_children[i] = tbp;
816
817 cp += tbp->b_bufsize;
818 }
819
820 if (i == 0) {
821 /* None to cluster */
822 bp->b_saveaddr = b_save->bs_saveaddr;
823 bp->b_flags &= ~B_CALL;
824 bp->b_iodone = NULL;
825 free(b_save, M_SEGMENT);
826 }
827 bawrite(bp);
828 if (i < len) {
829 len -= i + 1;
830 start_lbn += 1;
831 goto redo;
832 }
833}
834
663dbc72 835/*
d42a4811
KM
836 * Release a buffer.
837 * Even if the buffer is dirty, no I/O is started.
663dbc72
BJ
838 */
839brelse(bp)
3efdd860 840 register struct buf *bp;
663dbc72 841{
e3249ec0 842 register struct queue_entry *flist;
d42a4811 843 int s;
663dbc72 844
c5a600cf 845 trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
3efdd860 846 /*
edadbc2c
KM
847 * If a process is waiting for the buffer, or
848 * is waiting for a free buffer, awaken it.
3efdd860 849 */
d42a4811 850 if (bp->b_flags & B_WANTED)
663dbc72 851 wakeup((caddr_t)bp);
37392cf8
KM
852 if (needbuffer) {
853 needbuffer = 0;
854 wakeup((caddr_t)&needbuffer);
663dbc72 855 }
edadbc2c
KM
856 /*
857 * Retry I/O for locked buffers rather than invalidating them.
858 */
31222d0d 859 s = splbio();
edadbc2c
KM
860 if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
861 bp->b_flags &= ~B_ERROR;
edadbc2c
KM
862 /*
863 * Disassociate buffers that are no longer valid.
864 */
d42a4811 865 if (bp->b_flags & (B_NOCACHE | B_ERROR))
7188ac27 866 bp->b_flags |= B_INVAL;
d42a4811 867 if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
edadbc2c
KM
868 if (bp->b_vp)
869 brelvp(bp);
870 bp->b_flags &= ~B_DELWRI;
7188ac27 871 }
3efdd860
KM
872 /*
873 * Stick the buffer back on a free list.
874 */
4f083fd7
SL
875 if (bp->b_bufsize <= 0) {
876 /* block has no buffer ... put at front of unused buffer list */
37392cf8 877 flist = &bufqueues[BQ_EMPTY];
4f083fd7 878 binsheadfree(bp, flist);
d42a4811 879 } else if (bp->b_flags & (B_ERROR | B_INVAL)) {
46387ee3 880 /* block has no info ... put at front of most free list */
37392cf8 881 flist = &bufqueues[BQ_AGE];
3efdd860 882 binsheadfree(bp, flist);
663dbc72 883 } else {
46387ee3 884 if (bp->b_flags & B_LOCKED)
37392cf8 885 flist = &bufqueues[BQ_LOCKED];
46387ee3 886 else if (bp->b_flags & B_AGE)
37392cf8 887 flist = &bufqueues[BQ_AGE];
46387ee3 888 else
37392cf8 889 flist = &bufqueues[BQ_LRU];
3efdd860 890 binstailfree(bp, flist);
663dbc72 891 }
d42a4811 892 bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
663dbc72
BJ
893 splx(s);
894}
895
896/*
d42a4811 897 * Check to see if a block is currently memory resident.
663dbc72 898 */
e140149a 899struct buf *
7188ac27
KM
900incore(vp, blkno)
901 struct vnode *vp;
3efdd860 902 daddr_t blkno;
663dbc72
BJ
903{
904 register struct buf *bp;
663dbc72 905
e3249ec0 906 for (bp = BUFHASH(vp, blkno)->le_next; bp; bp = bp->b_hash.qe_next)
edadbc2c 907 if (bp->b_lblkno == blkno && bp->b_vp == vp &&
3efdd860 908 (bp->b_flags & B_INVAL) == 0)
e140149a
KM
909 return (bp);
910 return (NULL);
663dbc72
BJ
911}
912
edadbc2c 913/*
d42a4811
KM
914 * Check to see if a block is currently memory resident.
915 * If it is resident, return it. If it is not resident,
916 * allocate a new buffer and assign it to the block.
663dbc72
BJ
917 */
918struct buf *
ec67a3ce
MK
#ifdef SECSIZE
getblk(dev, blkno, size, secsize)
#else SECSIZE
getblk(vp, blkno, size, slpflag, slptimeo)
#endif SECSIZE
	register struct vnode *vp;
	daddr_t blkno;
	int size, slpflag, slptimeo;
ec67a3ce
MK
926#ifdef SECSIZE
927 long secsize;
928#endif SECSIZE
663dbc72 929{
e3249ec0
KM
930 register struct buf *bp;
931 struct list_entry *dp;
e140149a 932 int s, error;
663dbc72 933
00a6a148
KM
934 if (size > MAXBSIZE)
935 panic("getblk: size too big");
3efdd860 936 /*
d42a4811
KM
937 * Search the cache for the block. If the buffer is found,
938 * but it is currently locked, then we must wait for it to
939 * become available.
3efdd860 940 */
7188ac27 941 dp = BUFHASH(vp, blkno);
3efdd860 942loop:
e3249ec0 943 for (bp = dp->le_next; bp; bp = bp->b_hash.qe_next) {
e140149a 944 if (bp->b_lblkno != blkno || bp->b_vp != vp)
663dbc72 945 continue;
a5e62f37 946 s = splbio();
d42a4811 947 if (bp->b_flags & B_BUSY) {
663dbc72 948 bp->b_flags |= B_WANTED;
e140149a
KM
949 error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
950 "getblk", slptimeo);
23900030 951 splx(s);
e140149a
KM
952 if (error)
953 return (NULL);
663dbc72
BJ
954 goto loop;
955 }
e140149a
KM
956 /*
957 * The test for B_INVAL is moved down here, since there
958 * are cases where B_INVAL is set before VOP_BWRITE() is
959 * called and for NFS, the process cannot be allowed to
960 * allocate a new buffer for the same block until the write
961 * back to the server has been completed. (ie. B_BUSY clears)
962 */
963 if (bp->b_flags & B_INVAL) {
964 splx(s);
965 continue;
966 }
c669f646
KM
967 bremfree(bp);
968 bp->b_flags |= B_BUSY;
23900030 969 splx(s);
32a56bda 970 if (bp->b_bcount != size) {
edadbc2c
KM
971 printf("getblk: stray size");
972 bp->b_flags |= B_INVAL;
e140149a 973 VOP_BWRITE(bp);
9d6d37ce 974 goto loop;
edadbc2c 975 }
663dbc72 976 bp->b_flags |= B_CACHE;
a5e62f37 977 return (bp);
663dbc72 978 }
e140149a
KM
979 /*
980 * The loop back to the top when getnewbuf() fails is because
981 * stateless filesystems like NFS have no node locks. Thus,
982 * there is a slight chance that more than one process will
983 * try and getnewbuf() for the same block concurrently when
984 * the first sleeps in getnewbuf(). So after a sleep, go back
985 * up to the top to check the hash lists again.
986 */
987 if ((bp = getnewbuf(slpflag, slptimeo)) == 0)
988 goto loop;
3efdd860 989 bremhash(bp);
edadbc2c 990 bgetvp(vp, bp);
521a4688 991 bp->b_bcount = 0;
edadbc2c 992 bp->b_lblkno = blkno;
ec67a3ce
MK
993#ifdef SECSIZE
994 bp->b_blksize = secsize;
995#endif SECSIZE
ad30fb67 996 bp->b_blkno = blkno;
4f083fd7 997 bp->b_error = 0;
7188ac27
KM
998 bp->b_resid = 0;
999 binshash(bp, dp);
521a4688 1000 allocbuf(bp, size);
a5e62f37 1001 return (bp);
663dbc72
BJ
1002}
1003
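/*
 * Usage sketch (added for illustration, not part of the original file):
 * a write path that will overwrite a whole block does not need to read it
 * first, so it calls getblk() rather than bread(), fills the memory, and
 * then schedules the write.  Hypothetical caller; vp, lbn and bsize are
 * assumed to be in scope:
 */
#ifdef notdef
	bp = getblk(vp, lbn, bsize, 0, 0);
	/* ... copy the new contents into bp->b_un.b_addr ... */
	if (expect_more_writes_soon)
		bdwrite(bp);		/* mark dirty, defer the I/O */
	else
		error = bwrite(bp);	/* start the I/O and wait for it */
#endif /* notdef */
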
1004/*
d42a4811
KM
1005 * Allocate a buffer.
1006 * The caller will assign it to a block.
663dbc72
BJ
1007 */
1008struct buf *
ad30fb67
KM
1009geteblk(size)
1010 int size;
663dbc72 1011{
37392cf8 1012 register struct buf *bp;
663dbc72 1013
00a6a148
KM
1014 if (size > MAXBSIZE)
1015 panic("geteblk: size too big");
e140149a
KM
1016 while ((bp = getnewbuf(0, 0)) == NULL)
1017 /* void */;
4f083fd7 1018 bp->b_flags |= B_INVAL;
3efdd860 1019 bremhash(bp);
37392cf8 1020 binshash(bp, &invalhash);
521a4688 1021 bp->b_bcount = 0;
ec67a3ce
MK
1022#ifdef SECSIZE
1023 bp->b_blksize = DEV_BSIZE;
1024#endif SECSIZE
4f083fd7 1025 bp->b_error = 0;
7188ac27 1026 bp->b_resid = 0;
521a4688 1027 allocbuf(bp, size);
a5e62f37 1028 return (bp);
663dbc72
BJ
1029}
1030
ad30fb67 1031/*
521a4688 1032 * Expand or contract the actual memory allocated to a buffer.
d42a4811 1033 * If no memory is available, release buffer and take error exit.
ad30fb67 1034 */
521a4688
KM
1035allocbuf(tp, size)
1036 register struct buf *tp;
ad30fb67
KM
1037 int size;
1038{
521a4688
KM
1039 register struct buf *bp, *ep;
1040 int sizealloc, take, s;
ad30fb67 1041
521a4688
KM
1042 sizealloc = roundup(size, CLBYTES);
1043 /*
1044 * Buffer size does not change
1045 */
1046 if (sizealloc == tp->b_bufsize)
1047 goto out;
1048 /*
1049 * Buffer size is shrinking.
1050 * Place excess space in a buffer header taken from the
1051 * BQ_EMPTY buffer list and placed on the "most free" list.
1052 * If no extra buffer headers are available, leave the
1053 * extra space in the present buffer.
1054 */
1055 if (sizealloc < tp->b_bufsize) {
e3249ec0 1056 if ((ep = bufqueues[BQ_EMPTY].qe_next) == NULL)
521a4688
KM
1057 goto out;
1058 s = splbio();
1059 bremfree(ep);
1060 ep->b_flags |= B_BUSY;
1061 splx(s);
1062 pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
1063 (int)tp->b_bufsize - sizealloc);
1064 ep->b_bufsize = tp->b_bufsize - sizealloc;
1065 tp->b_bufsize = sizealloc;
1066 ep->b_flags |= B_INVAL;
1067 ep->b_bcount = 0;
1068 brelse(ep);
1069 goto out;
1070 }
1071 /*
1072 * More buffer space is needed. Get it out of buffers on
1073 * the "most free" list, placing the empty headers on the
1074 * BQ_EMPTY buffer header list.
1075 */
1076 while (tp->b_bufsize < sizealloc) {
1077 take = sizealloc - tp->b_bufsize;
e140149a
KM
1078 while ((bp = getnewbuf(0, 0)) == NULL)
1079 /* void */;
521a4688
KM
1080 if (take >= bp->b_bufsize)
1081 take = bp->b_bufsize;
1082 pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
1083 &tp->b_un.b_addr[tp->b_bufsize], take);
1084 tp->b_bufsize += take;
1085 bp->b_bufsize = bp->b_bufsize - take;
1086 if (bp->b_bcount > bp->b_bufsize)
1087 bp->b_bcount = bp->b_bufsize;
1088 if (bp->b_bufsize <= 0) {
1089 bremhash(bp);
37392cf8 1090 binshash(bp, &invalhash);
d42a4811 1091 bp->b_dev = NODEV;
521a4688
KM
1092 bp->b_error = 0;
1093 bp->b_flags |= B_INVAL;
1094 }
1095 brelse(bp);
1096 }
1097out:
1098 tp->b_bcount = size;
1099 return (1);
4f083fd7
SL
1100}
1101
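/*
 * Illustrative example (added for clarity, not part of the original file):
 * allocbuf() works in CLBYTES units, so the requested byte count is first
 * rounded up to a whole number of pages.  With an assumed CLBYTES of 4096,
 * a request for 6144 bytes gives sizealloc = roundup(6144, 4096) = 8192;
 * memory is then stolen from or returned to other buffers until b_bufsize
 * matches, while b_bcount is set to the exact 6144 bytes requested.
 */
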
4f083fd7
SL
1102/*
1103 * Find a buffer which is available for use.
1104 * Select something from a free list.
1105 * Preference is to AGE list, then LRU list.
1106 */
1107struct buf *
e140149a
KM
1108getnewbuf(slpflag, slptimeo)
1109 int slpflag, slptimeo;
4f083fd7 1110{
37392cf8 1111 register struct buf *bp;
e3249ec0 1112 register struct queue_entry *dp;
a937f856 1113 register struct ucred *cred;
4f083fd7
SL
1114 int s;
1115
1116loop:
a5e62f37 1117 s = splbio();
37392cf8 1118 for (dp = &bufqueues[BQ_AGE]; dp > bufqueues; dp--)
e3249ec0 1119 if (dp->qe_next)
4f083fd7 1120 break;
37392cf8
KM
1121 if (dp == bufqueues) { /* no free blocks */
1122 needbuffer = 1;
e140149a
KM
1123 (void) tsleep((caddr_t)&needbuffer, slpflag | (PRIBIO + 1),
1124 "getnewbuf", slptimeo);
4b7d506c 1125 splx(s);
e140149a 1126 return (NULL);
4f083fd7 1127 }
e3249ec0 1128 bp = dp->qe_next;
c669f646
KM
1129 bremfree(bp);
1130 bp->b_flags |= B_BUSY;
1131 splx(s);
4f083fd7 1132 if (bp->b_flags & B_DELWRI) {
033a786e 1133 (void) bawrite(bp);
4f083fd7
SL
1134 goto loop;
1135 }
c5a600cf 1136 trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
edadbc2c
KM
1137 if (bp->b_vp)
1138 brelvp(bp);
a937f856
KM
1139 if (bp->b_rcred != NOCRED) {
1140 cred = bp->b_rcred;
1141 bp->b_rcred = NOCRED;
1142 crfree(cred);
1143 }
1144 if (bp->b_wcred != NOCRED) {
1145 cred = bp->b_wcred;
1146 bp->b_wcred = NOCRED;
1147 crfree(cred);
1148 }
4f083fd7 1149 bp->b_flags = B_BUSY;
1c89915d 1150 bp->b_dirtyoff = bp->b_dirtyend = 0;
bb1626f7 1151 bp->b_validoff = bp->b_validend = 0;
4f083fd7
SL
1152 return (bp);
1153}
1154
663dbc72 1155/*
d42a4811
KM
1156 * Wait for I/O to complete.
1157 *
1158 * Extract and return any errors associated with the I/O.
1159 * If the error flag is set, but no specific error is
1160 * given, return EIO.
663dbc72 1161 */
3efdd860 1162biowait(bp)
ad30fb67 1163 register struct buf *bp;
663dbc72 1164{
530d0032 1165 int s;
663dbc72 1166
a5e62f37 1167 s = splbio();
a937f856 1168 while ((bp->b_flags & B_DONE) == 0)
663dbc72 1169 sleep((caddr_t)bp, PRIBIO);
530d0032 1170 splx(s);
7188ac27
KM
1171 if ((bp->b_flags & B_ERROR) == 0)
1172 return (0);
1173 if (bp->b_error)
1174 return (bp->b_error);
1175 return (EIO);
663dbc72
BJ
1176}
1177
663dbc72 1178/*
af04ce66 1179 * Mark I/O complete on a buffer.
d42a4811
KM
1180 *
1181 * If a callback has been requested, e.g. the pageout
1182 * daemon, do so. Otherwise, awaken waiting processes.
663dbc72 1183 */
251f56ba 1184void
3efdd860
KM
1185biodone(bp)
1186 register struct buf *bp;
663dbc72 1187{
663dbc72 1188
80e7c811 1189 if (bp->b_flags & B_DONE)
3efdd860 1190 panic("dup biodone");
663dbc72 1191 bp->b_flags |= B_DONE;
76429560
KM
1192 if ((bp->b_flags & B_READ) == 0)
1193 vwakeup(bp);
961945a8
SL
1194 if (bp->b_flags & B_CALL) {
1195 bp->b_flags &= ~B_CALL;
1196 (*bp->b_iodone)(bp);
1197 return;
1198 }
d42a4811 1199 if (bp->b_flags & B_ASYNC)
663dbc72
BJ
1200 brelse(bp);
1201 else {
1202 bp->b_flags &= ~B_WANTED;
1203 wakeup((caddr_t)bp);
1204 }
1205}
aa95c6fc 1206
b5d79df9
MS
1207int
1208count_lock_queue()
1209{
1210 register struct buf *bp;
1211 register int ret;
1212
1213 for (ret = 0, bp = (struct buf *)bufqueues[BQ_LOCKED].qe_next;
1214 bp; bp = (struct buf *)bp->b_freelist.qe_next)
1215 ++ret;
1216 return(ret);
1217}
1218
aa95c6fc
KM
1219#ifdef DIAGNOSTIC
1220/*
1221 * Print out statistics on the current allocation of the buffer pool.
1222 * Can be enabled to print out on every ``sync'' by setting "syncprt"
1223 * above.
1224 */
1225void
1226vfs_bufstats()
1227{
1228 int s, i, j, count;
37392cf8 1229 register struct buf *bp;
e3249ec0 1230 register struct queue_entry *dp;
aa95c6fc
KM
1231 int counts[MAXBSIZE/CLBYTES+1];
1232 static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" };
1233
37392cf8 1234 for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) {
aa95c6fc
KM
1235 count = 0;
1236 for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
1237 counts[j] = 0;
1238 s = splbio();
e3249ec0 1239 for (bp = dp->qe_next; bp; bp = bp->b_freelist.qe_next) {
aa95c6fc
KM
1240 counts[bp->b_bufsize/CLBYTES]++;
1241 count++;
1242 }
1243 splx(s);
1244 printf("%s: total-%d", bname[i], count);
1245 for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
1246 if (counts[j] != 0)
1247 printf(", %d-%d", j * CLBYTES, counts[j]);
1248 printf("\n");
1249 }
1250}
1251#endif /* DIAGNOSTIC */