[unix-history] usr/src/sys/kern/vfs_cluster.c
/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_cluster.c	7.38 (Berkeley) %G%
 */

#include "param.h"
#include "proc.h"
#include "buf.h"
#include "vnode.h"
#include "specdev.h"
#include "mount.h"
#include "trace.h"
#include "resourcevar.h"

/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
bread(vp, blkno, size, cred, bpp)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
	struct buf **bpp;
#ifdef SECSIZE
	long secsize;
#endif SECSIZE
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
#ifdef SECSIZE
	bp = getblk(dev, blkno, size, secsize);
#else SECSIZE
	*bpp = bp = getblk(vp, blkno, size);
#endif SECSIZE
	if (bp->b_flags & (B_DONE | B_DELWRI)) {
		trace(TR_BREADHIT, pack(vp, size), blkno);
		return (0);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	if (bp->b_rcred == NOCRED && cred != NOCRED) {
		crhold(cred);
		bp->b_rcred = cred;
	}
	VOP_STRATEGY(bp);
	trace(TR_BREADMISS, pack(vp, size), blkno);
	p->p_stats->p_ru.ru_inblock++;		/* pay for read */
	return (biowait(bp));
}

/*
 * Operates like bread, but also starts I/O on the specified
 * read-ahead block.
 */
breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
	struct vnode *vp;
	daddr_t blkno; int size;
#ifdef SECSIZE
	long secsize;
#endif SECSIZE
	daddr_t rablkno; int rabsize;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp, *rabp;

	bp = NULL;
	/*
	 * If the block is not memory resident,
	 * allocate a buffer and start I/O.
	 */
	if (!incore(vp, blkno)) {
		*bpp = bp = getblk(vp, blkno, size);
#endif SECSIZE
		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breada");
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
			VOP_STRATEGY(bp);
			trace(TR_BREADMISS, pack(vp, size), blkno);
			p->p_stats->p_ru.ru_inblock++;	/* pay for read */
		} else
			trace(TR_BREADHIT, pack(vp, size), blkno);
	}

	/*
	 * If there is a read-ahead block, start I/O on it too.
	 */
	if (!incore(vp, rablkno)) {
		rabp = getblk(vp, rablkno, rabsize);
#endif SECSIZE
		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(vp, rabsize), rablkno);
		} else {
			rabp->b_flags |= B_ASYNC | B_READ;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				rabp->b_rcred = cred;
			}
			VOP_STRATEGY(rabp);
			trace(TR_BREADMISSRA, pack(vp, rabsize), rablkno);
			p->p_stats->p_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was memory resident, let bread get it.
	 * If block was not memory resident, the read was
	 * started above, so just wait for the read to complete.
	 */
	if (bp == NULL)
#ifdef SECSIZE
		return (bread(dev, blkno, size, secsize));
#else SECSIZE
		return (bread(vp, blkno, size, cred, bpp));
	return (biowait(bp));
}

/*
 * Synchronous write.
 * Release buffer on completion.
 */
bwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */
	register int flag;
	int s, error;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if ((flag & B_DELWRI) == 0)
		p->p_stats->p_ru.ru_oublock++;		/* no one paid yet */
	else
		reassignbuf(bp, bp->b_vp);
	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	s = splbio();
	bp->b_vp->v_numoutput++;
	splx(s);
	VOP_STRATEGY(bp);

	/*
	 * If the write was synchronous, then await I/O completion.
	 * If the write was "delayed", then we put the buffer on
	 * the queue of blocks awaiting I/O completion status.
	 */
	if ((flag & B_ASYNC) == 0) {
		error = biowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI) {
		bp->b_flags |= B_AGE;
		error = 0;
	}
	return (error);
}

/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */

	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DELWRI;
		reassignbuf(bp, bp->b_vp);
		p->p_stats->p_ru.ru_oublock++;		/* no one paid yet */
	}
	/*
	 * If this is a tape drive, the write must be initiated.
	 */
	if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE) {
		bawrite(bp);
	} else {
		bp->b_flags |= (B_DONE | B_DELWRI);
		brelse(bp);
	}
}

/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
bawrite(bp)
	register struct buf *bp;
{

	/*
	 * Setting the ASYNC flag causes bwrite to return
	 * after starting the I/O.
	 */
	bp->b_flags |= B_ASYNC;
	(void) bwrite(bp);
}

/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	int s;

	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	/*
	 * If a process is waiting for the buffer, or
	 * is waiting for a free buffer, awaken it.
	 */
	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags & B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	/*
	 * Retry I/O for locked buffers rather than invalidating them.
	 */
	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
		bp->b_flags &= ~B_ERROR;
	/*
	 * Disassociate buffers that are no longer valid.
	 */
	if (bp->b_flags & (B_NOCACHE | B_ERROR))
		bp->b_flags |= B_INVAL;
	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
		if (bp->b_vp)
			brelvp(bp);
		bp->b_flags &= ~B_DELWRI;
	}
	/*
	 * Stick the buffer back on a free list.
	 */
	s = splbio();
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bfreelist[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
	splx(s);
}

/*
 * Check to see if a block is currently memory resident.
 */
incore(vp, blkno)
	struct vnode *vp;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(vp, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
		    (bp->b_flags & B_INVAL) == 0)
			return (1);
	return (0);
}

/*
 * Check to see if a block is currently memory resident.
 * If it is resident, return it. If it is not resident,
 * allocate a new buffer and assign it to the block.
 */
struct buf *
#ifdef SECSIZE
getblk(dev, blkno, size, secsize)
#else SECSIZE
getblk(vp, blkno, size)
	register struct vnode *vp;
	daddr_t blkno;
	int size;
#ifdef SECSIZE
	long secsize;
#endif SECSIZE
{
	register struct buf *bp, *dp;
	int s;

	if (size > MAXBSIZE)
		panic("getblk: size too big");
	/*
	 * Search the cache for the block. If the buffer is found,
	 * but it is currently locked, then we must wait for it to
	 * become available.
	 */
	dp = BUFHASH(vp, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp ||
		    (bp->b_flags & B_INVAL))
			continue;
		s = splbio();
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO + 1);
			splx(s);
			goto loop;
		}
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		if (bp->b_bcount != size) {
			printf("getblk: stray size");
			bp->b_flags |= B_INVAL;
			bwrite(bp);
			goto loop;
		}
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	bp = getnewbuf();
	bremhash(bp);
	bgetvp(vp, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = blkno;
#ifdef SECSIZE
	bp->b_blksize = secsize;
#endif SECSIZE
	bp->b_blkno = blkno;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, dp);
	allocbuf(bp, size);
	return (bp);
}

/*
 * Allocate a buffer.
 * The caller will assign it to a block.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *flist;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
	bp = getnewbuf();
	bp->b_flags |= B_INVAL;
	bremhash(bp);
	flist = &bfreelist[BQ_AGE];
	bp->b_bcount = 0;
#ifdef SECSIZE
	bp->b_blksize = DEV_BSIZE;
#endif SECSIZE
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, flist);
	allocbuf(bp, size);
	return (bp);
}

/*
 * Expand or contract the actual memory allocated to a buffer.
 * If no memory is available, release buffer and take error exit.
 */
allocbuf(tp, size)
	register struct buf *tp;
	int size;
{
	register struct buf *bp, *ep;
	int sizealloc, take, s;

	sizealloc = roundup(size, CLBYTES);
	/*
	 * Buffer size does not change
	 */
	if (sizealloc == tp->b_bufsize)
		goto out;
	/*
	 * Buffer size is shrinking.
	 * Place excess space in a buffer header taken from the
	 * BQ_EMPTY buffer list and placed on the "most free" list.
	 * If no extra buffer headers are available, leave the
	 * extra space in the present buffer.
	 */
	if (sizealloc < tp->b_bufsize) {
		ep = bfreelist[BQ_EMPTY].av_forw;
		if (ep == &bfreelist[BQ_EMPTY])
			goto out;
		s = splbio();
		bremfree(ep);
		ep->b_flags |= B_BUSY;
		splx(s);
		pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
		    (int)tp->b_bufsize - sizealloc);
		ep->b_bufsize = tp->b_bufsize - sizealloc;
		tp->b_bufsize = sizealloc;
		ep->b_flags |= B_INVAL;
		ep->b_bcount = 0;
		brelse(ep);
		goto out;
	}
	/*
	 * More buffer space is needed. Get it out of buffers on
	 * the "most free" list, placing the empty headers on the
	 * BQ_EMPTY buffer header list.
	 */
	while (tp->b_bufsize < sizealloc) {
		take = sizealloc - tp->b_bufsize;
		bp = getnewbuf();
		if (take >= bp->b_bufsize)
			take = bp->b_bufsize;
		pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
		    &tp->b_un.b_addr[tp->b_bufsize], take);
		tp->b_bufsize += take;
		bp->b_bufsize = bp->b_bufsize - take;
		if (bp->b_bcount > bp->b_bufsize)
			bp->b_bcount = bp->b_bufsize;
		if (bp->b_bufsize <= 0) {
			bremhash(bp);
			binshash(bp, &bfreelist[BQ_EMPTY]);
			bp->b_dev = NODEV;
			bp->b_error = 0;
			bp->b_flags |= B_INVAL;
		}
		brelse(bp);
	}
out:
	tp->b_bcount = size;
	return (1);
}

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf()
{
	register struct buf *bp, *dp;
	register struct ucred *cred;
	int s;

loop:
	s = splbio();
	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO + 1);
		splx(s);
		goto loop;
	}
	bp = dp->av_forw;
	bremfree(bp);
	bp->b_flags |= B_BUSY;
	splx(s);
	if (bp->b_flags & B_DELWRI) {
		(void) bawrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	if (bp->b_vp)
		brelvp(bp);
	if (bp->b_rcred != NOCRED) {
		cred = bp->b_rcred;
		bp->b_rcred = NOCRED;
		crfree(cred);
	}
	if (bp->b_wcred != NOCRED) {
		cred = bp->b_wcred;
		bp->b_wcred = NOCRED;
		crfree(cred);
	}
	bp->b_flags = B_BUSY;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	return (bp);
}

/*
 * Wait for I/O to complete.
 *
 * Extract and return any errors associated with the I/O.
 * If the error flag is set, but no specific error is
 * given, return EIO.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	if ((bp->b_flags & B_ERROR) == 0)
		return (0);
	if (bp->b_error)
		return (bp->b_error);
	return (EIO);
}

/*
 * Mark I/O complete on a buffer.
 *
 * If a callback has been requested, e.g. the pageout
 * daemon, do so. Otherwise, awaken waiting processes.
 */
biodone(bp)
	register struct buf *bp;
{

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
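
Because biodone() invokes b_iodone and returns immediately when B_CALL is set, an initiator that cannot sleep (the pageout daemon is the example cited above) can ask for a completion callback instead of waiting in biowait(). The two routines below are a hypothetical sketch of that convention; their names and the error handling are assumptions, not part of this file.

/*
 * Illustrative sketch only: issue asynchronous I/O and have biodone
 * call back at completion rather than waking a sleeping process.
 */
void
example_iodone(bp)				/* hypothetical completion routine */
	register struct buf *bp;
{

	/* called from biodone; the buffer is ours to release */
	if (bp->b_flags & B_ERROR)
		printf("example_iodone: error %d\n", bp->b_error);
	brelse(bp);
}

void
example_startio(bp)				/* hypothetical initiator */
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC | B_CALL;
	bp->b_iodone = example_iodone;
	VOP_STRATEGY(bp);			/* biodone will invoke example_iodone */
}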