[unix-history] sys/kern/vfs_bio.old.c
/*
 * Copyright (c) 1989, 1990, 1991, 1992 William F. Jolitz, TeleMuse
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This software is a component of "386BSD" developed by
 *	William F. Jolitz, TeleMuse.
 * 4. Neither the name of the developer nor the name "386BSD"
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS A COMPONENT OF 386BSD DEVELOPED BY WILLIAM F. JOLITZ
 * AND IS INTENDED FOR RESEARCH AND EDUCATIONAL PURPOSES ONLY. THIS
 * SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT.
 * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT
 * NOT MAKE USE THIS WORK.
 *
 * FOR USERS WHO WISH TO UNDERSTAND THE 386BSD SYSTEM DEVELOPED
 * BY WILLIAM F. JOLITZ, WE RECOMMEND THE USER STUDY WRITTEN
 * REFERENCES SUCH AS THE "PORTING UNIX TO THE 386" SERIES
 * (BEGINNING JANUARY 1991 "DR. DOBBS JOURNAL", USA AND BEGINNING
 * JUNE 1991 "UNIX MAGAZIN", GERMANY) BY WILLIAM F. JOLITZ AND
 * LYNNE GREER JOLITZ, AS WELL AS OTHER BOOKS ON UNIX AND THE
 * ON-LINE 386BSD USER MANUAL BEFORE USE. A BOOK DISCUSSING THE INTERNALS
 * OF 386BSD ENTITLED "386BSD FROM THE INSIDE OUT" WILL BE AVAILABLE LATE 1992.
 *
 * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE DEVELOPER BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id$
 */

#include "param.h"
#include "proc.h"
#include "vnode.h"
#include "buf.h"
#include "specdev.h"
#include "mount.h"
#include "malloc.h"
#ifdef notyet
#include "vm/vm.h"
#include "vm/vm_kern.h"
#endif /* notyet */
#include "resourcevar.h"

/*
 * Initialize buffer headers and related structures.
 */
void bufinit()
{
	struct bufhd *bh;
	struct buf *bp;

	/* first, make a null hash table */
	for(bh = bufhash; bh < bufhash + BUFHSZ; bh++) {
		bh->b_flags = 0;
		bh->b_forw = (struct buf *)bh;
		bh->b_back = (struct buf *)bh;
	}

	/* next, make a null set of free lists */
	for(bp = bfreelist; bp < bfreelist + BQUEUES; bp++) {
		bp->b_flags = 0;
		bp->av_forw = bp;
		bp->av_back = bp;
		bp->b_forw = bp;
		bp->b_back = bp;
	}

	/* finally, initialize each buffer header and stick on empty q */
	for(bp = buf; bp < buf + nbuf ; bp++) {
		bp->b_flags = B_HEAD | B_INVAL;	/* we're just an empty header */
		bp->b_dev = NODEV;
		bp->b_vp = 0;
		binstailfree(bp, bfreelist + BQ_EMPTY);
		binshash(bp, bfreelist + BQ_EMPTY);
	}
}

/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
bread(vp, blkno, size, cred, bpp)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
	struct buf **bpp;
{
	struct buf *bp;
	int rv = 0;

	bp = getblk (vp, blkno, size);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) {
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		bp->b_rcred = cred;
		VOP_STRATEGY(bp);
		rv = biowait (bp);
	}
	*bpp = bp;

	return (rv);
}

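/*
 * Illustrative only (not part of the original file): a typical bread()
 * caller in the filesystem style of this era.  The names "lbn", "bsize"
 * and example_bread_caller are assumptions standing in for values and a
 * routine a real filesystem would supply.  Note that bread() hands back
 * a buffer even when the I/O fails, so the caller releases it on both
 * paths.
 */
#ifdef notdef
static int
example_bread_caller(vp, lbn, bsize, cred)
	struct vnode *vp;
	daddr_t lbn;
	int bsize;
	struct ucred *cred;
{
	struct buf *bp;
	int error;

	if (error = bread(vp, lbn, bsize, cred, &bp)) {
		brelse(bp);		/* a buffer is returned even on error */
		return (error);
	}
	/* ... use the bsize bytes at bp->b_un.b_addr ... */
	brelse(bp);			/* done; contents stay cached */
	return (0);
}
#endif /* notdef */
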
/*
 * Operates like bread, but also starts I/O on the specified
 * read-ahead block. [See page 55 of Bach's Book]
 */
breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
	struct vnode *vp;
	daddr_t blkno; int size;
	daddr_t rablkno; int rabsize;
	struct ucred *cred;
	struct buf **bpp;
{
	struct buf *bp, *rabp;
	int rv = 0, needwait = 0;

	bp = getblk (vp, blkno, size);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) {
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		bp->b_rcred = cred;
		VOP_STRATEGY(bp);
		needwait++;
	}

	rabp = getblk (vp, rablkno, rabsize);

	/* if not found in cache, do some I/O (overlapped with first) */
	if ((rabp->b_flags & B_CACHE) == 0 || (rabp->b_flags & B_INVAL) != 0) {
		rabp->b_flags |= B_READ | B_ASYNC;
		rabp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		rabp->b_rcred = cred;
		VOP_STRATEGY(rabp);
	} else
		brelse(rabp);

	/* wait for original I/O */
	if (needwait)
		rv = biowait (bp);

	*bpp = bp;
	return (rv);
}

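/*
 * Illustrative only (not part of the original file): sequential readers
 * typically name the next logical block as the read-ahead, e.g.
 *
 *	error = breada(vp, lbn, bsize, lbn + 1, bsize, cred, &bp);
 *
 * so the asynchronous read of lbn + 1 overlaps processing of lbn.
 * "lbn" and "bsize" are assumed filesystem-supplied values.
 */
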
/*
 * Synchronous write.
 * Release buffer on completion.
 */
bwrite(bp)
	register struct buf *bp;
{
	int rv;

	if(bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	} else {
		int wasdelayed;

		if(!(bp->b_flags & B_BUSY))
			panic("bwrite: not busy");
		wasdelayed = bp->b_flags & B_DELWRI;
		bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_ASYNC|B_DELWRI);
		if(wasdelayed)
			reassignbuf(bp, bp->b_vp);
		bp->b_flags |= B_DIRTY;
		bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
		rv = biowait(bp);
		brelse(bp);
		return (rv);
	}
}

/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
void bdwrite(bp)
	register struct buf *bp;
{

	if(!(bp->b_flags & B_BUSY))
		panic("bdwrite: not busy");
	if(bp->b_flags & B_INVAL) {
		brelse(bp);
		return;		/* buffer was released; don't touch it again */
	}
	if(bp->b_flags & B_TAPE) {
		bwrite(bp);
		return;
	}
	bp->b_flags &= ~(B_READ|B_DONE);
	bp->b_flags |= B_DIRTY|B_DELWRI;
	reassignbuf(bp, bp->b_vp);
	brelse(bp);
	return;
}

/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
bawrite(bp)
	register struct buf *bp;
{

	if(!(bp->b_flags & B_BUSY))
		panic("bawrite: not busy");
	if(bp->b_flags & B_INVAL)
		brelse(bp);
	else {
		int wasdelayed;

		wasdelayed = bp->b_flags & B_DELWRI;
		bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI);
		if(wasdelayed)
			reassignbuf(bp, bp->b_vp);

		bp->b_flags |= B_DIRTY | B_ASYNC;
		bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
	}
}

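/*
 * Illustrative only (not part of the original file): the three write
 * flavors above differ mainly in when the caller gets the buffer back.
 * For a buffer obtained from getblk() and already modified:
 *
 *	error = bwrite(bp);	synchronous; waits for the I/O, then releases bp
 *	bawrite(bp);		asynchronous; starts I/O, released on completion
 *	bdwrite(bp);		delayed; marked dirty and released, no I/O started
 */
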
/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
brelse(bp)
	register struct buf *bp;
{
	int x;

	/* anyone need a "free" block? */
	x = splbio();
	if ((bfreelist + BQ_AGE)->b_flags & B_WANTED) {
		(bfreelist + BQ_AGE)->b_flags &= ~B_WANTED;
		wakeup(bfreelist);
	}
	/* anyone need this very block? */
	if (bp->b_flags & B_WANTED) {
		bp->b_flags &= ~B_WANTED;
		wakeup(bp);
	}

	if (bp->b_flags & (B_INVAL|B_ERROR)) {
		bp->b_flags |= B_INVAL;
		bp->b_flags &= ~(B_DELWRI|B_CACHE);
		if(bp->b_vp)
			brelvp(bp);
	}

	/* enqueue */
	/* just an empty buffer head ... */
	/*if(bp->b_flags & B_HEAD)
		binsheadfree(bp, bfreelist + BQ_EMPTY)*/
	/* buffers with junk contents */
	/*else*/ if(bp->b_flags & (B_ERROR|B_INVAL|B_NOCACHE))
		binsheadfree(bp, bfreelist + BQ_AGE)
	/* buffers with stale but valid contents */
	else if(bp->b_flags & B_AGE)
		binstailfree(bp, bfreelist + BQ_AGE)
	/* buffers with valid and quite potentially reusable contents */
	else
		binstailfree(bp, bfreelist + BQ_LRU)

	/* unlock */
	bp->b_flags &= ~B_BUSY;
	splx(x);

	return;
}

int freebufspace;
int allocbufspace;

/*
 * Find a buffer which is available for use.
 * If there is free memory for buffer space and an empty header on the
 * empty list, constitute a new buffer from them.  Otherwise, select a
 * buffer from a free list, preferring the AGE list over the LRU list.
 */
struct buf *
getnewbuf(sz)
	int sz;
{
	struct buf *bp;
	int x;

	x = splbio();
start:
	/* can we constitute a new buffer? */
	if (freebufspace > sz
	    && bfreelist[BQ_EMPTY].av_forw != (struct buf *)bfreelist+BQ_EMPTY) {
		caddr_t addr;

#ifndef notyet
		if ((addr = malloc (sz, M_TEMP, M_NOWAIT)) == 0) goto tryfree;
#else /* notyet */
		/* get new memory buffer */
		if (round_page(sz) == sz)
			addr = (caddr_t) kmem_alloc(buffer_map, sz);
		else
			addr = (caddr_t) malloc (sz, M_TEMP, M_WAITOK);
		/*if ((addr = malloc (sz, M_TEMP, M_NOWAIT)) == 0) goto tryfree;*/
#endif /* notyet */
		freebufspace -= sz;
		allocbufspace += sz;

		bp = bfreelist[BQ_EMPTY].av_forw;
		bp->b_flags = B_BUSY | B_INVAL;
		bremfree(bp);
#ifndef notyet
		bp->b_un.b_addr = (caddr_t) addr;
#else /* notyet */
		bp->b_un.b_addr = addr;
#endif /* notyet */
		goto fillin;
	}

tryfree:
	if (bfreelist[BQ_AGE].av_forw != (struct buf *)bfreelist+BQ_AGE) {
		bp = bfreelist[BQ_AGE].av_forw;
		bremfree(bp);
	} else if (bfreelist[BQ_LRU].av_forw != (struct buf *)bfreelist+BQ_LRU) {
		bp = bfreelist[BQ_LRU].av_forw;
		bremfree(bp);
	} else {
		/* wait for a free buffer of any kind */
		(bfreelist + BQ_AGE)->b_flags |= B_WANTED;
		sleep(bfreelist, PRIBIO);
		splx(x);
		return (0);
	}

	/* if we are a delayed write, convert to an async write! */
	if (bp->b_flags & B_DELWRI) {
		/*bp->b_flags &= ~B_DELWRI;*/
		bp->b_flags |= B_BUSY;
		bawrite (bp);
		goto start;
	}

	/*if (bp->b_flags & (B_INVAL|B_ERROR) == 0) {
		bremhash(bp);
	}*/

	if(bp->b_vp)
		brelvp(bp);

	/* we are not free, nor do we contain interesting data */
	bp->b_flags = B_BUSY;
fillin:
	bremhash(bp);
	splx(x);
	bp->b_dev = NODEV;
	bp->b_vp = NULL;
	bp->b_blkno = bp->b_lblkno = 0;
	bp->b_iodone = 0;
	bp->b_error = 0;
	bp->b_wcred = bp->b_rcred = NOCRED;
	if (bp->b_bufsize != sz) allocbuf(bp, sz);
	bp->b_bcount = bp->b_bufsize = sz;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	return (bp);
}

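/*
 * Editorial note (not part of the original file): getnewbuf() returns 0
 * after sleeping rather than retrying internally, so callers are expected
 * to loop, as getblk() and geteblk() below do, e.g.
 *
 *	while ((bp = getnewbuf(size)) == 0)
 *		;
 */
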
/*
 * Check to see if a block is currently memory resident.
 */
struct buf *
incore(vp, blkno)
	struct vnode *vp;
	daddr_t blkno;
{
	struct buf *bh;
	struct buf *bp;

	bh = BUFHASH(vp, blkno);

	/* Search hash chain */
	bp = bh->b_forw;
	while (bp != (struct buf *) bh) {
		/* hit */
		if (bp->b_lblkno == blkno && bp->b_vp == vp
		    && (bp->b_flags & B_INVAL) == 0)
			return (bp);
		bp = bp->b_forw;
	}

	return (0);
}

/*
 * Get a block of requested size that is associated with
 * a given vnode and block offset. If it is found in the
 * block cache, mark it as having been found, make it busy
 * and return it. Otherwise, return an empty block of the
 * correct size. It is up to the caller to ensure that the
 * cached blocks are of the correct size.
 */
struct buf *
getblk(vp, blkno, size)
	register struct vnode *vp;
	daddr_t blkno;
	int size;
{
	struct buf *bp, *bh;
	int x;

	for (;;) {
		if (bp = incore(vp, blkno)) {
			x = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				sleep (bp, PRIBIO);
				splx(x);
				continue;
			}
			bp->b_flags |= B_BUSY | B_CACHE;
			bremfree(bp);
			if (size > bp->b_bufsize)
				panic("now what do we do?");
			/* if (bp->b_bufsize != size) allocbuf(bp, size); */
		} else {

			if((bp = getnewbuf(size)) == 0) continue;
			bp->b_blkno = bp->b_lblkno = blkno;
			bgetvp(vp, bp);
			x = splbio();
			bh = BUFHASH(vp, blkno);
			binshash(bp, bh);
			bp->b_flags = B_BUSY;
		}
		splx(x);
		return (bp);
	}
}

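/*
 * Illustrative only (not part of the original file): a typical
 * modify-in-place sequence built on getblk().  "lbn", "bsize", "offset",
 * "newdata" and "len" are assumed caller-supplied values, and a real
 * caller would use bread() instead if the cached contents might not be
 * valid (B_CACHE clear):
 *
 *	bp = getblk(vp, lbn, bsize);
 *	bcopy(newdata, bp->b_un.b_addr + offset, len);
 *	bdwrite(bp);		more small writes are expected soon
 */
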
/*
 * Get an empty, disassociated buffer of given size.
 */
struct buf *
geteblk(size)
	int size;
{
	struct buf *bp;
	int x;

	while ((bp = getnewbuf(size)) == 0)
		;
	x = splbio();
	binshash(bp, bfreelist + BQ_AGE);
	splx(x);

	return (bp);
}

/*
 * Exchange a buffer's underlying buffer storage for one of different
 * size, taking care to maintain contents appropriately. When buffer
 * increases in size, caller is responsible for filling out additional
 * contents. When buffer shrinks in size, data is lost, so caller must
 * first return it to backing store before shrinking the buffer, as
 * no implied I/O will be done.
 *
 * The new storage is installed in the buffer header in place; nothing
 * is returned.
 */
void
allocbuf(bp, size)
	register struct buf *bp;
	int size;
{
	caddr_t newcontents;

	/* get new memory buffer */
#ifndef notyet
	newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK);
#else /* notyet */
	if (round_page(size) == size)
		newcontents = (caddr_t) kmem_alloc(buffer_map, size);
	else
		newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK);
#endif /* notyet */

	/* copy the old into the new, up to the maximum that will fit */
	bcopy (bp->b_un.b_addr, newcontents, min(bp->b_bufsize, size));

	/* return old contents to free heap */
#ifndef notyet
	free (bp->b_un.b_addr, M_TEMP);
#else /* notyet */
	if (round_page(bp->b_bufsize) == bp->b_bufsize)
		kmem_free(buffer_map, bp->b_un.b_addr, bp->b_bufsize);
	else
		free (bp->b_un.b_addr, M_TEMP);
#endif /* notyet */

	/* adjust buffer cache's idea of memory allocated to buffer contents */
	freebufspace -= size - bp->b_bufsize;
	allocbufspace += size - bp->b_bufsize;

	/* update buffer header */
	bp->b_un.b_addr = newcontents;
	bp->b_bcount = bp->b_bufsize = size;
}

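/*
 * Illustrative only (not part of the original file): growing a cached
 * buffer.  Since allocbuf() only copies the old contents, the caller
 * fills the newly added space itself; "osize" and "nsize" are assumed
 * caller-supplied values:
 *
 *	osize = bp->b_bufsize;
 *	allocbuf(bp, nsize);
 *	bzero(bp->b_un.b_addr + osize, nsize - osize);
 */
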
/*
 * Patiently await operations to complete on this buffer.
 * When they do, extract and return any error associated with the I/O.
 * If the block turned out to be invalid, force it off the lookup hash chains.
 */
biowait(bp)
	register struct buf *bp;
{
	int x;

	x = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	if((bp->b_flags & B_ERROR) || bp->b_error) {
		if ((bp->b_flags & B_INVAL) == 0) {
			bp->b_flags |= B_INVAL;
			bremhash(bp);
			binshash(bp, bfreelist + BQ_AGE);
		}
		if (!bp->b_error)
			bp->b_error = EIO;
		else
			bp->b_flags |= B_ERROR;
		splx(x);
		return (bp->b_error);
	} else {
		splx(x);
		return (0);
	}
}

/*
 * Finish up operations on a buffer, calling an optional
 * function (if requested), and releasing the buffer if
 * marked asynchronous. Then mark this buffer done so that
 * others biowait()'ing for it will notice when they are
 * woken up from sleep().
 */
biodone(bp)
	register struct buf *bp;
{
	int x;

	x = splbio();
	if (bp->b_flags & B_CALL) (*bp->b_iodone)(bp);
	bp->b_flags &= ~B_CALL;
	if ((bp->b_flags & (B_READ|B_DIRTY)) == B_DIRTY) {
		bp->b_flags &= ~B_DIRTY;
		vwakeup(bp);
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	bp->b_flags &= ~B_ASYNC;
	bp->b_flags |= B_DONE;
	wakeup(bp);
	splx(x);
}
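
/*
 * Illustrative only (not part of the original file): the completion side
 * as a disk driver of this era might write it in its interrupt routine.
 * The flag/field names are real, but the error condition and the buffer
 * at the head of the driver's queue are assumptions:
 *
 *	if (hard_error) {
 *		bp->b_flags |= B_ERROR;
 *		bp->b_error = EIO;
 *	}
 *	bp->b_resid = 0;
 *	biodone(bp);	wakes a biowait()er, or releases an async buffer
 */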