/*
 * Copyright (c) 1989, 1990, 1991, 1992 William F. Jolitz, TeleMuse
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This software is a component of "386BSD" developed by
 *	William F. Jolitz, TeleMuse.
 * 4. Neither the name of the developer nor the name "386BSD"
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS A COMPONENT OF 386BSD DEVELOPED BY WILLIAM F. JOLITZ
 * AND IS INTENDED FOR RESEARCH AND EDUCATIONAL PURPOSES ONLY. THIS
 * SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT.
 * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT
 * NOT MAKE USE THIS WORK.
 *
 * FOR USERS WHO WISH TO UNDERSTAND THE 386BSD SYSTEM DEVELOPED
 * BY WILLIAM F. JOLITZ, WE RECOMMEND THE USER STUDY WRITTEN
 * REFERENCES SUCH AS THE "PORTING UNIX TO THE 386" SERIES
 * (BEGINNING JANUARY 1991 "DR. DOBBS JOURNAL", USA AND BEGINNING
 * JUNE 1991 "UNIX MAGAZIN", GERMANY) BY WILLIAM F. JOLITZ AND
 * LYNNE GREER JOLITZ, AS WELL AS OTHER BOOKS ON UNIX AND THE
 * ON-LINE 386BSD USER MANUAL BEFORE USE. A BOOK DISCUSSING THE INTERNALS
 * OF 386BSD ENTITLED "386BSD FROM THE INSIDE OUT" WILL BE AVAILABLE LATE 1992.
 *
 * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE DEVELOPER BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * PATCHES MAGIC		LEVEL	PATCH THAT GOT US HERE
 * --------------------	-----	----------------------
 * CURRENT PATCH LEVEL:	2	00042
 * --------------------	-----	----------------------
 *
 * 24 Apr 92	Martin Renters		Fix NFS read request hang
 * 20 Aug 92	David Greenman		Fix getnewbuf() 2xAllocation
 */
static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys.386bsd/kern/vfs__bio.c,v 1.3 1993/07/18 17:15:09 davidg Exp $";

#include "param.h"
#include "systm.h"
#include "proc.h"
#include "vnode.h"
#include "buf.h"
#include "specdev.h"
#include "mount.h"
#include "malloc.h"
#include "vm/vm.h"
#include "resourcevar.h"

static struct buf *getnewbuf(int);
extern vm_map_t buffer_map;

/*
 * Initialize buffer headers and related structures.
 */
void bufinit()
{
	struct bufhd *bh;
	struct buf *bp;

	/* first, make a null hash table */
	for(bh = bufhash; bh < bufhash + BUFHSZ; bh++) {
		bh->b_flags = 0;
		bh->b_forw = (struct buf *)bh;
		bh->b_back = (struct buf *)bh;
	}

	/* next, make a null set of free lists */
	for(bp = bfreelist; bp < bfreelist + BQUEUES; bp++) {
		bp->b_flags = 0;
		bp->av_forw = bp;
		bp->av_back = bp;
		bp->b_forw = bp;
		bp->b_back = bp;
	}

	/* finally, initialize each buffer header and stick on empty q */
	for(bp = buf; bp < buf + nbuf ; bp++) {
		bp->b_flags = B_HEAD | B_INVAL;	/* we're just an empty header */
		bp->b_dev = NODEV;
		bp->b_vp = 0;
		binstailfree(bp, bfreelist + BQ_EMPTY);
		binshash(bp, bfreelist + BQ_EMPTY);
	}
}

/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
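/*
 * A minimal usage sketch (not part of this file; "lblkno" and "bsize"
 * stand in for whatever logical block number and block size the calling
 * filesystem uses):
 *
 *	struct buf *bp;
 *	int error;
 *
 *	error = bread(vp, lblkno, bsize, NOCRED, &bp);
 *	if (error == 0)
 *		... use the data at bp->b_un.b_addr ...
 *	brelse(bp);	(the buffer comes back busy even on error)
 */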
int
bread(struct vnode *vp, daddr_t blkno, int size, struct ucred *cred,
	struct buf **bpp)
{
	struct buf *bp;
	int rv = 0;

	bp = getblk (vp, blkno, size);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) {
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		if (cred != NOCRED) crhold(cred);	/* 25 Apr 92*/
		bp->b_rcred = cred;
		VOP_STRATEGY(bp);
		rv = biowait (bp);
	}
	*bpp = bp;

	return (rv);
}

/*
 * Operates like bread, but also starts I/O on the specified
 * read-ahead block. [See page 55 of Bach's Book]
 */
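/*
 * Usage sketch (hypothetical values; the read-ahead block is typically
 * the next logical block of the same file):
 *
 *	error = breada(vp, lblkno, bsize, lblkno + 1, bsize, NOCRED, &bp);
 */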
int
breada(struct vnode *vp, daddr_t blkno, int size, daddr_t rablkno, int rabsize,
	struct ucred *cred, struct buf **bpp)
{
	struct buf *bp, *rabp;
	int rv = 0, needwait = 0;

	bp = getblk (vp, blkno, size);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) {
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		if (cred != NOCRED) crhold(cred);	/* 25 Apr 92*/
		bp->b_rcred = cred;
		VOP_STRATEGY(bp);
		needwait++;
	}

	rabp = getblk (vp, rablkno, rabsize);

	/* if not found in cache, do some I/O (overlapped with first) */
	if ((rabp->b_flags & B_CACHE) == 0 || (rabp->b_flags & B_INVAL) != 0) {
		rabp->b_flags |= B_READ | B_ASYNC;
		rabp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		if (cred != NOCRED) crhold(cred);	/* 25 Apr 92*/
		rabp->b_rcred = cred;
		VOP_STRATEGY(rabp);
	} else
		brelse(rabp);

	/* wait for original I/O */
	if (needwait)
		rv = biowait (bp);

	*bpp = bp;
	return (rv);
}

/*
 * Synchronous write.
 * Release buffer on completion.
 */
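/*
 * If the buffer was a delayed write, that state is cleared (and the
 * buffer reassigned via reassignbuf()) before the write is started;
 * the error status from biowait() is returned.
 */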
int
bwrite(register struct buf *bp)
{
	int rv;

	if(bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	} else {
		int wasdelayed;

		if(!(bp->b_flags & B_BUSY))
			panic("bwrite: not busy");

		wasdelayed = bp->b_flags & B_DELWRI;
		bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_ASYNC|B_DELWRI);
		if(wasdelayed)
			reassignbuf(bp, bp->b_vp);

		bp->b_flags |= B_DIRTY;
		bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
		rv = biowait(bp);
		brelse(bp);
		return (rv);
	}
}

/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
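/*
 * In every case bdwrite() ends up releasing the buffer (directly via
 * brelse() or indirectly via bwrite()), so the caller must not use it
 * afterwards.
 */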
void
bdwrite(register struct buf *bp)
{

	if(!(bp->b_flags & B_BUSY))
		panic("bdwrite: not busy");

	if(bp->b_flags & B_INVAL) {
		brelse(bp);
		return;
	}
	if(bp->b_flags & B_TAPE) {
		bwrite(bp);
		return;
	}
	bp->b_flags &= ~(B_READ|B_DONE);
	bp->b_flags |= B_DIRTY|B_DELWRI;
	reassignbuf(bp, bp->b_vp);
	brelse(bp);
	return;
}

/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
void
bawrite(register struct buf *bp)
{

	if(!(bp->b_flags & B_BUSY))
		panic("bawrite: not busy");

	if(bp->b_flags & B_INVAL)
		brelse(bp);
	else {
		int wasdelayed;

		wasdelayed = bp->b_flags & B_DELWRI;
		bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI);
		if(wasdelayed)
			reassignbuf(bp, bp->b_vp);

		bp->b_flags |= B_DIRTY | B_ASYNC;
		bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
	}
}

/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
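/*
 * Invalid, errored, or B_NOCACHE buffers are queued at the head of the
 * AGE list, buffers marked B_AGE at its tail, and everything else at
 * the tail of the LRU list.
 */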
void
brelse(register struct buf *bp)
{
	int x;

	/* anyone need a "free" block? */
	x=splbio();
	if ((bfreelist + BQ_AGE)->b_flags & B_WANTED) {
		(bfreelist + BQ_AGE) ->b_flags &= ~B_WANTED;
		wakeup(bfreelist);
	}
	/* anyone need this very block? */
	if (bp->b_flags & B_WANTED) {
		bp->b_flags &= ~B_WANTED;
		wakeup(bp);
	}

	if (bp->b_flags & (B_INVAL|B_ERROR)) {
		bp->b_flags |= B_INVAL;
		bp->b_flags &= ~(B_DELWRI|B_CACHE);
		if(bp->b_vp)
			brelvp(bp);
	}

	/* enqueue */
	/* just an empty buffer head ... */
	/*if(bp->b_flags & B_HEAD)
		binsheadfree(bp, bfreelist + BQ_EMPTY)*/
	/* buffers with junk contents */
	/*else*/ if(bp->b_flags & (B_ERROR|B_INVAL|B_NOCACHE))
		binsheadfree(bp, bfreelist + BQ_AGE)
	/* buffers with stale but valid contents */
	else if(bp->b_flags & B_AGE)
		binstailfree(bp, bfreelist + BQ_AGE)
	/* buffers with valid and quite potentially reusable contents */
	else
		binstailfree(bp, bfreelist + BQ_LRU)

	/* unlock */
	bp->b_flags &= ~B_BUSY;
	splx(x);

}

int freebufspace;
int allocbufspace;

/*
 * Find a buffer which is available for use.
 * If there is free memory for buffer space and an empty header on the
 * empty list, use those. Otherwise, select something from a free list.
 * Preference is given to the AGE list, then the LRU list.
 */
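/*
 * If nothing is available, getnewbuf() marks the AGE queue as wanted,
 * sleeps once, and returns NULL; callers (getblk(), geteblk()) retry.
 */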
static struct buf *
getnewbuf(int sz)
{
	struct buf *bp;
	int x;

	x = splbio();
start:
	/* can we constitute a new buffer? */
	if (freebufspace > sz
		&& bfreelist[BQ_EMPTY].av_forw != (struct buf *)bfreelist+BQ_EMPTY) {
		caddr_t addr;

/*#define notyet*/
#ifndef notyet
		if ((addr = malloc (sz, M_TEMP, M_WAITOK)) == 0) goto tryfree;
#else /* notyet */
		/* get new memory buffer */
		if (round_page(sz) == sz)
			addr = (caddr_t) kmem_alloc_wired_wait(buffer_map, sz);
		else
			addr = (caddr_t) malloc (sz, M_TEMP, M_WAITOK);
		/*if ((addr = malloc (sz, M_TEMP, M_NOWAIT)) == 0) goto tryfree;*/
		bzero(addr, sz);
#endif /* notyet */
		freebufspace -= sz;
		allocbufspace += sz;

		bp = bfreelist[BQ_EMPTY].av_forw;
		bp->b_flags = B_BUSY | B_INVAL;
		bremfree(bp);
		bp->b_un.b_addr = addr;
		bp->b_bufsize = sz;	/* 20 Aug 92*/
		goto fillin;
	}

tryfree:
	if (bfreelist[BQ_AGE].av_forw != (struct buf *)bfreelist+BQ_AGE) {
		bp = bfreelist[BQ_AGE].av_forw;
		bremfree(bp);
	} else if (bfreelist[BQ_LRU].av_forw != (struct buf *)bfreelist+BQ_LRU) {
		bp = bfreelist[BQ_LRU].av_forw;
		bremfree(bp);
	} else {
		/* wait for a free buffer of any kind */
		(bfreelist + BQ_AGE)->b_flags |= B_WANTED;
		sleep(bfreelist, PRIBIO);
		splx(x);
		return (0);
	}

	/* if we are a delayed write, convert to an async write! */
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_BUSY;
		bawrite (bp);
		goto start;
	}


	if(bp->b_vp)
		brelvp(bp);

	/* we are not free, nor do we contain interesting data */
	if (bp->b_rcred != NOCRED) crfree(bp->b_rcred);	/* 25 Apr 92*/
	if (bp->b_wcred != NOCRED) crfree(bp->b_wcred);
	bp->b_flags = B_BUSY;
fillin:
	bremhash(bp);
	splx(x);
	bp->b_dev = NODEV;
	bp->b_vp = NULL;
	bp->b_blkno = bp->b_lblkno = 0;
	bp->b_iodone = 0;
	bp->b_error = 0;
	bp->b_wcred = bp->b_rcred = NOCRED;
	if (bp->b_bufsize != sz)
		allocbuf(bp, sz);
	bp->b_bcount = bp->b_bufsize = sz;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	return (bp);
}

/*
 * Check to see if a block is currently memory resident.
 */
struct buf *
incore(struct vnode *vp, daddr_t blkno)
{
	struct buf *bh;
	struct buf *bp;

	bh = BUFHASH(vp, blkno);

	/* Search hash chain */
	bp = bh->b_forw;
	while (bp != (struct buf *) bh) {
		/* hit */
		if (bp->b_lblkno == blkno && bp->b_vp == vp
			&& (bp->b_flags & B_INVAL) == 0)
			return (bp);
		bp = bp->b_forw;
	}

	return(0);
}

/*
 * Get a block of requested size that is associated with
 * a given vnode and block offset. If it is found in the
 * block cache, mark it as having been found, make it busy
 * and return it. Otherwise, return an empty block of the
 * correct size. It is up to the caller to ensure that
 * cached blocks are of the correct size.
 */
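/*
 * The buffer is returned busy. B_CACHE is set only when the block was
 * already resident, so callers such as bread() use that flag to decide
 * whether the contents still have to be read from backing store.
 */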
struct buf *
getblk(register struct vnode *vp, daddr_t blkno, int size)
{
	struct buf *bp, *bh;
	int x;

	for (;;) {
		if (bp = incore(vp, blkno)) {
			x = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				sleep (bp, PRIBIO);
				splx(x);
				continue;
			}
			bp->b_flags |= B_BUSY | B_CACHE;
			bremfree(bp);
			if (size > bp->b_bufsize)
				panic("now what do we do?");
			/* if (bp->b_bufsize != size) allocbuf(bp, size); */
		} else {

			if((bp = getnewbuf(size)) == 0) continue;
			bp->b_blkno = bp->b_lblkno = blkno;
			bgetvp(vp, bp);
			x = splbio();
			bh = BUFHASH(vp, blkno);
			binshash(bp, bh);
			bp->b_flags = B_BUSY;
		}
		splx(x);
		return (bp);
	}
}

/*
 * Get an empty, disassociated buffer of given size.
 */
struct buf *
geteblk(int size)
{
	struct buf *bp;
	int x;

	while ((bp = getnewbuf(size)) == 0)
		;
	x = splbio();
	binshash(bp, bfreelist + BQ_AGE);
	splx(x);

	return (bp);
}

/*
 * Exchange a buffer's underlying buffer storage for one of a different
 * size, taking care to maintain contents appropriately. When the buffer
 * grows, the caller is responsible for filling out the additional
 * contents. When the buffer shrinks, data is lost, so the caller must
 * first write it back to backing store before shrinking the buffer, as
 * no implied I/O will be done.
 *
 * The buffer header is updated in place; nothing is returned.
 */
void
allocbuf(register struct buf *bp, int size)
{
	caddr_t newcontents;

	/* get new memory buffer */
#ifndef notyet
	newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK);
#else /* notyet */
	if (round_page(size) == size)
		newcontents = (caddr_t) kmem_alloc_wired_wait(buffer_map, size);
	else
		newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK);
#endif /* notyet */

	/* copy the old into the new, up to the maximum that will fit */
	bcopy (bp->b_un.b_addr, newcontents, min(bp->b_bufsize, size));

	/* return old contents to free heap */
#ifndef notyet
	free (bp->b_un.b_addr, M_TEMP);
#else /* notyet */
	if (round_page(bp->b_bufsize) == bp->b_bufsize)
		kmem_free_wakeup(buffer_map, bp->b_un.b_addr, bp->b_bufsize);
	else
		free (bp->b_un.b_addr, M_TEMP);
#endif /* notyet */

	/* adjust buffer cache's idea of memory allocated to buffer contents */
	freebufspace -= size - bp->b_bufsize;
	allocbufspace += size - bp->b_bufsize;

	/* update buffer header */
	bp->b_un.b_addr = newcontents;
	bp->b_bcount = bp->b_bufsize = size;
}

/*
 * Patiently await operations to complete on this buffer.
 * When they do, extract and return any error associated with the I/O.
 * If the block turned out to be invalid, force it off the lookup
 * hash chains.
 */
int
biowait(register struct buf *bp)
{
	int x;

	x = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	if((bp->b_flags & B_ERROR) || bp->b_error) {
		if ((bp->b_flags & B_INVAL) == 0) {
			bp->b_flags |= B_INVAL;
			bremhash(bp);
			binshash(bp, bfreelist + BQ_AGE);
		}
		if (!bp->b_error)
			bp->b_error = EIO;
		else
			bp->b_flags |= B_ERROR;
		splx(x);
		return (bp->b_error);
	} else {
		splx(x);
		return (0);
	}
}

/*
 * Finish up operations on a buffer, calling an optional
 * function (if requested), and releasing the buffer if
 * marked asynchronous. Then mark this buffer done so that
 * others biowait()'ing for it will notice when they are
 * woken up from sleep().
 */
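/*
 * This is normally called by the device driver when the transfer
 * finishes, typically from its interrupt handler.
 */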
int
biodone(register struct buf *bp)
{
	int x;

	x = splbio();
	if (bp->b_flags & B_CALL) (*bp->b_iodone)(bp);
	bp->b_flags &= ~B_CALL;
	if ((bp->b_flags & (B_READ|B_DIRTY)) == B_DIRTY) {
		bp->b_flags &= ~B_DIRTY;
		vwakeup(bp);
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	bp->b_flags &= ~B_ASYNC;
	bp->b_flags |= B_DONE;
	wakeup(bp);
	splx(x);
}