/*
 * Copyright (c) 1989, 1990, 1991, 1992 William F. Jolitz, TeleMuse
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This software is a component of "386BSD" developed by
 *	William F. Jolitz, TeleMuse.
 * 4. Neither the name of the developer nor the name "386BSD"
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS A COMPONENT OF 386BSD DEVELOPED BY WILLIAM F. JOLITZ
 * AND IS INTENDED FOR RESEARCH AND EDUCATIONAL PURPOSES ONLY. THIS
 * SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT.
 * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT
 * NOT MAKE USE OF THIS WORK.
 *
 * FOR USERS WHO WISH TO UNDERSTAND THE 386BSD SYSTEM DEVELOPED
 * BY WILLIAM F. JOLITZ, WE RECOMMEND THE USER STUDY WRITTEN
 * REFERENCES SUCH AS THE "PORTING UNIX TO THE 386" SERIES
 * (BEGINNING JANUARY 1991 "DR. DOBBS JOURNAL", USA AND BEGINNING
 * JUNE 1991 "UNIX MAGAZIN", GERMANY) BY WILLIAM F. JOLITZ AND
 * LYNNE GREER JOLITZ, AS WELL AS OTHER BOOKS ON UNIX AND THE
 * ON-LINE 386BSD USER MANUAL BEFORE USE. A BOOK DISCUSSING THE INTERNALS
 * OF 386BSD ENTITLED "386BSD FROM THE INSIDE OUT" WILL BE AVAILABLE LATE 1992.
 *
 * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE DEVELOPER BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * PATCHES MAGIC		LEVEL	PATCH THAT GOT US HERE
 * --------------------	-----	----------------------
 * CURRENT PATCH LEVEL:	2	00042
 * --------------------	-----	----------------------
 *
 * 24 Apr 92	Martin Renters		Fix NFS read request hang
 * 20 Aug 92	David Greenman		Fix getnewbuf() 2xAllocation
 */
static char rcsid[] = "$Header: /a/cvs/386BSD/src/sys.386bsd/kern/vfs__bio.c,v 1.2 1993/07/18 11:27:45 paul Exp $";

#include "param.h"
#include "proc.h"
#include "vnode.h"
#include "buf.h"
#include "specdev.h"
#include "mount.h"
#include "malloc.h"
#include "vm/vm.h"
#include "resourcevar.h"

static struct buf *getnewbuf(int);
extern vm_map_t buffer_map;

/*
 * Initialize buffer headers and related structures.
 */
void bufinit()
{
	struct bufhd *bh;
	struct buf *bp;

	/* first, make a null hash table */
	for(bh = bufhash; bh < bufhash + BUFHSZ; bh++) {
		bh->b_flags = 0;
		bh->b_forw = (struct buf *)bh;
		bh->b_back = (struct buf *)bh;
	}

	/* next, make a null set of free lists */
	for(bp = bfreelist; bp < bfreelist + BQUEUES; bp++) {
		bp->b_flags = 0;
		bp->av_forw = bp;
		bp->av_back = bp;
		bp->b_forw = bp;
		bp->b_back = bp;
	}

	/* finally, initialize each buffer header and stick on empty q */
	for(bp = buf; bp < buf + nbuf ; bp++) {
		bp->b_flags = B_HEAD | B_INVAL;	/* we're just an empty header */
		bp->b_dev = NODEV;
		bp->b_vp = 0;
		binstailfree(bp, bfreelist + BQ_EMPTY);
		binshash(bp, bfreelist + BQ_EMPTY);
	}
}

/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
int
bread(struct vnode *vp, daddr_t blkno, int size, struct ucred *cred,
	struct buf **bpp)
{
	struct buf *bp;
	int rv = 0;

	bp = getblk (vp, blkno, size);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) {
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		if (cred != NOCRED) crhold(cred);		/* 25 Apr 92*/
		bp->b_rcred = cred;
		VOP_STRATEGY(bp);
		rv = biowait (bp);
	}
	*bpp = bp;

	return (rv);
}
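
/*
 * Illustrative sketch (not part of the original file): a filesystem
 * read path would call bread() roughly as follows.  "lbn", "bsize"
 * and "dest" are hypothetical locals for the logical block number,
 * the block size and the copy destination.
 *
 *	struct buf *bp;
 *	int error;
 *
 *	error = bread(vp, lbn, bsize, NOCRED, &bp);
 *	if (error) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	bcopy(bp->b_un.b_addr, dest, bsize);
 *	brelse(bp);
 */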

/*
 * Operates like bread, but also starts I/O on the specified
 * read-ahead block.  [See page 55 of Bach's Book]
 */
int
breada(struct vnode *vp, daddr_t blkno, int size, daddr_t rablkno, int rabsize,
	struct ucred *cred, struct buf **bpp)
{
	struct buf *bp, *rabp;
	int rv = 0, needwait = 0;

	bp = getblk (vp, blkno, size);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) {
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		if (cred != NOCRED) crhold(cred);		/* 25 Apr 92*/
		bp->b_rcred = cred;
		VOP_STRATEGY(bp);
		needwait++;
	}

	rabp = getblk (vp, rablkno, rabsize);

	/* if not found in cache, do some I/O (overlapped with first) */
	if ((rabp->b_flags & B_CACHE) == 0 || (rabp->b_flags & B_INVAL) != 0) {
		rabp->b_flags |= B_READ | B_ASYNC;
		rabp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		if (cred != NOCRED) crhold(cred);		/* 25 Apr 92*/
		rabp->b_rcred = cred;
		VOP_STRATEGY(rabp);
	} else
		brelse(rabp);

	/* wait for original I/O */
	if (needwait)
		rv = biowait (bp);

	*bpp = bp;
	return (rv);
}
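
/*
 * Illustrative sketch (not part of the original file): a sequential
 * reader typically asks for the current block and primes the cache
 * with the next one.  "lbn" and "bsize" are hypothetical locals.
 *
 *	error = breada(vp, lbn, bsize, lbn + 1, bsize, NOCRED, &bp);
 *
 * Only the buffer for "lbn" is returned; the read-ahead buffer is
 * either left in flight or released by breada() itself.
 */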

/*
 * Synchronous write.
 * Release buffer on completion.
 */
int
bwrite(register struct buf *bp)
{
	int rv;

	if(bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	} else {
		int wasdelayed;

		if(!(bp->b_flags & B_BUSY))
			panic("bwrite: not busy");

		wasdelayed = bp->b_flags & B_DELWRI;
		bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_ASYNC|B_DELWRI);
		if(wasdelayed)
			reassignbuf(bp, bp->b_vp);

		bp->b_flags |= B_DIRTY;
		bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
		rv = biowait(bp);
		brelse(bp);
		return (rv);
	}
}

/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
void
bdwrite(register struct buf *bp)
{

	if(!(bp->b_flags & B_BUSY))
		panic("bdwrite: not busy");

	if(bp->b_flags & B_INVAL) {
		brelse(bp);
		return;
	}
	if(bp->b_flags & B_TAPE) {
		bwrite(bp);
		return;
	}
	bp->b_flags &= ~(B_READ|B_DONE);
	bp->b_flags |= B_DIRTY|B_DELWRI;
	reassignbuf(bp, bp->b_vp);
	brelse(bp);
	return;
}
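
/*
 * Illustrative sketch (not part of the original file): a caller that
 * has partially filled a block and expects to touch it again soon
 * prefers bdwrite() over the synchronous bwrite().  "src", "off",
 * "len" and "wrote_whole_block" are hypothetical.
 *
 *	bp = getblk(vp, lbn, bsize);
 *	bcopy(src, bp->b_un.b_addr + off, len);
 *	if (wrote_whole_block)
 *		bwrite(bp);
 *	else
 *		bdwrite(bp);
 */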

/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
void
bawrite(register struct buf *bp)
{

	if(!(bp->b_flags & B_BUSY))
		panic("bawrite: not busy");

	if(bp->b_flags & B_INVAL)
		brelse(bp);
	else {
		int wasdelayed;

		wasdelayed = bp->b_flags & B_DELWRI;
		bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI);
		if(wasdelayed)
			reassignbuf(bp, bp->b_vp);

		bp->b_flags |= B_DIRTY | B_ASYNC;
		bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
	}
}

/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
void
brelse(register struct buf *bp)
{
	int x;

	/* anyone need a "free" block? */
	x = splbio();
	if ((bfreelist + BQ_AGE)->b_flags & B_WANTED) {
		(bfreelist + BQ_AGE)->b_flags &= ~B_WANTED;
		wakeup(bfreelist);
	}
	/* anyone need this very block? */
	if (bp->b_flags & B_WANTED) {
		bp->b_flags &= ~B_WANTED;
		wakeup(bp);
	}

	if (bp->b_flags & (B_INVAL|B_ERROR)) {
		bp->b_flags |= B_INVAL;
		bp->b_flags &= ~(B_DELWRI|B_CACHE);
		if(bp->b_vp)
			brelvp(bp);
	}

	/* enqueue */
	/* just an empty buffer head ... */
	/*if(bp->b_flags & B_HEAD)
		binsheadfree(bp, bfreelist + BQ_EMPTY)*/
	/* buffers with junk contents */
	/*else*/ if(bp->b_flags & (B_ERROR|B_INVAL|B_NOCACHE))
		binsheadfree(bp, bfreelist + BQ_AGE)
	/* buffers with stale but valid contents */
	else if(bp->b_flags & B_AGE)
		binstailfree(bp, bfreelist + BQ_AGE)
	/* buffers with valid and quite potentially reusable contents */
	else
		binstailfree(bp, bfreelist + BQ_LRU)

	/* unlock */
	bp->b_flags &= ~B_BUSY;
	splx(x);
}
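
/*
 * Illustrative sketch (not part of the original file): a caller that
 * knows a buffer's contents are no longer worth caching can force it
 * onto the head of the AGE queue by marking it before release.
 *
 *	bp->b_flags |= B_INVAL;
 *	brelse(bp);
 */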

int freebufspace;
int allocbufspace;

/*
 * Find a buffer which is available for use.
 * If there is free memory for buffer space and an empty header on
 * the empty list, use that.  Otherwise, select a buffer from a free
 * list, preferring the AGE list, then the LRU list.
 */
static struct buf *
getnewbuf(int sz)
{
	struct buf *bp;
	int x;

	x = splbio();
start:
	/* can we constitute a new buffer? */
	if (freebufspace > sz
		&& bfreelist[BQ_EMPTY].av_forw != (struct buf *)bfreelist+BQ_EMPTY) {
		caddr_t addr;

/*#define notyet*/
#ifndef notyet
		if ((addr = malloc (sz, M_TEMP, M_WAITOK)) == 0) goto tryfree;
#else /* notyet */
		/* get new memory buffer */
		if (round_page(sz) == sz)
			addr = (caddr_t) kmem_alloc_wired_wait(buffer_map, sz);
		else
			addr = (caddr_t) malloc (sz, M_TEMP, M_WAITOK);
		/*if ((addr = malloc (sz, M_TEMP, M_NOWAIT)) == 0) goto tryfree;*/
		bzero(addr, sz);
#endif /* notyet */
		freebufspace -= sz;
		allocbufspace += sz;

		bp = bfreelist[BQ_EMPTY].av_forw;
		bp->b_flags = B_BUSY | B_INVAL;
		bremfree(bp);
		bp->b_un.b_addr = addr;
		bp->b_bufsize = sz;		/* 20 Aug 92*/
		goto fillin;
	}

tryfree:
	if (bfreelist[BQ_AGE].av_forw != (struct buf *)bfreelist+BQ_AGE) {
		bp = bfreelist[BQ_AGE].av_forw;
		bremfree(bp);
	} else if (bfreelist[BQ_LRU].av_forw != (struct buf *)bfreelist+BQ_LRU) {
		bp = bfreelist[BQ_LRU].av_forw;
		bremfree(bp);
	} else {
		/* wait for a free buffer of any kind */
		(bfreelist + BQ_AGE)->b_flags |= B_WANTED;
		sleep(bfreelist, PRIBIO);
		splx(x);
		return (0);
	}

	/* if we are a delayed write, convert to an async write! */
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_BUSY;
		bawrite (bp);
		goto start;
	}

	if(bp->b_vp)
		brelvp(bp);

	/* we are not free, nor do we contain interesting data */
	if (bp->b_rcred != NOCRED) crfree(bp->b_rcred);		/* 25 Apr 92*/
	if (bp->b_wcred != NOCRED) crfree(bp->b_wcred);
	bp->b_flags = B_BUSY;
fillin:
	bremhash(bp);
	splx(x);
	bp->b_dev = NODEV;
	bp->b_vp = NULL;
	bp->b_blkno = bp->b_lblkno = 0;
	bp->b_iodone = 0;
	bp->b_error = 0;
	bp->b_wcred = bp->b_rcred = NOCRED;
	if (bp->b_bufsize != sz)
		allocbuf(bp, sz);
	bp->b_bcount = bp->b_bufsize = sz;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	return (bp);
}

/*
 * Check to see if a block is currently memory resident.
 */
struct buf *
incore(struct vnode *vp, daddr_t blkno)
{
	struct buf *bh;
	struct buf *bp;

	bh = BUFHASH(vp, blkno);

	/* Search hash chain */
	bp = bh->b_forw;
	while (bp != (struct buf *) bh) {
		/* hit */
		if (bp->b_lblkno == blkno && bp->b_vp == vp
			&& (bp->b_flags & B_INVAL) == 0)
			return (bp);
		bp = bp->b_forw;
	}

	return(0);
}
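
/*
 * Illustrative sketch (not part of the original file): incore() lets
 * a caller probe for residency without blocking, e.g. to decide
 * whether a read-ahead is worth issuing.  "lbn" is hypothetical.
 *
 *	if (incore(vp, lbn + 1) == 0)
 *		... schedule read-ahead of block lbn + 1 ...
 */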

/*
 * Get a block of requested size that is associated with
 * a given vnode and block offset.  If it is found in the
 * block cache, mark it as having been found, make it busy
 * and return it.  Otherwise, return an empty block of the
 * correct size.  It is up to the caller to ensure that
 * cached blocks are of the correct size.
 */
struct buf *
getblk(register struct vnode *vp, daddr_t blkno, int size)
{
	struct buf *bp, *bh;
	int x;

	for (;;) {
		if (bp = incore(vp, blkno)) {
			x = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				sleep (bp, PRIBIO);
				splx(x);
				continue;
			}
			bp->b_flags |= B_BUSY | B_CACHE;
			bremfree(bp);
			if (size > bp->b_bufsize)
				panic("now what do we do?");
			/* if (bp->b_bufsize != size) allocbuf(bp, size); */
		} else {

			if((bp = getnewbuf(size)) == 0) continue;
			bp->b_blkno = bp->b_lblkno = blkno;
			bgetvp(vp, bp);
			x = splbio();
			bh = BUFHASH(vp, blkno);
			binshash(bp, bh);
			bp->b_flags = B_BUSY;
		}
		splx(x);
		return (bp);
	}
}
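
/*
 * Illustrative sketch (not part of the original file): a writer that
 * will overwrite an entire block needs no prior read, so it uses
 * getblk() directly instead of bread().  "src" is hypothetical.
 *
 *	bp = getblk(vp, lbn, bsize);
 *	bcopy(src, bp->b_un.b_addr, bsize);
 *	bwrite(bp);
 */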

/*
 * Get an empty, disassociated buffer of given size.
 */
struct buf *
geteblk(int size)
{
	struct buf *bp;
	int x;

	while ((bp = getnewbuf(size)) == 0)
		;
	x = splbio();
	binshash(bp, bfreelist + BQ_AGE);
	splx(x);

	return (bp);
}
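
/*
 * Illustrative sketch (not part of the original file): geteblk()
 * suits scratch I/O tied to no vnode, e.g. a transfer staged through
 * a private buffer.
 *
 *	bp = geteblk(bsize);
 *	... fill or drain bp->b_un.b_addr ...
 *	bp->b_flags |= B_INVAL;
 *	brelse(bp);
 */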

/*
 * Exchange a buffer's underlying buffer storage for one of different
 * size, taking care to maintain contents appropriately.  When the
 * buffer grows, the caller is responsible for filling out the added
 * contents.  When the buffer shrinks, data past the new end is lost,
 * so the caller must first return it to backing store before
 * shrinking the buffer, as no implied I/O will be done.
 *
 * The buffer header is updated in place; nothing is returned.
 */
void
allocbuf(register struct buf *bp, int size)
{
	caddr_t newcontents;

	/* get new memory buffer */
#ifndef notyet
	newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK);
#else /* notyet */
	if (round_page(size) == size)
		newcontents = (caddr_t) kmem_alloc_wired_wait(buffer_map, size);
	else
		newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK);
#endif /* notyet */

	/* copy the old into the new, up to the maximum that will fit */
	bcopy (bp->b_un.b_addr, newcontents, min(bp->b_bufsize, size));

	/* return old contents to free heap */
#ifndef notyet
	free (bp->b_un.b_addr, M_TEMP);
#else /* notyet */
	if (round_page(bp->b_bufsize) == bp->b_bufsize)
		kmem_free_wakeup(buffer_map, bp->b_un.b_addr, bp->b_bufsize);
	else
		free (bp->b_un.b_addr, M_TEMP);
#endif /* notyet */

	/* adjust buffer cache's idea of memory allocated to buffer contents */
	freebufspace -= size - bp->b_bufsize;
	allocbufspace += size - bp->b_bufsize;

	/* update buffer header */
	bp->b_un.b_addr = newcontents;
	bp->b_bcount = bp->b_bufsize = size;
}
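
/*
 * Illustrative sketch (not part of the original file): growing a
 * buffer, e.g. when a filesystem fragment is extended.  Shrinking
 * would first require writing the tail back, since allocbuf() does
 * no I/O.  "newsize" is hypothetical.
 *
 *	if (newsize > bp->b_bufsize)
 *		allocbuf(bp, newsize);
 */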

/*
 * Patiently await operations to complete on this buffer.
 * When they do, extract and return any error associated
 * with the I/O.  If the block turned out to be invalid,
 * force it off the lookup hash chains.
 */
int
biowait(register struct buf *bp)
{
	int x;

	x = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	if((bp->b_flags & B_ERROR) || bp->b_error) {
		if ((bp->b_flags & B_INVAL) == 0) {
			bp->b_flags |= B_INVAL;
			bremhash(bp);
			binshash(bp, bfreelist + BQ_AGE);
		}
		if (!bp->b_error)
			bp->b_error = EIO;
		else
			bp->b_flags |= B_ERROR;
		splx(x);
		return (bp->b_error);
	} else {
		splx(x);
		return (0);
	}
}

/*
 * Finish up operations on a buffer, calling an optional
 * function (if requested), and releasing the buffer if
 * marked asynchronous.  Then mark this buffer done so that
 * others biowait()'ing for it will notice when they are
 * woken up from sleep().
 */
void
biodone(register struct buf *bp)
{
	int x;

	x = splbio();
	if (bp->b_flags & B_CALL) (*bp->b_iodone)(bp);
	bp->b_flags &= ~B_CALL;
	if ((bp->b_flags & (B_READ|B_DIRTY)) == B_DIRTY) {
		bp->b_flags &= ~B_DIRTY;
		vwakeup(bp);
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	bp->b_flags &= ~B_ASYNC;
	bp->b_flags |= B_DONE;
	wakeup(bp);
	splx(x);
}
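
/*
 * Illustrative sketch (not part of the original file): a device
 * driver's completion path pairs with biowait() by flagging any
 * error before calling biodone().  "xfer_failed" is a hypothetical
 * status from the hardware.
 *
 *	if (xfer_failed) {
 *		bp->b_flags |= B_ERROR;
 *		bp->b_error = EIO;
 *	}
 *	biodone(bp);
 */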