/*
 * Copyright (c) 1989, 1990, 1991, 1992 William F. Jolitz, TeleMuse
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This software is a component of "386BSD" developed by
 *    William F. Jolitz, TeleMuse.
 * 4. Neither the name of the developer nor the name "386BSD"
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS A COMPONENT OF 386BSD DEVELOPED BY WILLIAM F. JOLITZ
 * AND IS INTENDED FOR RESEARCH AND EDUCATIONAL PURPOSES ONLY. THIS
 * SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT.
 * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT
 * NOT MAKE USE OF THIS WORK.
 *
 * FOR USERS WHO WISH TO UNDERSTAND THE 386BSD SYSTEM DEVELOPED
 * BY WILLIAM F. JOLITZ, WE RECOMMEND THE USER STUDY WRITTEN
 * REFERENCES SUCH AS THE "PORTING UNIX TO THE 386" SERIES
 * (BEGINNING JANUARY 1991 "DR. DOBBS JOURNAL", USA AND BEGINNING
 * JUNE 1991 "UNIX MAGAZIN", GERMANY) BY WILLIAM F. JOLITZ AND
 * LYNNE GREER JOLITZ, AS WELL AS OTHER BOOKS ON UNIX AND THE
 * ON-LINE 386BSD USER MANUAL BEFORE USE. A BOOK DISCUSSING THE INTERNALS
 * OF 386BSD ENTITLED "386BSD FROM THE INSIDE OUT" WILL BE AVAILABLE LATE 1992.
 *
 * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE DEVELOPER BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: vfs__bio.c,v 1.6 1993/10/16 15:25:16 rgrimes Exp $
 */

#include "param.h"
#include "systm.h"
#include "proc.h"
#include "vnode.h"
#include "buf.h"
#include "specdev.h"
#include "mount.h"
#include "malloc.h"
#include "vm/vm.h"
#include "resourcevar.h"

static struct buf *getnewbuf(int);
extern vm_map_t buffer_map;

/*
 * Initialize buffer headers and related structures.
 */
void
bufinit()
{
	struct bufhd *bh;
	struct buf *bp;

	/* first, make a null hash table */
	for(bh = bufhash; bh < bufhash + BUFHSZ; bh++) {
		bh->b_flags = 0;
		bh->b_forw = (struct buf *)bh;
		bh->b_back = (struct buf *)bh;
	}

	/* next, make a null set of free lists */
	for(bp = bfreelist; bp < bfreelist + BQUEUES; bp++) {
		bp->b_flags = 0;
		bp->av_forw = bp;
		bp->av_back = bp;
		bp->b_forw = bp;
		bp->b_back = bp;
	}

	/* finally, initialize each buffer header and stick on empty q */
	for(bp = buf; bp < buf + nbuf ; bp++) {
		bp->b_flags = B_HEAD | B_INVAL;	/* we're just an empty header */
		bp->b_dev = NODEV;
		bp->b_vp = 0;
		binstailfree(bp, bfreelist + BQ_EMPTY);
		binshash(bp, bfreelist + BQ_EMPTY);
	}
}
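
/*
 * Note (illustrative, not in the original source): every hash chain and
 * free queue set up above is a circular doubly-linked list whose head
 * acts as its own sentinel, so "queue is empty" reduces to "the head
 * points back at itself".  A hypothetical emptiness test:
 *
 *	if (bfreelist[BQ_AGE].av_forw == (struct buf *)(bfreelist + BQ_AGE))
 *		... the AGE queue is empty ...
 *
 * which is exactly the comparison getnewbuf() makes below.
 */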

/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
int
bread(struct vnode *vp, daddr_t blkno, int size, struct ucred *cred,
	struct buf **bpp)
{
	struct buf *bp;
	int rv = 0;

	bp = getblk (vp, blkno, size);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) {
		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_inblock++;
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		if (cred != NOCRED) crhold(cred);	/* 25 Apr 92*/
		bp->b_rcred = cred;
		VOP_STRATEGY(bp);
		rv = biowait (bp);
	}
	*bpp = bp;

	return (rv);
}
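
/*
 * Usage sketch (hypothetical caller, not part of this file): a
 * filesystem read path might fetch a block with bread() and release
 * it when done.  "lbn", "fs_bsize" and "dst" are assumed names.
 *
 *	struct buf *bp;
 *	int error;
 *
 *	if (error = bread(vp, lbn, fs_bsize, NOCRED, &bp)) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	bcopy(bp->b_un.b_addr, dst, fs_bsize);
 *	brelse(bp);
 */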

/*
 * Operates like bread, but also starts I/O on the specified
 * read-ahead block.  [See Bach, "The Design of the UNIX Operating
 * System", p. 55.]
 */
int
breada(struct vnode *vp, daddr_t blkno, int size, daddr_t rablkno, int rabsize,
	struct ucred *cred, struct buf **bpp)
{
	struct buf *bp, *rabp;
	int rv = 0, needwait = 0;

	bp = getblk (vp, blkno, size);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) {
		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_inblock++;
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		if (cred != NOCRED) crhold(cred);	/* 25 Apr 92*/
		bp->b_rcred = cred;
		VOP_STRATEGY(bp);
		needwait++;
	}

	rabp = getblk (vp, rablkno, rabsize);

	/* if not found in cache, do some I/O (overlapped with first) */
	if ((rabp->b_flags & B_CACHE) == 0 || (rabp->b_flags & B_INVAL) != 0) {
		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_inblock++;
		rabp->b_flags |= B_READ | B_ASYNC;
		rabp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		if (cred != NOCRED) crhold(cred);	/* 25 Apr 92*/
		rabp->b_rcred = cred;
		VOP_STRATEGY(rabp);
	} else
		brelse(rabp);

	/* wait for original I/O */
	if (needwait)
		rv = biowait (bp);

	*bpp = bp;
	return (rv);
}
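
/*
 * Usage sketch (hypothetical): a caller reading sequentially can
 * overlap the transfer of the next logical block with consumption of
 * the current one.  "lbn" and "fs_bsize" are assumed names.
 *
 *	error = breada(vp, lbn, fs_bsize, lbn + 1, fs_bsize, NOCRED, &bp);
 *	if (error == 0) {
 *		... consume bp->b_un.b_addr ...
 *		brelse(bp);
 *	}
 */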

/*
 * Synchronous write.
 * Release buffer on completion.
 */
int
bwrite(register struct buf *bp)
{
	int rv;

	if(bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	} else {
		int wasdelayed;

		if(!(bp->b_flags & B_BUSY))
			panic("bwrite: not busy");

		wasdelayed = bp->b_flags & B_DELWRI;
		bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_ASYNC|B_DELWRI);
		if(wasdelayed)
			reassignbuf(bp, bp->b_vp);

		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_oublock++;
		bp->b_flags |= B_DIRTY;
		bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
		rv = biowait(bp);
		brelse(bp);
		return (rv);
	}
}

/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
void
bdwrite(register struct buf *bp)
{

	if(!(bp->b_flags & B_BUSY))
		panic("bdwrite: not busy");

	if(bp->b_flags & B_INVAL) {
		brelse(bp);
		return;
	}
	if(bp->b_flags & B_TAPE) {
		bwrite(bp);
		return;
	}
	bp->b_flags &= ~(B_READ|B_DONE);
	bp->b_flags |= B_DIRTY|B_DELWRI;
	reassignbuf(bp, bp->b_vp);
	brelse(bp);
	return;
}
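
/*
 * Usage sketch (hypothetical): a small write that only partially fills
 * a block reads the old contents first, modifies them in place, and
 * then uses bdwrite() so later writes to the same block can be
 * absorbed in memory.  "src", "off" and "len" are assumed names.
 *
 *	if (error = bread(vp, lbn, fs_bsize, NOCRED, &bp))
 *		return (error);
 *	bcopy(src, bp->b_un.b_addr + off, len);
 *	bdwrite(bp);
 *
 * The buffer is now dirty but no I/O has been queued; a later bwrite(),
 * bawrite() or buffer reclaim in getnewbuf() will push it to disk.
 */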

/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
void
bawrite(register struct buf *bp)
{

	if(!(bp->b_flags & B_BUSY))
		panic("bawrite: not busy");

	if(bp->b_flags & B_INVAL)
		brelse(bp);
	else {
		int wasdelayed;

		wasdelayed = bp->b_flags & B_DELWRI;
		bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI);
		if(wasdelayed)
			reassignbuf(bp, bp->b_vp);

		if (curproc && curproc->p_stats)	/* count block I/O */
			curproc->p_stats->p_ru.ru_oublock++;
		bp->b_flags |= B_DIRTY | B_ASYNC;
		bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
	}
}
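
/*
 * Usage sketch (hypothetical): once a block has been filled completely
 * and will not be touched again soon, starting the write immediately
 * lets disk work proceed in parallel with the caller; the buffer is
 * released from biodone() when the transfer finishes.
 *
 *	bp = getblk(vp, lbn, fs_bsize);
 *	bcopy(src, bp->b_un.b_addr, fs_bsize);
 *	bawrite(bp);
 */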

/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
void
brelse(register struct buf *bp)
{
	int x;

	/* anyone need a "free" block? */
	x = splbio();
	if ((bfreelist + BQ_AGE)->b_flags & B_WANTED) {
		(bfreelist + BQ_AGE)->b_flags &= ~B_WANTED;
		wakeup(bfreelist);
	}
	/* anyone need this very block? */
	if (bp->b_flags & B_WANTED) {
		bp->b_flags &= ~B_WANTED;
		wakeup(bp);
	}

	if (bp->b_flags & (B_INVAL|B_ERROR)) {
		bp->b_flags |= B_INVAL;
		bp->b_flags &= ~(B_DELWRI|B_CACHE);
		if(bp->b_vp)
			brelvp(bp);
	}

	/* enqueue */
	/* just an empty buffer head ... */
	/*if(bp->b_flags & B_HEAD)
		binsheadfree(bp, bfreelist + BQ_EMPTY)*/
	/* buffers with junk contents */
	/*else*/ if(bp->b_flags & (B_ERROR|B_INVAL|B_NOCACHE))
		binsheadfree(bp, bfreelist + BQ_AGE)
	/* buffers with stale but valid contents */
	else if(bp->b_flags & B_AGE)
		binstailfree(bp, bfreelist + BQ_AGE)
	/* buffers with valid and quite potentially reusable contents */
	else
		binstailfree(bp, bfreelist + BQ_LRU)

	/* unlock */
	bp->b_flags &= ~B_BUSY;
	splx(x);
}
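
/*
 * Note (illustrative): callers can influence where brelse() queues a
 * buffer.  Setting B_AGE first marks the contents as unlikely to be
 * reused, so the buffer is reclaimed ahead of the LRU buffers:
 *
 *	bp->b_flags |= B_AGE;
 *	brelse(bp);
 *
 * while B_NOCACHE (or B_INVAL/B_ERROR) sends it to the head of the
 * AGE queue for immediate reuse.
 */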

int freebufspace;
int allocbufspace;

/*
 * Find a buffer which is available for use.
 * If there is free memory for buffer space and an empty header on the
 * empty list, allocate fresh storage and use that.  Otherwise, reclaim
 * a buffer from a free list, preferring the AGE list to the LRU list.
 */
static struct buf *
getnewbuf(int sz)
{
	struct buf *bp;
	int x;

	x = splbio();
start:
	/* can we constitute a new buffer? */
	if (freebufspace > sz
	    && bfreelist[BQ_EMPTY].av_forw != (struct buf *)bfreelist+BQ_EMPTY) {
		caddr_t addr;

/*#define notyet*/
#ifndef notyet
		if ((addr = malloc (sz, M_TEMP, M_WAITOK)) == 0) goto tryfree;
#else /* notyet */
		/* get new memory buffer */
		if (round_page(sz) == sz)
			addr = (caddr_t) kmem_alloc_wired_wait(buffer_map, sz);
		else
			addr = (caddr_t) malloc (sz, M_TEMP, M_WAITOK);
		/*if ((addr = malloc (sz, M_TEMP, M_NOWAIT)) == 0) goto tryfree;*/
		bzero(addr, sz);
#endif /* notyet */
		freebufspace -= sz;
		allocbufspace += sz;

		bp = bfreelist[BQ_EMPTY].av_forw;
		bp->b_flags = B_BUSY | B_INVAL;
		bremfree(bp);
		bp->b_un.b_addr = addr;
		bp->b_bufsize = sz;	/* 20 Aug 92*/
		goto fillin;
	}

tryfree:
	if (bfreelist[BQ_AGE].av_forw != (struct buf *)bfreelist+BQ_AGE) {
		bp = bfreelist[BQ_AGE].av_forw;
		bremfree(bp);
	} else if (bfreelist[BQ_LRU].av_forw != (struct buf *)bfreelist+BQ_LRU) {
		bp = bfreelist[BQ_LRU].av_forw;
		bremfree(bp);
	} else {
		/* wait for a free buffer of any kind */
		(bfreelist + BQ_AGE)->b_flags |= B_WANTED;
		sleep(bfreelist, PRIBIO);
		splx(x);
		return (0);
	}

	/* if we are a delayed write, convert to an async write! */
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_BUSY;
		bawrite (bp);
		goto start;
	}

	if(bp->b_vp)
		brelvp(bp);

	/* we are not free, nor do we contain interesting data */
	if (bp->b_rcred != NOCRED) crfree(bp->b_rcred);	/* 25 Apr 92*/
	if (bp->b_wcred != NOCRED) crfree(bp->b_wcred);
	bp->b_flags = B_BUSY;
fillin:
	bremhash(bp);
	splx(x);
	bp->b_dev = NODEV;
	bp->b_vp = NULL;
	bp->b_blkno = bp->b_lblkno = 0;
	bp->b_iodone = 0;
	bp->b_error = 0;
	bp->b_resid = 0;
	bp->b_wcred = bp->b_rcred = NOCRED;
	if (bp->b_bufsize != sz)
		allocbuf(bp, sz);
	bp->b_bcount = bp->b_bufsize = sz;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	return (bp);
}

/*
 * Check to see if a block is currently memory resident.
 */
struct buf *
incore(struct vnode *vp, daddr_t blkno)
{
	struct buf *bh;
	struct buf *bp;

	bh = BUFHASH(vp, blkno);

	/* Search hash chain */
	bp = bh->b_forw;
	while (bp != (struct buf *) bh) {
		/* hit */
		if (bp->b_lblkno == blkno && bp->b_vp == vp
		    && (bp->b_flags & B_INVAL) == 0)
			return (bp);
		bp = bp->b_forw;
	}

	return(0);
}

/*
 * Get a block of requested size that is associated with
 * a given vnode and block offset.  If it is found in the
 * block cache, mark it as having been found, make it busy
 * and return it.  Otherwise, return an empty block of the
 * correct size.  It is up to the caller to ensure that the
 * cached blocks are of the correct size.
 */
struct buf *
getblk(register struct vnode *vp, daddr_t blkno, int size)
{
	struct buf *bp, *bh;
	int x;

	for (;;) {
		if (bp = incore(vp, blkno)) {
			x = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				sleep (bp, PRIBIO);
				splx(x);
				continue;
			}
			bp->b_flags |= B_BUSY | B_CACHE;
			bremfree(bp);
			if (size > bp->b_bufsize)
				panic("now what do we do?");
			/* if (bp->b_bufsize != size) allocbuf(bp, size); */
		} else {

			if((bp = getnewbuf(size)) == 0) continue;
			bp->b_blkno = bp->b_lblkno = blkno;
			bgetvp(vp, bp);
			x = splbio();
			bh = BUFHASH(vp, blkno);
			binshash(bp, bh);
			bp->b_flags = B_BUSY;
		}
		splx(x);
		return (bp);
	}
}
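
/*
 * Usage sketch (hypothetical): when a whole block is about to be
 * overwritten there is no reason to read its old contents, so getblk()
 * alone suffices.  The buffer may arrive with stale or junk data, so
 * the caller must fill it completely before writing.
 *
 *	bp = getblk(vp, lbn, fs_bsize);
 *	bcopy(src, bp->b_un.b_addr, fs_bsize);
 *	error = bwrite(bp);
 */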

/*
 * Get an empty, disassociated buffer of given size.
 */
struct buf *
geteblk(int size)
{
	struct buf *bp;
	int x;

	while ((bp = getnewbuf(size)) == 0)
		;
	x = splbio();
	binshash(bp, bfreelist + BQ_AGE);
	splx(x);

	return (bp);
}
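
/*
 * Usage sketch (hypothetical): geteblk() yields a buffer with no vnode
 * or block association, usable as temporary driver scratch space.
 * Marking it B_INVAL before release keeps it out of the cache.
 *
 *	bp = geteblk(8192);
 *	... use bp->b_un.b_addr as scratch memory ...
 *	bp->b_flags |= B_INVAL;
 *	brelse(bp);
 */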

/*
 * Exchange a buffer's underlying buffer storage for one of different
 * size, taking care to maintain contents appropriately.  When buffer
 * increases in size, caller is responsible for filling out additional
 * contents.  When buffer shrinks in size, data is lost, so caller must
 * first return it to backing store before shrinking the buffer, as
 * no implied I/O will be done.
 *
 * The buffer header is updated in place; nothing is returned.
 */
void
allocbuf(register struct buf *bp, int size)
{
	caddr_t newcontents;

	/* get new memory buffer */
#ifndef notyet
	newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK);
#else /* notyet */
	if (round_page(size) == size)
		newcontents = (caddr_t) kmem_alloc_wired_wait(buffer_map, size);
	else
		newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK);
#endif /* notyet */

	/* copy the old into the new, up to the maximum that will fit */
	bcopy (bp->b_un.b_addr, newcontents, min(bp->b_bufsize, size));

	/* return old contents to free heap */
#ifndef notyet
	free (bp->b_un.b_addr, M_TEMP);
#else /* notyet */
	if (round_page(bp->b_bufsize) == bp->b_bufsize)
		kmem_free_wakeup(buffer_map, bp->b_un.b_addr, bp->b_bufsize);
	else
		free (bp->b_un.b_addr, M_TEMP);
#endif /* notyet */

	/* adjust buffer cache's idea of memory allocated to buffer contents */
	freebufspace -= size - bp->b_bufsize;
	allocbufspace += size - bp->b_bufsize;

	/* update buffer header */
	bp->b_un.b_addr = newcontents;
	bp->b_bcount = bp->b_bufsize = size;
}
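
/*
 * Usage sketch (hypothetical): growing a fragment-sized buffer to a
 * full block.  allocbuf() updates b_bufsize itself, so the old size
 * must be saved first; the caller then initializes the added space.
 * "fs_bsize" and "old_size" are assumed names.
 *
 *	old_size = bp->b_bufsize;
 *	if (old_size < fs_bsize) {
 *		allocbuf(bp, fs_bsize);
 *		bzero(bp->b_un.b_addr + old_size, fs_bsize - old_size);
 *	}
 */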

/*
 * Patiently await operations to complete on this buffer.
 * When they do, extract and return any error value associated
 * with the I/O.  If the block turns out to be invalid, force it
 * off the lookup hash chains.
 */
int
biowait(register struct buf *bp)
{
	int x;

	x = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	if((bp->b_flags & B_ERROR) || bp->b_error) {
		if ((bp->b_flags & B_INVAL) == 0) {
			bp->b_flags |= B_INVAL;
			bremhash(bp);
			binshash(bp, bfreelist + BQ_AGE);
		}
		if (!bp->b_error)
			bp->b_error = EIO;
		else
			bp->b_flags |= B_ERROR;
		splx(x);
		return (bp->b_error);
	} else {
		splx(x);
		return (0);
	}
}

/*
 * Finish up operations on a buffer, calling an optional
 * function (if requested), and releasing the buffer if
 * marked asynchronous.  Then mark this buffer done so that
 * others biowait()'ing for it will notice when they are
 * woken up from sleep().
 */
void
biodone(register struct buf *bp)
{
	int x;

	x = splbio();
	if (bp->b_flags & B_CALL) (*bp->b_iodone)(bp);
	bp->b_flags &= ~B_CALL;
	if ((bp->b_flags & (B_READ|B_DIRTY)) == B_DIRTY) {
		bp->b_flags &= ~B_DIRTY;
		vwakeup(bp);
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	bp->b_flags &= ~B_ASYNC;
	bp->b_flags |= B_DONE;
	wakeup(bp);
	splx(x);
}
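
/*
 * Usage sketch (hypothetical): a disk driver's interrupt routine calls
 * biodone() once a transfer completes, after recording any error.
 *
 *	if (hard_error) {
 *		bp->b_flags |= B_ERROR;
 *		bp->b_error = EIO;
 *	}
 *	bp->b_resid = 0;
 *	biodone(bp);
 *
 * For a synchronous request this wakes the sleeper in biowait(); for an
 * asynchronous one, biodone() itself releases the buffer via brelse().
 */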