* Copyright (c) 1989, 1990, 1991, 1992 William F. Jolitz, TeleMuse
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This software is a component of "386BSD" developed by
* William F. Jolitz, TeleMuse.
* 4. Neither the name of the developer nor the name "386BSD"
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS A COMPONENT OF 386BSD DEVELOPED BY WILLIAM F. JOLITZ
* AND IS INTENDED FOR RESEARCH AND EDUCATIONAL PURPOSES ONLY. THIS
* SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT.
* THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT
* NOT MAKE USE OF THIS WORK.
* FOR USERS WHO WISH TO UNDERSTAND THE 386BSD SYSTEM DEVELOPED
* BY WILLIAM F. JOLITZ, WE RECOMMEND THE USER STUDY WRITTEN
* REFERENCES SUCH AS THE "PORTING UNIX TO THE 386" SERIES
* (BEGINNING JANUARY 1991 "DR. DOBBS JOURNAL", USA AND BEGINNING
* JUNE 1991 "UNIX MAGAZIN", GERMANY) BY WILLIAM F. JOLITZ AND
* LYNNE GREER JOLITZ, AS WELL AS OTHER BOOKS ON UNIX AND THE
* ON-LINE 386BSD USER MANUAL BEFORE USE. A BOOK DISCUSSING THE INTERNALS
* OF 386BSD ENTITLED "386BSD FROM THE INSIDE OUT" WILL BE AVAILABLE LATE 1992.
* THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE DEVELOPER BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* PATCHES MAGIC LEVEL PATCH THAT GOT US HERE
* -------------------- ----- ----------------------
* CURRENT PATCH LEVEL: 2 00042
* -------------------- ----- ----------------------
* 24 Apr 92 Martin Renters Fix NFS read request hang
* 20 Aug 92 David Greenman Fix getnewbuf() 2xAllocation
static char rcsid
[] = "$Header: /a/cvs/386BSD/src/sys.386bsd/kern/vfs__bio.c,v 1.2 1993/07/18 11:27:45 paul Exp $";
static struct buf
*getnewbuf(int);
extern vm_map_t buffer_map
;
/*
 * Initialize buffer headers and related structures.
 *
 * NOTE(review): this extract is visibly truncated — the enclosing
 * function header (presumably the buffer-cache init routine) and
 * several statements/closing braces are missing. Comments below
 * describe only the fragments that survive.
 */
/* first, make a null hash table */
/* each hash bucket starts as a circular list pointing at itself */
for(bh
= bufhash
; bh
< bufhash
+ BUFHSZ
; bh
++) {
bh
->b_forw
= (struct buf
*)bh
;
bh
->b_back
= (struct buf
*)bh
;
/* next, make a null set of free lists */
/* NOTE(review): the loop body that nulls each free-list head is not
 * visible in this extract. */
for(bp
= bfreelist
; bp
< bfreelist
+ BQUEUES
; bp
++) {
/* finally, initialize each buffer header and stick on empty q */
for(bp
= buf
; bp
< buf
+ nbuf
; bp
++) {
bp
->b_flags
= B_HEAD
| B_INVAL
; /* we're just an empty header */
/* queue each header on the EMPTY free list and enter it in the hash */
binstailfree(bp
, bfreelist
+ BQ_EMPTY
);
binshash(bp
, bfreelist
+ BQ_EMPTY
);
/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 *
 * NOTE(review): truncated extract — the parameter list is cut off
 * mid-declaration (a struct buf ** out-parameter presumably follows),
 * and the body's tail (the actual read, wait, and return) is missing.
 */
bread(struct vnode
*vp
, daddr_t blkno
, int size
, struct ucred
*cred
,
/* look the block up in (or add it to) the cache */
bp
= getblk (vp
, blkno
, size
);
/* if not found in cache, do some I/O */
if ((bp
->b_flags
& B_CACHE
) == 0 || (bp
->b_flags
& B_INVAL
) != 0) {
/* clear stale completion/error/invalid state before the read */
bp
->b_flags
&= ~(B_DONE
|B_ERROR
|B_INVAL
);
/* hold a reference on the credential for the duration of the I/O */
if (cred
!= NOCRED
) crhold(cred
); /* 25 Apr 92*/
/*
 * Operates like bread, but also starts I/O on the specified
 * read-ahead block. [See page 55 of Bach's Book]
 *
 * NOTE(review): truncated extract — the opening brace, strategy calls,
 * the "wait for original I/O" body, and the return are missing.
 */
breada(struct vnode
*vp
, daddr_t blkno
, int size
, daddr_t rablkno
, int rabsize
,
struct ucred
*cred
, struct buf
**bpp
)
int rv
= 0, needwait
= 0;
/* look up (or create) the primary block */
bp
= getblk (vp
, blkno
, size
);
/* if not found in cache, do some I/O */
if ((bp
->b_flags
& B_CACHE
) == 0 || (bp
->b_flags
& B_INVAL
) != 0) {
/* clear stale completion/error/invalid state before the read */
bp
->b_flags
&= ~(B_DONE
|B_ERROR
|B_INVAL
);
if (cred
!= NOCRED
) crhold(cred
); /* 25 Apr 92*/
/* now the read-ahead block */
rabp
= getblk (vp
, rablkno
, rabsize
);
/* if not found in cache, do some I/O (overlapped with first) */
if ((rabp
->b_flags
& B_CACHE
) == 0 || (rabp
->b_flags
& B_INVAL
) != 0) {
/* read-ahead is asynchronous: caller never waits on rabp here */
rabp
->b_flags
|= B_READ
| B_ASYNC
;
rabp
->b_flags
&= ~(B_DONE
|B_ERROR
|B_INVAL
);
if (cred
!= NOCRED
) crhold(cred
); /* 25 Apr 92*/
/* wait for original I/O */
/*
 * Synchronous write of a buffer. Release buffer on completion.
 *
 * NOTE(review): truncated extract — the opening brace, locals
 * (e.g. wasdelayed's declaration), the actual write (strategy call),
 * and the return are missing.
 */
bwrite(register struct buf
*bp
)
/* NOTE(review): the body of this invalid-buffer branch is not
 * visible in this extract. */
if(bp
->b_flags
& B_INVAL
) {
/* caller must hold the buffer busy */
if(!(bp
->b_flags
& B_BUSY
))
panic("bwrite: not busy");
/* remember whether this was a delayed write before clearing flags */
wasdelayed
= bp
->b_flags
& B_DELWRI
;
bp
->b_flags
&= ~(B_READ
|B_DONE
|B_ERROR
|B_ASYNC
|B_DELWRI
);
/* dirty state changed — re-queue the buffer on its vnode's lists */
reassignbuf(bp
, bp
->b_vp
);
/*
 * Delayed write. The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 *
 * NOTE(review): truncated extract — the opening brace, the bodies of
 * the B_INVAL and B_TAPE branches, and the buffer release are missing.
 */
bdwrite(register struct buf
*bp
)
/* caller must hold the buffer busy */
if(!(bp
->b_flags
& B_BUSY
))
panic("bdwrite: not busy");
/* NOTE(review): branch bodies not visible in this extract */
if(bp
->b_flags
& B_INVAL
) {
/* tapes must be written in request order — cannot delay (see NB) */
if(bp
->b_flags
& B_TAPE
) {
bp
->b_flags
&= ~(B_READ
|B_DONE
);
/* mark dirty/delayed instead of starting I/O */
bp
->b_flags
|= B_DIRTY
|B_DELWRI
;
/* dirty state changed — re-queue the buffer on its vnode's lists */
reassignbuf(bp
, bp
->b_vp
);
/*
 * Asynchronous write. Start I/O on a buffer, but do not wait for it
 * to complete. The buffer is released when the I/O completes.
 *
 * NOTE(review): truncated extract — the opening brace, locals, the
 * strategy call that actually starts the I/O, and the return are
 * missing.
 */
bawrite(register struct buf
*bp
)
/* caller must hold the buffer busy */
if(!(bp
->b_flags
& B_BUSY
))
panic("bawrite: not busy");
/* NOTE(review): the statement controlled by this condition is not
 * visible in this extract */
if(bp
->b_flags
& B_INVAL
)
/* remember whether this was a delayed write before clearing flags */
wasdelayed
= bp
->b_flags
& B_DELWRI
;
bp
->b_flags
&= ~(B_READ
|B_DONE
|B_ERROR
|B_DELWRI
);
/* dirty state changed — re-queue the buffer on its vnode's lists */
reassignbuf(bp
, bp
->b_vp
);
/* mark for asynchronous completion */
bp
->b_flags
|= B_DIRTY
| B_ASYNC
;
/*
 * Release a buffer back onto the appropriate free list.
 * Even if the buffer is dirty, no I/O is started.
 *
 * NOTE(review): truncated extract — the opening brace, wakeup calls
 * after clearing B_WANTED, flag clearing on exit, and closing braces
 * are missing.
 */
brelse(register struct buf
*bp
)
/* anyone need a "free" block? */
if ((bfreelist
+ BQ_AGE
)->b_flags
& B_WANTED
) {
/* NOTE(review): a wakeup presumably follows; not visible here */
(bfreelist
+ BQ_AGE
) ->b_flags
&= ~B_WANTED
;
/* anyone need this very block? */
if (bp
->b_flags
& B_WANTED
) {
bp
->b_flags
&= ~B_WANTED
;
/* errored/invalid contents are neither dirty nor cacheable */
if (bp
->b_flags
& (B_INVAL
|B_ERROR
)) {
bp
->b_flags
&= ~(B_DELWRI
|B_CACHE
);
/* just an empty buffer head ... */
/*if(bp->b_flags & B_HEAD)
binsheadfree(bp, bfreelist + BQ_EMPTY)*/
/* buffers with junk contents: head of AGE list (reused first) */
/*else*/ if(bp
->b_flags
& (B_ERROR
|B_INVAL
|B_NOCACHE
))
binsheadfree(bp
, bfreelist
+ BQ_AGE
)
/* buffers with stale but valid contents */
else if(bp
->b_flags
& B_AGE
)
binstailfree(bp
, bfreelist
+ BQ_AGE
)
/* buffers with valid and quite potentially reuseable contents */
binstailfree(bp
, bfreelist
+ BQ_LRU
)
/*
 * Find a buffer which is available for use.
 * If free memory for buffer space and an empty header from the empty list,
 * use that. Otherwise, select something from a free list.
 * Preference is to AGE list, then LRU list.
 *
 * NOTE(review): truncated extract — the function header, the first half
 * of the "can we constitute a new buffer?" condition, the tryfree label,
 * splx/spl protection, the delayed-write conversion body, and the return
 * are all missing. Comments describe only the visible fragments.
 */
/* can we constitute a new buffer? */
/* ... (missing condition) and the EMPTY list is non-empty */
&& bfreelist
[BQ_EMPTY
].av_forw
!= (struct buf
*)bfreelist
+BQ_EMPTY
) {
if ((addr
= malloc (sz
, M_TEMP
, M_WAITOK
)) == 0) goto tryfree
;
/* get new memory buffer */
/* page-multiple sizes come from the wired kernel buffer map,
 * odd sizes from the kernel heap */
if (round_page(sz
) == sz
)
addr
= (caddr_t
) kmem_alloc_wired_wait(buffer_map
, sz
);
addr
= (caddr_t
) malloc (sz
, M_TEMP
, M_WAITOK
);
/*if ((addr = malloc (sz, M_TEMP, M_NOWAIT)) == 0) goto tryfree;*/
/* take a header off the EMPTY list and claim it */
bp
= bfreelist
[BQ_EMPTY
].av_forw
;
bp
->b_flags
= B_BUSY
| B_INVAL
;
bp
->b_bufsize
= sz
; /* 20 Aug 92*/
/* otherwise recycle: prefer AGE list, then LRU list */
if (bfreelist
[BQ_AGE
].av_forw
!= (struct buf
*)bfreelist
+BQ_AGE
) {
bp
= bfreelist
[BQ_AGE
].av_forw
;
} else if (bfreelist
[BQ_LRU
].av_forw
!= (struct buf
*)bfreelist
+BQ_LRU
) {
bp
= bfreelist
[BQ_LRU
].av_forw
;
/* wait for a free buffer of any kind */
(bfreelist
+ BQ_AGE
)->b_flags
|= B_WANTED
;
sleep(bfreelist
, PRIBIO
);
/* if we are a delayed write, convert to an async write! */
/* NOTE(review): conversion body not visible in this extract */
if (bp
->b_flags
& B_DELWRI
) {
/* we are not free, nor do we contain interesting data */
/* drop any credentials still referenced by the recycled buffer */
if (bp
->b_rcred
!= NOCRED
) crfree(bp
->b_rcred
); /* 25 Apr 92*/
if (bp
->b_wcred
!= NOCRED
) crfree(bp
->b_wcred
);
/* reset the header to a pristine, disassociated state */
bp
->b_blkno
= bp
->b_lblkno
= 0;
bp
->b_wcred
= bp
->b_rcred
= NOCRED
;
bp
->b_bcount
= bp
->b_bufsize
= sz
;
bp
->b_dirtyoff
= bp
->b_dirtyend
= 0;
/*
 * Check to see if a block is currently memory resident.
 *
 * NOTE(review): truncated extract — the hash-bucket lookup that
 * initializes bp/bh, the loop advance, and the return statements
 * are missing.
 */
incore(struct vnode
*vp
, daddr_t blkno
)
/* walk the (circular) hash chain until back at the bucket head */
while (bp
!= (struct buf
*) bh
) {
/* match on logical block, vnode, and validity */
if (bp
->b_lblkno
== blkno
&& bp
->b_vp
== vp
&& (bp
->b_flags
& B_INVAL
) == 0)
/*
 * Get a block of requested size that is associated with
 * a given vnode and block offset. If it is found in the
 * block cache, mark it as having been found, make it busy
 * and return it. Otherwise, return an empty block of the
 * correct size. It is up to the caller to insure that the
 * cached blocks be of the correct size.
 *
 * NOTE(review): truncated extract — the retry loop, the busy-wait
 * (sleep on B_WANTED) body, hash/vnode association of a fresh
 * buffer, and the return are missing.
 */
getblk(register struct vnode
*vp
, daddr_t blkno
, int size
)
/* cache hit path (assignment in condition is intentional) */
if (bp
= incore(vp
, blkno
)) {
/* NOTE(review): busy-buffer wait body not visible here */
if (bp
->b_flags
& B_BUSY
) {
/* claim it and mark it as found in cache */
bp
->b_flags
|= B_BUSY
| B_CACHE
;
/* cached buffer smaller than requested: unhandled case */
if (size
> bp
->b_bufsize
)
panic("now what do we do?");
/* if (bp->b_bufsize != size) allocbuf(bp, size); */
/* cache miss: obtain a fresh buffer, retrying if none available */
if((bp
= getnewbuf(size
)) == 0) continue;
bp
->b_blkno
= bp
->b_lblkno
= blkno
;
/*
 * Get an empty, disassociated buffer of given size.
 *
 * NOTE(review): truncated extract — the function header, locals,
 * flag setup, and return are missing.
 */
/* retry until a buffer of the requested size can be obtained */
while ((bp
= getnewbuf(size
)) == 0)
/* enter on the AGE hash/list: contents are junk, reuse soon */
binshash(bp
, bfreelist
+ BQ_AGE
);
/*
 * Exchange a buffer's underlying buffer storage for one of different
 * size, taking care to maintain contents appropriately. When buffer
 * increases in size, caller is responsible for filling out additional
 * contents. When buffer shrinks in size, data is lost, so caller must
 * first return it to backing store before shrinking the buffer, as
 * no implied I/O will be done.
 *
 * Expanded buffer is returned as value.
 *
 * NOTE(review): truncated extract — the opening brace, locals, the
 * if/else structure around the duplicated malloc calls, and the
 * return are missing. Two malloc(size, ...) assignments and two
 * free(..., M_TEMP) calls appear; presumably one of each pair sits
 * in an else branch of the round_page() tests — confirm against the
 * full source.
 */
allocbuf(register struct buf
*bp
, int size
)
/* get new memory buffer */
newcontents
= (caddr_t
) malloc (size
, M_TEMP
, M_WAITOK
);
/* page-multiple sizes come from the wired kernel buffer map */
if (round_page(size
) == size
)
newcontents
= (caddr_t
) kmem_alloc_wired_wait(buffer_map
, size
);
newcontents
= (caddr_t
) malloc (size
, M_TEMP
, M_WAITOK
);
/* copy the old into the new, up to the maximum that will fit */
bcopy (bp
->b_un
.b_addr
, newcontents
, min(bp
->b_bufsize
, size
));
/* return old contents to free heap */
free (bp
->b_un
.b_addr
, M_TEMP
);
/* old storage is released the same way it was obtained */
if (round_page(bp
->b_bufsize
) == bp
->b_bufsize
)
kmem_free_wakeup(buffer_map
, bp
->b_un
.b_addr
, bp
->b_bufsize
);
free (bp
->b_un
.b_addr
, M_TEMP
);
/* adjust buffer cache's idea of memory allocated to buffer contents */
freebufspace
-= size
- bp
->b_bufsize
;
allocbufspace
+= size
- bp
->b_bufsize
;
/* update buffer header */
bp
->b_un
.b_addr
= newcontents
;
bp
->b_bcount
= bp
->b_bufsize
= size
;
/*
 * Patiently await operations to complete on this buffer.
 * When they do, extract error value and return it.
 * Extract and return any errors associated with the I/O.
 * If an invalid block, force it off the lookup hash chains.
 *
 * NOTE(review): truncated extract — the opening brace, spl
 * protection, the error-extraction/return statements, and the
 * bufremhash-style removal are missing.
 */
biowait(register struct buf
*bp
)
/* sleep until the I/O completion handler sets B_DONE */
while ((bp
->b_flags
& B_DONE
) == 0)
sleep((caddr_t
)bp
, PRIBIO
);
/* NOTE(review): error-path body not visible in this extract */
if((bp
->b_flags
& B_ERROR
) || bp
->b_error
) {
if ((bp
->b_flags
& B_INVAL
) == 0) {
/* re-enter on the AGE hash/list for early reuse */
binshash(bp
, bfreelist
+ BQ_AGE
);
/*
 * Finish up operations on a buffer, calling an optional
 * function (if requested), and releasing the buffer if
 * marked asynchronous. Then mark this buffer done so that
 * others biowait()'ing for it will notice when they are
 * woken.
 *
 * NOTE(review): truncated extract — the opening brace, the B_DONE
 * setting, the wakeup of waiters, and the async brelse are missing.
 */
biodone(register struct buf
*bp
)
/* invoke the optional per-buffer completion callback */
if (bp
->b_flags
& B_CALL
) (*bp
->b_iodone
)(bp
);
/* a completed delayed write (dirty, not a read) */
/* NOTE(review): branch bodies not visible in this extract */
if ((bp
->b_flags
& (B_READ
|B_DIRTY
)) == B_DIRTY
) {
if (bp
->b_flags
& B_ASYNC
)