386BSD 0.1 development
[unix-history] /usr/src/sys.386bsd/kern/vfs__bio.c
/*
 * Copyright (c) 1989, 1990, 1991, 1992 William F. Jolitz, TeleMuse
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This software is a component of "386BSD" developed by
 *	William F. Jolitz, TeleMuse.
 * 4. Neither the name of the developer nor the name "386BSD"
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS A COMPONENT OF 386BSD DEVELOPED BY WILLIAM F. JOLITZ
 * AND IS INTENDED FOR RESEARCH AND EDUCATIONAL PURPOSES ONLY. THIS
 * SOFTWARE SHOULD NOT BE CONSIDERED TO BE A COMMERCIAL PRODUCT.
 * THE DEVELOPER URGES THAT USERS WHO REQUIRE A COMMERCIAL PRODUCT
 * NOT MAKE USE THIS WORK.
 *
 * FOR USERS WHO WISH TO UNDERSTAND THE 386BSD SYSTEM DEVELOPED
 * BY WILLIAM F. JOLITZ, WE RECOMMEND THE USER STUDY WRITTEN
 * REFERENCES SUCH AS THE "PORTING UNIX TO THE 386" SERIES
 * (BEGINNING JANUARY 1991 "DR. DOBBS JOURNAL", USA AND BEGINNING
 * JUNE 1991 "UNIX MAGAZIN", GERMANY) BY WILLIAM F. JOLITZ AND
 * LYNNE GREER JOLITZ, AS WELL AS OTHER BOOKS ON UNIX AND THE
 * ON-LINE 386BSD USER MANUAL BEFORE USE. A BOOK DISCUSSING THE INTERNALS
 * OF 386BSD ENTITLED "386BSD FROM THE INSIDE OUT" WILL BE AVAILABLE LATE 1992.
 *
 * THIS SOFTWARE IS PROVIDED BY THE DEVELOPER ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE DEVELOPER BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
static char rcsid[] = "$Header: /usr/bill/working/sys/kern/RCS/vfs__bio.c,v 1.2 92/01/21 21:30:08 william Exp $";

#include "param.h"
#include "proc.h"
#include "vnode.h"
#include "buf.h"
#include "specdev.h"
#include "mount.h"
#include "malloc.h"
#include "vm/vm.h"
#include "resourcevar.h"

static struct buf *getnewbuf(int);
extern vm_map_t buffer_map;

/*
 * Initialize buffer headers and related structures.
 */
void bufinit()
{
	struct bufhd *bh;
	struct buf *bp;

	/* first, make a null hash table */
	for(bh = bufhash; bh < bufhash + BUFHSZ; bh++) {
		bh->b_flags = 0;
		bh->b_forw = (struct buf *)bh;
		bh->b_back = (struct buf *)bh;
	}

	/* next, make a null set of free lists */
	for(bp = bfreelist; bp < bfreelist + BQUEUES; bp++) {
		bp->b_flags = 0;
		bp->av_forw = bp;
		bp->av_back = bp;
		bp->b_forw = bp;
		bp->b_back = bp;
	}

	/* finally, initialize each buffer header and stick on empty q */
	for(bp = buf; bp < buf + nbuf ; bp++) {
		bp->b_flags = B_HEAD | B_INVAL;	/* we're just an empty header */
		bp->b_dev = NODEV;
		bp->b_vp = 0;
		binstailfree(bp, bfreelist + BQ_EMPTY);
		binshash(bp, bfreelist + BQ_EMPTY);
	}
}

/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
int
bread(struct vnode *vp, daddr_t blkno, int size, struct ucred *cred,
	struct buf **bpp)
{
	struct buf *bp;
	int rv = 0;

	bp = getblk (vp, blkno, size);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) {
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		bp->b_rcred = cred;
		VOP_STRATEGY(bp);
		rv = biowait (bp);
	}
	*bpp = bp;

	return (rv);
}
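
/*
 * Usage sketch: a caller is expected to pair bread() with brelse() (or
 * with bwrite()/bdwrite() if it modifies the buffer).  The vnode "vp",
 * logical block "lbn" and size "bsize" below are hypothetical names used
 * for illustration only, not identifiers from this file:
 *
 *	struct buf *bp;
 *	int error;
 *
 *	if (error = bread(vp, lbn, bsize, NOCRED, &bp)) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	... inspect or copy data at bp->b_un.b_addr ...
 *	brelse(bp);
 */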

/*
 * Operates like bread, but also starts I/O on the specified
 * read-ahead block. [See page 55 of Bach's Book]
 */
int
breada(struct vnode *vp, daddr_t blkno, int size, daddr_t rablkno, int rabsize,
	struct ucred *cred, struct buf **bpp)
{
	struct buf *bp, *rabp;
	int rv = 0, needwait = 0;

	bp = getblk (vp, blkno, size);

	/* if not found in cache, do some I/O */
	if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) {
		bp->b_flags |= B_READ;
		bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		bp->b_rcred = cred;
		VOP_STRATEGY(bp);
		needwait++;
	}

	rabp = getblk (vp, rablkno, rabsize);

	/* if not found in cache, do some I/O (overlapped with first) */
	if ((rabp->b_flags & B_CACHE) == 0 || (rabp->b_flags & B_INVAL) != 0) {
		rabp->b_flags |= B_READ | B_ASYNC;
		rabp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL);
		rabp->b_rcred = cred;
		VOP_STRATEGY(rabp);
	} else
		brelse(rabp);

	/* wait for original I/O */
	if (needwait)
		rv = biowait (bp);

	*bpp = bp;
	return (rv);
}
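
/*
 * Usage sketch: a caller that detects sequential access may use breada()
 * to overlap the read of the next logical block with the current one.
 * The names "vp", "lbn" and "bsize" below are hypothetical:
 *
 *	error = breada(vp, lbn, bsize, lbn + 1, bsize, NOCRED, &bp);
 *
 * Only the original block is waited for; the read-ahead block is started
 * B_ASYNC and is expected to be found in the cache by a later bread().
 */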

/*
 * Synchronous write.
 * Release buffer on completion.
 */
int
bwrite(register struct buf *bp)
{
	int rv;

	if(bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	} else {
		int wasdelayed;

		if(!(bp->b_flags & B_BUSY))
			panic("bwrite: not busy");

		wasdelayed = bp->b_flags & B_DELWRI;
		bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_ASYNC|B_DELWRI);
		if(wasdelayed)
			reassignbuf(bp, bp->b_vp);

		bp->b_flags |= B_DIRTY;
		bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
		rv = biowait(bp);
		brelse(bp);
		return (rv);
	}
}

/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
void
bdwrite(register struct buf *bp)
{

	if(!(bp->b_flags & B_BUSY))
		panic("bdwrite: not busy");

	if(bp->b_flags & B_INVAL) {
		brelse(bp);
		return;
	}
	if(bp->b_flags & B_TAPE) {
		bwrite(bp);
		return;
	}
	bp->b_flags &= ~(B_READ|B_DONE);
	bp->b_flags |= B_DIRTY|B_DELWRI;
	reassignbuf(bp, bp->b_vp);
	brelse(bp);
	return;
}

/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
void
bawrite(register struct buf *bp)
{

	if(!(bp->b_flags & B_BUSY))
		panic("bawrite: not busy");

	if(bp->b_flags & B_INVAL)
		brelse(bp);
	else {
		int wasdelayed;

		wasdelayed = bp->b_flags & B_DELWRI;
		bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI);
		if(wasdelayed)
			reassignbuf(bp, bp->b_vp);

		bp->b_flags |= B_DIRTY | B_ASYNC;
		bp->b_vp->v_numoutput++;
		VOP_STRATEGY(bp);
	}
}
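
/*
 * Usage sketch: a caller that has modified a busy buffer chooses among
 * the three write paths above.  bwrite() waits for completion, bawrite()
 * starts the I/O and returns, and bdwrite() only marks the buffer
 * B_DELWRI for a later flush.  The predicates below are hypothetical:
 *
 *	if (io_must_be_synchronous)
 *		error = bwrite(bp);
 *	else if (block_completely_filled)
 *		bawrite(bp);
 *	else
 *		bdwrite(bp);
 */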

/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
void
brelse(register struct buf *bp)
{
	int x;

	/* anyone need a "free" block? */
	x = splbio();
	if ((bfreelist + BQ_AGE)->b_flags & B_WANTED) {
		(bfreelist + BQ_AGE)->b_flags &= ~B_WANTED;
		wakeup(bfreelist);
	}
	/* anyone need this very block? */
	if (bp->b_flags & B_WANTED) {
		bp->b_flags &= ~B_WANTED;
		wakeup(bp);
	}

	if (bp->b_flags & (B_INVAL|B_ERROR)) {
		bp->b_flags |= B_INVAL;
		bp->b_flags &= ~(B_DELWRI|B_CACHE);
		if(bp->b_vp)
			brelvp(bp);
	}

	/* enqueue */
	/* just an empty buffer head ... */
	/*if(bp->b_flags & B_HEAD)
		binsheadfree(bp, bfreelist + BQ_EMPTY)*/
	/* buffers with junk contents */
	/*else*/ if(bp->b_flags & (B_ERROR|B_INVAL|B_NOCACHE))
		binsheadfree(bp, bfreelist + BQ_AGE)
	/* buffers with stale but valid contents */
	else if(bp->b_flags & B_AGE)
		binstailfree(bp, bfreelist + BQ_AGE)
	/* buffers with valid and potentially reusable contents */
	else
		binstailfree(bp, bfreelist + BQ_LRU)

	/* unlock */
	bp->b_flags &= ~B_BUSY;
	splx(x);

}

int freebufspace;
int allocbufspace;

/*
 * Find a buffer which is available for use.
 * If there is free memory for buffer space and an empty header on the
 * empty list, use them. Otherwise, select a buffer from a free list,
 * preferring the AGE list, then the LRU list.
 */
static struct buf *
getnewbuf(int sz)
{
	struct buf *bp;
	int x;

	x = splbio();
start:
	/* can we constitute a new buffer? */
	if (freebufspace > sz
		&& bfreelist[BQ_EMPTY].av_forw != (struct buf *)bfreelist+BQ_EMPTY) {
		caddr_t addr;

/*#define notyet*/
#ifndef notyet
		if ((addr = malloc (sz, M_TEMP, M_WAITOK)) == 0) goto tryfree;
#else /* notyet */
		/* get new memory buffer */
		if (round_page(sz) == sz)
			addr = (caddr_t) kmem_alloc_wired_wait(buffer_map, sz);
		else
			addr = (caddr_t) malloc (sz, M_TEMP, M_WAITOK);
		/*if ((addr = malloc (sz, M_TEMP, M_NOWAIT)) == 0) goto tryfree;*/
		bzero(addr, sz);
#endif /* notyet */
		freebufspace -= sz;
		allocbufspace += sz;

		bp = bfreelist[BQ_EMPTY].av_forw;
		bp->b_flags = B_BUSY | B_INVAL;
		bremfree(bp);
		bp->b_un.b_addr = addr;
		goto fillin;
	}

tryfree:
	if (bfreelist[BQ_AGE].av_forw != (struct buf *)bfreelist+BQ_AGE) {
		bp = bfreelist[BQ_AGE].av_forw;
		bremfree(bp);
	} else if (bfreelist[BQ_LRU].av_forw != (struct buf *)bfreelist+BQ_LRU) {
		bp = bfreelist[BQ_LRU].av_forw;
		bremfree(bp);
	} else {
		/* wait for a free buffer of any kind */
		(bfreelist + BQ_AGE)->b_flags |= B_WANTED;
		sleep(bfreelist, PRIBIO);
		splx(x);
		return (0);
	}

	/* if we are a delayed write, convert to an async write! */
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_BUSY;
		bawrite (bp);
		goto start;
	}

	if(bp->b_vp)
		brelvp(bp);

	/* we are not free, nor do we contain interesting data */
	bp->b_flags = B_BUSY;
fillin:
	bremhash(bp);
	splx(x);
	bp->b_dev = NODEV;
	bp->b_vp = NULL;
	bp->b_blkno = bp->b_lblkno = 0;
	bp->b_iodone = 0;
	bp->b_error = 0;
	bp->b_wcred = bp->b_rcred = NOCRED;
	if (bp->b_bufsize != sz)
		allocbuf(bp, sz);
	bp->b_bcount = bp->b_bufsize = sz;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	return (bp);
}

/*
 * Check to see if a block is currently memory resident.
 */
struct buf *
incore(struct vnode *vp, daddr_t blkno)
{
	struct buf *bh;
	struct buf *bp;

	bh = BUFHASH(vp, blkno);

	/* Search hash chain */
	bp = bh->b_forw;
	while (bp != (struct buf *) bh) {
		/* hit */
		if (bp->b_lblkno == blkno && bp->b_vp == vp
			&& (bp->b_flags & B_INVAL) == 0)
			return (bp);
		bp = bp->b_forw;
	}

	return(0);
}

/*
 * Get a block of the requested size that is associated with
 * a given vnode and block offset. If it is found in the
 * block cache, mark it as having been found, make it busy
 * and return it. Otherwise, return an empty block of the
 * correct size. It is up to the caller to ensure that the
 * cached blocks are of the correct size.
 */
struct buf *
getblk(register struct vnode *vp, daddr_t blkno, int size)
{
	struct buf *bp, *bh;
	int x;

	for (;;) {
		if (bp = incore(vp, blkno)) {
			x = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				sleep (bp, PRIBIO);
				splx(x);
				continue;
			}
			bp->b_flags |= B_BUSY | B_CACHE;
			bremfree(bp);
			if (size > bp->b_bufsize)
				panic("now what do we do?");
			/* if (bp->b_bufsize != size) allocbuf(bp, size); */
		} else {

			if((bp = getnewbuf(size)) == 0) continue;
			bp->b_blkno = bp->b_lblkno = blkno;
			bgetvp(vp, bp);
			x = splbio();
			bh = BUFHASH(vp, blkno);
			binshash(bp, bh);
			bp->b_flags = B_BUSY;
		}
		splx(x);
		return (bp);
	}
}
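
/*
 * Usage sketch: getblk() is the usual way to obtain a block that is about
 * to be completely overwritten, since no read from backing store is
 * needed.  The names "vp", "lbn" and "bsize" below are hypothetical:
 *
 *	bp = getblk(vp, lbn, bsize);
 *	bzero(bp->b_un.b_addr, bsize);
 *	... copy the new contents into bp->b_un.b_addr ...
 *	bdwrite(bp);
 */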

/*
 * Get an empty, disassociated buffer of given size.
 */
struct buf *
geteblk(int size)
{
	struct buf *bp;
	int x;

	while ((bp = getnewbuf(size)) == 0)
		;
	x = splbio();
	binshash(bp, bfreelist + BQ_AGE);
	splx(x);

	return (bp);
}
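
/*
 * Usage sketch: geteblk() hands out a buffer with no vnode or device
 * association, useful as scratch space for raw or temporary I/O; the
 * caller releases it with brelse() when done.  "bsize" is hypothetical:
 *
 *	bp = geteblk(bsize);
 *	... use bp->b_un.b_addr as a bsize-byte scratch area ...
 *	brelse(bp);
 */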

/*
 * Exchange a buffer's underlying buffer storage for one of a different
 * size, taking care to maintain contents appropriately. When the buffer
 * grows, the caller is responsible for filling out the additional
 * contents. When the buffer shrinks, data is lost, so the caller must
 * first return it to backing store before shrinking the buffer, as
 * no implied I/O will be done.
 *
 * The buffer header is updated in place to describe the new storage.
 */
void
allocbuf(register struct buf *bp, int size)
{
	caddr_t newcontents;

	/* get new memory buffer */
#ifndef notyet
	newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK);
#else /* notyet */
	if (round_page(size) == size)
		newcontents = (caddr_t) kmem_alloc_wired_wait(buffer_map, size);
	else
		newcontents = (caddr_t) malloc (size, M_TEMP, M_WAITOK);
#endif /* notyet */

	/* copy the old into the new, up to the maximum that will fit */
	bcopy (bp->b_un.b_addr, newcontents, min(bp->b_bufsize, size));

	/* return old contents to free heap */
#ifndef notyet
	free (bp->b_un.b_addr, M_TEMP);
#else /* notyet */
	if (round_page(bp->b_bufsize) == bp->b_bufsize)
		kmem_free_wakeup(buffer_map, bp->b_un.b_addr, bp->b_bufsize);
	else
		free (bp->b_un.b_addr, M_TEMP);
#endif /* notyet */

	/* adjust buffer cache's idea of memory allocated to buffer contents */
	freebufspace -= size - bp->b_bufsize;
	allocbufspace += size - bp->b_bufsize;

	/* update buffer header */
	bp->b_un.b_addr = newcontents;
	bp->b_bcount = bp->b_bufsize = size;
}

/*
 * Patiently await operations to complete on this buffer.
 * When they do, extract and return any error associated with the I/O.
 * If the I/O failed, mark the buffer invalid and force it off the
 * lookup hash chains.
 */
int
biowait(register struct buf *bp)
{
	int x;

	x = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	if((bp->b_flags & B_ERROR) || bp->b_error) {
		if ((bp->b_flags & B_INVAL) == 0) {
			bp->b_flags |= B_INVAL;
			bremhash(bp);
			binshash(bp, bfreelist + BQ_AGE);
		}
		if (!bp->b_error)
			bp->b_error = EIO;
		else
			bp->b_flags |= B_ERROR;
		splx(x);
		return (bp->b_error);
	} else {
		splx(x);
		return (0);
	}
}

/*
 * Finish up operations on a buffer, calling an optional
 * function (if requested), and releasing the buffer if
 * marked asynchronous. Then mark this buffer done so that
 * others biowait()'ing for it will notice when they are
 * woken up from sleep().
 */
int
biodone(register struct buf *bp)
{
	int x;

	x = splbio();
	if (bp->b_flags & B_CALL) (*bp->b_iodone)(bp);
	bp->b_flags &= ~B_CALL;
	if ((bp->b_flags & (B_READ|B_DIRTY)) == B_DIRTY) {
		bp->b_flags &= ~B_DIRTY;
		vwakeup(bp);
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	bp->b_flags &= ~B_ASYNC;
	bp->b_flags |= B_DONE;
	wakeup(bp);
	splx(x);
}
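
/*
 * Completion sketch: the strategy routine's completion side is expected to
 * call biodone() when the transfer finishes.  For a synchronous request the
 * initiator sleeps in biowait() until biodone()'s wakeup(); for a B_ASYNC
 * request biodone() itself releases the buffer via brelse().  A hypothetical
 * driver completion might look like:
 *
 *	if (transfer_failed) {
 *		bp->b_flags |= B_ERROR;
 *		bp->b_error = EIO;
 *	}
 *	biodone(bp);
 */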