* Copyright (c) 1982, 1986, 1989 Regents of the University of California.
* %sccs.include.redist.c%
* @(#)vfs_cluster.c 7.37 (Berkeley) %G%
* Find the block in the buffer pool.
* If the buffer is not present, allocate a new buffer and load
* its contents according to the filesystem fill routine.
bread(vp
, blkno
, size
, cred
, bpp
)
struct proc
*p
= curproc
; /* XXX */
bp
= getblk(dev
, blkno
, size
, secsize
);
*bpp
= bp
= getblk(vp
, blkno
, size
);
if (bp
->b_flags
& (B_DONE
| B_DELWRI
)) {
trace(TR_BREADHIT
, pack(vp
, size
), blkno
);
if (bp
->b_bcount
> bp
->b_bufsize
)
if (bp
->b_rcred
== NOCRED
&& cred
!= NOCRED
) {
trace(TR_BREADMISS
, pack(vp
, size
), blkno
);
p
->p_stats
->p_ru
.ru_inblock
++; /* pay for read */
* Operates like bread, but also starts I/O on the specified
breada(vp
, blkno
, size
, rablkno
, rabsize
, cred
, bpp
)
daddr_t rablkno
; int rabsize
;
struct proc
*p
= curproc
; /* XXX */
register struct buf
*bp
, *rabp
;
* If the block is not memory resident,
* allocate a buffer and start I/O.
if (!incore(vp
, blkno
)) {
*bpp
= bp
= getblk(vp
, blkno
, size
);
if ((bp
->b_flags
& (B_DONE
| B_DELWRI
)) == 0) {
if (bp
->b_bcount
> bp
->b_bufsize
)
if (bp
->b_rcred
== NOCRED
&& cred
!= NOCRED
) {
trace(TR_BREADMISS
, pack(vp
, size
), blkno
);
p
->p_stats
->p_ru
.ru_inblock
++; /* pay for read */
trace(TR_BREADHIT
, pack(vp
, size
), blkno
);
* If there is a read-ahead block, start I/O on it too.
if (!incore(vp
, rablkno
)) {
rabp
= getblk(vp
, rablkno
, rabsize
);
if (rabp
->b_flags
& (B_DONE
| B_DELWRI
)) {
trace(TR_BREADHITRA
, pack(vp
, rabsize
), rablkno
);
rabp
->b_flags
|= B_ASYNC
| B_READ
;
if (rabp
->b_bcount
> rabp
->b_bufsize
)
if (rabp
->b_rcred
== NOCRED
&& cred
!= NOCRED
) {
trace(TR_BREADMISSRA
, pack(vp
, rabsize
), rablkno
);
p
->p_stats
->p_ru
.ru_inblock
++; /* pay in advance */
* If block was memory resident, let bread get it.
* If block was not memory resident, the read was
* started above, so just wait for the read to complete.
return (bread(dev
, blkno
, size
, secsize
));
return (bread(vp
, blkno
, size
, cred
, bpp
));
* Release buffer on completion.
struct proc
*p
= curproc
; /* XXX */
bp
->b_flags
&= ~(B_READ
| B_DONE
| B_ERROR
| B_DELWRI
);
if ((flag
& B_DELWRI
) == 0)
p
->p_stats
->p_ru
.ru_oublock
++; /* no one paid yet */
reassignbuf(bp
, bp
->b_vp
);
trace(TR_BWRITE
, pack(bp
->b_vp
, bp
->b_bcount
), bp
->b_lblkno
);
if (bp
->b_bcount
> bp
->b_bufsize
)
* If the write was synchronous, then await I/O completion.
* If the write was "delayed", then we put the buffer on
* the queue of blocks awaiting I/O completion status.
if ((flag
& B_ASYNC
) == 0) {
} else if (flag
& B_DELWRI
) {
* The buffer is marked dirty, but is not queued for I/O.
* This routine should be used when the buffer is expected
* to be modified again soon, typically a small write that
* partially fills a buffer.
* NB: magnetic tapes cannot be delayed; they must be
* written in the order that the writes are requested.
struct proc
*p
= curproc
; /* XXX */
if ((bp
->b_flags
& B_DELWRI
) == 0) {
reassignbuf(bp
, bp
->b_vp
);
p
->p_stats
->p_ru
.ru_oublock
++; /* no one paid yet */
* If this is a tape drive, the write must be initiated.
if (bdevsw
[major(bp
->b_dev
)].d_flags
& B_TAPE
)
bp
->b_flags
|= (B_DONE
| B_DELWRI
);
* Start I/O on a buffer, but do not wait for it to complete.
* The buffer is released when the I/O completes.
* Setting the ASYNC flag causes bwrite to return
* after starting the I/O.
* Even if the buffer is dirty, no I/O is started.
register struct buf
*flist
;
trace(TR_BRELSE
, pack(bp
->b_vp
, bp
->b_bufsize
), bp
->b_lblkno
);
* If a process is waiting for the buffer, or
* is waiting for a free buffer, awaken it.
if (bp
->b_flags
& B_WANTED
)
if (bfreelist
[0].b_flags
& B_WANTED
) {
bfreelist
[0].b_flags
&= ~B_WANTED
;
wakeup((caddr_t
)bfreelist
);
* Retry I/O for locked buffers rather than invalidating them.
if ((bp
->b_flags
& B_ERROR
) && (bp
->b_flags
& B_LOCKED
))
* Disassociate buffers that are no longer valid.
if (bp
->b_flags
& (B_NOCACHE
| B_ERROR
))
if ((bp
->b_bufsize
<= 0) || (bp
->b_flags
& (B_ERROR
| B_INVAL
))) {
bp
->b_flags
&= ~B_DELWRI
;
* Stick the buffer back on a free list.
if (bp
->b_bufsize
<= 0) {
/* block has no buffer ... put at front of unused buffer list */
flist
= &bfreelist
[BQ_EMPTY
];
} else if (bp
->b_flags
& (B_ERROR
| B_INVAL
)) {
/* block has no info ... put at front of most free list */
flist
= &bfreelist
[BQ_AGE
];
if (bp
->b_flags
& B_LOCKED
)
flist
= &bfreelist
[BQ_LOCKED
];
else if (bp
->b_flags
& B_AGE
)
flist
= &bfreelist
[BQ_AGE
];
flist
= &bfreelist
[BQ_LRU
];
bp
->b_flags
&= ~(B_WANTED
| B_BUSY
| B_ASYNC
| B_AGE
| B_NOCACHE
);
* Check to see if a block is currently memory resident.
for (bp
= dp
->b_forw
; bp
!= dp
; bp
= bp
->b_forw
)
if (bp
->b_lblkno
== blkno
&& bp
->b_vp
== vp
&&
(bp
->b_flags
& B_INVAL
) == 0)
* Check to see if a block is currently memory resident.
* If it is resident, return it. If it is not resident,
* allocate a new buffer and assign it to the block.
getblk(dev
, blkno
, size
, secsize
)
register struct vnode
*vp
;
register struct buf
*bp
, *dp
;
panic("getblk: size too big");
* Search the cache for the block. If the buffer is found,
* but it is currently locked, then we must wait for it to
for (bp
= dp
->b_forw
; bp
!= dp
; bp
= bp
->b_forw
) {
if (bp
->b_lblkno
!= blkno
|| bp
->b_vp
!= vp
||
if (bp
->b_flags
& B_BUSY
) {
sleep((caddr_t
)bp
, PRIBIO
+ 1);
if (bp
->b_bcount
!= size
) {
printf("getblk: stray size");
* The caller will assign it to a block.
register struct buf
*bp
, *flist
;
panic("geteblk: size too big");
flist
= &bfreelist
[BQ_AGE
];
bp
->b_blksize
= DEV_BSIZE
;
* Expand or contract the actual memory allocated to a buffer.
* If no memory is available, release buffer and take error exit.
register struct buf
*bp
, *ep
;
sizealloc
= roundup(size
, CLBYTES
);
* Buffer size does not change
if (sizealloc
== tp
->b_bufsize
)
* Buffer size is shrinking.
* Place excess space in a buffer header taken from the
* BQ_EMPTY buffer list and placed on the "most free" list.
* If no extra buffer headers are available, leave the
* extra space in the present buffer.
if (sizealloc
< tp
->b_bufsize
) {
ep
= bfreelist
[BQ_EMPTY
].av_forw
;
if (ep
== &bfreelist
[BQ_EMPTY
])
pagemove(tp
->b_un
.b_addr
+ sizealloc
, ep
->b_un
.b_addr
,
(int)tp
->b_bufsize
- sizealloc
);
ep
->b_bufsize
= tp
->b_bufsize
- sizealloc
;
tp
->b_bufsize
= sizealloc
;
* More buffer space is needed. Get it out of buffers on
* the "most free" list, placing the empty headers on the
* BQ_EMPTY buffer header list.
while (tp
->b_bufsize
< sizealloc
) {
take
= sizealloc
- tp
->b_bufsize
;
if (take
>= bp
->b_bufsize
)
pagemove(&bp
->b_un
.b_addr
[bp
->b_bufsize
- take
],
&tp
->b_un
.b_addr
[tp
->b_bufsize
], take
);
bp
->b_bufsize
= bp
->b_bufsize
- take
;
if (bp
->b_bcount
> bp
->b_bufsize
)
bp
->b_bcount
= bp
->b_bufsize
;
if (bp
->b_bufsize
<= 0) {
binshash(bp
, &bfreelist
[BQ_EMPTY
]);
* Find a buffer which is available for use.
* Select something from a free list.
* Preference is to AGE list, then LRU list.
register struct buf
*bp
, *dp
;
register struct ucred
*cred
;
for (dp
= &bfreelist
[BQ_AGE
]; dp
> bfreelist
; dp
--)
if (dp
== bfreelist
) { /* no free blocks */
sleep((caddr_t
)dp
, PRIBIO
+ 1);
if (bp
->b_flags
& B_DELWRI
) {
trace(TR_BRELSE
, pack(bp
->b_vp
, bp
->b_bufsize
), bp
->b_lblkno
);
if (bp
->b_rcred
!= NOCRED
) {
if (bp
->b_wcred
!= NOCRED
) {
bp
->b_dirtyoff
= bp
->b_dirtyend
= 0;
* Wait for I/O to complete.
* Extract and return any errors associated with the I/O.
* If the error flag is set, but no specific error is
while ((bp
->b_flags
& B_DONE
) == 0)
sleep((caddr_t
)bp
, PRIBIO
);
if ((bp
->b_flags
& B_ERROR
) == 0)
* Mark I/O complete on a buffer.
* If a callback has been requested, e.g. the pageout
* daemon, do so. Otherwise, awaken waiting processes.
register struct vnode
*vp
;
if (bp
->b_flags
& B_DONE
)
if ((bp
->b_flags
& B_READ
) == 0) {
bp
->b_dirtyoff
= bp
->b_dirtyend
= 0;
if ((vp
->v_flag
& VBWAIT
) && vp
->v_numoutput
<= 0) {
panic("biodone: neg numoutput");
wakeup((caddr_t
)&vp
->v_numoutput
);
if (bp
->b_flags
& B_CALL
) {
if (bp
->b_flags
& B_ASYNC
)
bp
->b_flags
&= ~B_WANTED
;
* Make sure all write-behind blocks associated
* with mount point are flushed out (from sync).
mntflushbuf(mountp
, flags
)
register struct vnode
*vp
;
if ((mountp
->mnt_flag
& MNT_MPBUSY
) == 0)
panic("mntflushbuf: not busy");
for (vp
= mountp
->mnt_mounth
; vp
; vp
= vp
->v_mountf
) {
if (vp
->v_mount
!= mountp
)
* Flush all dirty buffers associated with a vnode.
register struct vnode
*vp
;
for (bp
= vp
->v_dirtyblkhd
; bp
; bp
= nbp
) {
if ((bp
->b_flags
& B_BUSY
))
if ((bp
->b_flags
& B_DELWRI
) == 0)
panic("vflushbuf: not dirty");
* Wait for I/O associated with indirect blocks to complete,
* since there is no way to quickly wait for them below.
* NB: This is really specific to ufs, but is done here
* as it is easier and quicker.
if (bp
->b_vp
== vp
|| (flags
& B_SYNC
) == 0) {
if ((flags
& B_SYNC
) == 0)
while (vp
->v_numoutput
) {
sleep((caddr_t
)&vp
->v_numoutput
, PRIBIO
+ 1);
vprint("vflushbuf: dirty", vp
);
* Invalidate in-core blocks belonging to a closed or unmounted filesystem.
* Go through the list of vnodes associated with the file system;
* for each vnode invalidate any buffers that it holds. Normally
* this routine is preceded by a bflush call, so that on a quiescent
* filesystem there will be no dirty buffers when we are done. Binval
* returns the count of dirty buffers when it is finished.
register struct vnode
*vp
;
if ((mountp
->mnt_flag
& MNT_MPBUSY
) == 0)
panic("mntinvalbuf: not busy");
for (vp
= mountp
->mnt_mounth
; vp
; vp
= vp
->v_mountf
) {
dirty
+= vinvalbuf(vp
, 1);
if (vp
->v_mount
!= mountp
)
* Flush out and invalidate all buffers associated with a vnode.
* Called with the underlying object locked.
register struct vnode
*vp
;
if (blist
= vp
->v_dirtyblkhd
)
else if (blist
= vp
->v_cleanblkhd
)
for (bp
= blist
; bp
; bp
= nbp
) {
if (bp
->b_flags
& B_BUSY
) {
sleep((caddr_t
)bp
, PRIBIO
+ 1);
if (save
&& (bp
->b_flags
& B_DELWRI
)) {
reassignbuf(bp
, bp
->b_vp
);
if (vp
->v_dirtyblkhd
|| vp
->v_cleanblkhd
)
panic("vinvalbuf: flush failed");
* Associate a buffer with a vnode.
register struct vnode
*vp
;
panic("bgetvp: not free");
if (vp
->v_type
== VBLK
|| vp
->v_type
== VCHR
)
* Insert onto list for new vnode.
bp
->b_blockf
= vp
->v_cleanblkhd
;
bp
->b_blockb
= &vp
->v_cleanblkhd
;
vp
->v_cleanblkhd
->b_blockb
= &bp
->b_blockf
;
bp
->b_blockb
= &vp
->v_cleanblkhd
;
* Disassociate a buffer from a vnode.
if (bp
->b_vp
== (struct vnode
*) 0)
* Delete from old vnode list, if on one.
bq
->b_blockb
= bp
->b_blockb
;
bp
->b_vp
= (struct vnode
*) 0;
* Reassign a buffer from one vnode to another.
* Used to assign file specific control information
* (indirect blocks) to the vnode to which they belong.
register struct vnode
*newvp
;
register struct buf
*bq
, **listheadp
;
panic("reassignbuf: NULL");
* Delete from old vnode list, if on one.
bq
->b_blockb
= bp
->b_blockb
;
* If dirty, put on list of dirty buffers;
* otherwise insert onto list of clean buffers.
if (bp
->b_flags
& B_DELWRI
)
listheadp
= &newvp
->v_dirtyblkhd
;
listheadp
= &newvp
->v_cleanblkhd
;
bp
->b_blockf
= *listheadp
;
bp
->b_blockb
= listheadp
;
bp
->b_blockf
->b_blockb
= &bp
->b_blockf
;
bp
->b_blockb
= listheadp
;