* Copyright (c) 1982, 1986, 1989 Regents of the University of California.
* Redistribution and use in source and binary forms are permitted
* provided that the above copyright notice and this paragraph are
* duplicated in all such forms and that any documentation,
* advertising materials, and other materials related to such
* distribution and use acknowledge that the software was developed
* by the University of California, Berkeley. The name of the
* University may not be used to endorse or promote products derived
* from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
* @(#)vfs_cluster.c 7.23 (Berkeley) %G%
* Read in (if necessary) the block and return a buffer pointer.
/*
 * bread() -- fragment: read the named block for vnode vp into a cache
 * buffer and hand it to the caller through *bpp.
 *
 * NOTE(review): this file is extraction-garbled -- statements are split
 * one token per line and many original lines (braces, local declarations,
 * the I/O-start and return tail) are missing.  Code below is left
 * byte-identical; only comments are added.
 */
bread(vp
, blkno
, size
, cred
, bpp
)
/*
 * NOTE(review): the getblk(dev, ..., secsize) call below looks like
 * residue of an older device-addressed variant interleaved with the
 * vnode-addressed call that follows -- confirm against the original.
 */
bp
= getblk(dev
, blkno
, size
, secsize
);
*bpp
= bp
= getblk(vp
, blkno
, size
);
/* Cache hit: buffer already valid (I/O done or delayed write pending). */
if (bp
->b_flags
&(B_DONE
|B_DELWRI
)) {
trace(TR_BREADHIT
, pack(vp
, size
), blkno
);
if (bp
->b_bcount
> bp
->b_bufsize
)
/* Miss path: record reading credentials if the buffer holds none yet. */
if (bp
->b_rcred
== NOCRED
&& cred
!= NOCRED
) {
trace(TR_BREADMISS
, pack(vp
, size
), blkno
);
u
.u_ru
.ru_inblock
++; /* pay for read */
* Read in the block, like bread, but also start I/O on the
* read-ahead block (which is not allocated to the caller)
/*
 * breada() -- fragment: as bread(), plus fire-and-forget read-ahead of
 * (rablkno, rabsize).  NOTE(review): garbled extraction; many original
 * lines are missing.  Code left byte-identical; comments only.
 */
breada(vp
, blkno
, size
, rablkno
, rabsize
, cred
, bpp
)
daddr_t rablkno
; int rabsize
;
register struct buf
*bp
, *rabp
;
* If the block isn't in core, then allocate
* a buffer and initiate i/o (getblk checks
if (!incore(vp
, blkno
)) {
*bpp
= bp
= getblk(vp
, blkno
, size
);
if ((bp
->b_flags
&(B_DONE
|B_DELWRI
)) == 0) {
if (bp
->b_bcount
> bp
->b_bufsize
)
if (bp
->b_rcred
== NOCRED
&& cred
!= NOCRED
) {
trace(TR_BREADMISS
, pack(vp
, size
), blkno
);
u
.u_ru
.ru_inblock
++; /* pay for read */
trace(TR_BREADHIT
, pack(vp
, size
), blkno
);
* If there's a read-ahead block, start i/o
/* Read-ahead is asynchronous: B_READ|B_ASYNC, buffer not returned. */
if (!incore(vp
, rablkno
)) {
rabp
= getblk(vp
, rablkno
, rabsize
);
if (rabp
->b_flags
& (B_DONE
|B_DELWRI
)) {
trace(TR_BREADHITRA
, pack(vp
, rabsize
), rablkno
);
rabp
->b_flags
|= B_READ
|B_ASYNC
;
if (rabp
->b_bcount
> rabp
->b_bufsize
)
if (rabp
->b_rcred
== NOCRED
&& cred
!= NOCRED
) {
trace(TR_BREADMISSRA
, pack(vp
, rabsize
), rablkno
);
u
.u_ru
.ru_inblock
++; /* pay in advance */
* If block was in core, let bread get it.
* If block wasn't in core, then the read was started
* above, and just wait for it.
/*
 * NOTE(review): two return paths survive -- a device-addressed
 * bread(dev, ..., secsize) and the vnode-addressed bread(vp, ...);
 * likely merged-version residue, confirm which is current.
 */
return (bread(dev
, blkno
, size
, secsize
));
return (bread(vp
, blkno
, size
, cred
, bpp
));
* Write the buffer, waiting for completion.
* Then release the buffer.
/*
 * bwrite() -- fragment.  NOTE(review): garbled extraction; the function
 * header, declarations, and the actual I/O start are missing.  Code is
 * left byte-identical; comments only.
 */
/* Clear state bits before (re)issuing the write. */
bp
->b_flags
&= ~(B_READ
| B_DONE
| B_ERROR
| B_DELWRI
);
/* Charge the writer only if the delayed-write path did not already. */
if ((flag
&B_DELWRI
) == 0)
u
.u_ru
.ru_oublock
++; /* noone paid yet */
reassignbuf(bp
, bp
->b_vp
);
trace(TR_BWRITE
, pack(bp
->b_vp
, bp
->b_bcount
), bp
->b_lblkno
);
if (bp
->b_bcount
> bp
->b_bufsize
)
* If the write was synchronous, then await i/o completion.
* If the write was "delayed", then we put the buffer on
* the q of blocks awaiting i/o completion status.
if ((flag
&B_ASYNC
) == 0) {
} else if (flag
& B_DELWRI
) {
* Release the buffer, marking it so that if it is grabbed
* for another purpose it will be written out before being
* given up (e.g. when writing a partial block where it is
* assumed that another write for the same block will soon follow).
* This can't be done for magtape, since writes must be done
* in the same order as requested.
/*
 * bdwrite() -- fragment.  NOTE(review): garbled extraction; function
 * header and surrounding logic missing.  Code byte-identical below.
 */
/* First time this buffer goes dirty: move it and charge the writer. */
if ((bp
->b_flags
& B_DELWRI
) == 0) {
reassignbuf(bp
, bp
->b_vp
);
u
.u_ru
.ru_oublock
++; /* noone paid yet */
* If this is a tape drive, the write must be initiated.
if (bdevsw
[major(bp
->b_dev
)].d_flags
& B_TAPE
)
/* Otherwise just mark delayed-write; flushed later or on reuse. */
bp
->b_flags
|= B_DELWRI
| B_DONE
;
* Release the buffer, start I/O on it, but don't wait for completion.
* Release the buffer, with no I/O implied.
/*
 * brelse() -- fragment (the first comment line above belongs to the
 * missing bawrite()).  NOTE(review): garbled extraction; function
 * header, splbio protection, and queue-insertion code are missing.
 * Code left byte-identical; comments only.
 */
register struct buf
*flist
;
trace(TR_BRELSE
, pack(bp
->b_vp
, bp
->b_bufsize
), bp
->b_lblkno
);
* If a process is waiting for the buffer, or
* is waiting for a free buffer, awaken it.
if (bp
->b_flags
&B_WANTED
)
if (bfreelist
[0].b_flags
&B_WANTED
) {
bfreelist
[0].b_flags
&= ~B_WANTED
;
wakeup((caddr_t
)bfreelist
);
* Retry I/O for locked buffers rather than invalidating them.
if ((bp
->b_flags
& B_ERROR
) && (bp
->b_flags
& B_LOCKED
))
* Disassociate buffers that are no longer valid.
if (bp
->b_flags
& (B_NOCACHE
|B_ERROR
))
/* Invalid or empty buffers must not stay marked dirty. */
if ((bp
->b_bufsize
<= 0) || (bp
->b_flags
& (B_ERROR
|B_INVAL
))) {
bp
->b_flags
&= ~B_DELWRI
;
* Stick the buffer back on a free list.
/* Free-list choice: EMPTY (no storage), AGE (no data), LOCKED, AGE, LRU. */
if (bp
->b_bufsize
<= 0) {
/* block has no buffer ... put at front of unused buffer list */
flist
= &bfreelist
[BQ_EMPTY
];
} else if (bp
->b_flags
& (B_ERROR
|B_INVAL
)) {
/* block has no info ... put at front of most free list */
flist
= &bfreelist
[BQ_AGE
];
if (bp
->b_flags
& B_LOCKED
)
flist
= &bfreelist
[BQ_LOCKED
];
else if (bp
->b_flags
& B_AGE
)
flist
= &bfreelist
[BQ_AGE
];
flist
= &bfreelist
[BQ_LRU
];
/* Buffer is no longer busy or wanted; drop transient flags. */
bp
->b_flags
&= ~(B_WANTED
|B_BUSY
|B_ASYNC
|B_AGE
|B_NOCACHE
);
* See if the block is associated with some buffer
* (mainly to avoid getting hung up on a wait in breada)
/*
 * incore() -- fragment: scan a hash chain for a valid buffer matching
 * (vp, blkno).  NOTE(review): garbled extraction; header, hash lookup,
 * and return statements are missing.  Code byte-identical below.
 */
for (bp
= dp
->b_forw
; bp
!= dp
; bp
= bp
->b_forw
)
if (bp
->b_lblkno
== blkno
&& bp
->b_vp
== vp
&&
(bp
->b_flags
& B_INVAL
) == 0)
* Return a block if it is in memory.
/*
 * baddr() -- fragment: per the comment it returns a block only when
 * cached; only the signature and the fallthrough bread() call survive
 * here.  NOTE(review): the incore() check presumably preceding this is
 * missing from the extraction.  Code byte-identical below.
 */
baddr(vp
, blkno
, size
, cred
, bpp
)
return (bread(vp
, blkno
, size
, cred
, bpp
));
* Assign a buffer for the given block. If the appropriate
* block is already associated, return it; otherwise search
* for the oldest non-busy buffer and reassign it.
* We use splx here because this routine may be called
* on the interrupt stack during a dump, and we don't
* want to lower the ipl back to 0.
/*
 * getblk() -- fragment.  NOTE(review): garbled extraction; the
 * parameter list below names (dev, ..., secsize) while the local
 * declarations use a vnode -- likely merged-version residue.  Hash
 * lookup setup, buffer allocation, and return paths are missing.
 * Code byte-identical below.
 */
getblk(dev
, blkno
, size
, secsize
)
register struct vnode
*vp
;
register struct buf
*bp
, *dp
;
panic("getblk: size too big");
* To prevent overflow of 32-bit ints when converting block
* numbers to byte offsets, blknos > 2^32 / DEV_BSIZE are set
* to the maximum number that can be converted to a byte offset
* without overflow. This is historic code; what bug it fixed,
* or whether it is still a reasonable thing to do is open to
* Make it a panic to see if it ever really happens. mkm 11/89
if ((unsigned)blkno
>= 1 << (sizeof(int)*NBBY
-DEV_BSHIFT
)) {
panic("getblk: blkno too big");
/* NOTE(review): unreachable after the panic above -- historic clamp. */
blkno
= 1 << ((sizeof(int)*NBBY
-DEV_BSHIFT
) + 1);
* Search the cache for the block. If we hit, but
* the buffer is in use for i/o, then we wait until
for (bp
= dp
->b_forw
; bp
!= dp
; bp
= bp
->b_forw
) {
if (bp
->b_lblkno
!= blkno
|| bp
->b_vp
!= vp
||
/* Hit on a busy buffer: sleep until its current I/O finishes. */
if (bp
->b_flags
&B_BUSY
) {
sleep((caddr_t
)bp
, PRIBIO
+1);
if (bp
->b_bcount
!= size
) {
printf("getblk: stray size");
* not assigned to any particular device
/*
 * geteblk() -- fragment: get an empty block not associated with any
 * vnode/device.  NOTE(review): garbled extraction; allocation loop and
 * return are missing.  Code byte-identical below.
 */
register struct buf
*bp
, *flist
;
panic("geteblk: size too big");
flist
= &bfreelist
[BQ_AGE
];
bp
->b_blksize
= DEV_BSIZE
;
* Allocate space associated with a buffer.
/*
 * allocbuf() -- fragment: only the early-out size comparison survives.
 * NOTE(review): garbled extraction; grow/shrink logic is missing.
 */
if (size
== bp
->b_bcount
)
* Find a buffer which is available for use.
* Select something from a free list.
* Preference is to AGE list, then LRU list.
/*
 * getnewbuf() -- fragment.  NOTE(review): garbled extraction; the
 * header, queue removal, delayed-write flush, and credential release
 * bodies are missing.  Code byte-identical below.
 */
register struct buf
*bp
, *dp
;
register struct ucred
*cred
;
/* Walk free lists from BQ_AGE downward looking for a buffer. */
for (dp
= &bfreelist
[BQ_AGE
]; dp
> bfreelist
; dp
--)
if (dp
== bfreelist
) { /* no free blocks */
sleep((caddr_t
)dp
, PRIBIO
+1);
/* Chosen buffer still dirty: must be written before reuse. */
if (bp
->b_flags
& B_DELWRI
) {
trace(TR_BRELSE
, pack(bp
->b_vp
, bp
->b_bufsize
), bp
->b_lblkno
);
/* Release any credentials held from the previous owner. */
if (bp
->b_rcred
!= NOCRED
) {
if (bp
->b_wcred
!= NOCRED
) {
* Wait for I/O completion on the buffer; return errors
/*
 * biowait() -- fragment: sleep until B_DONE is set, then map the
 * buffer's error state for the caller.  NOTE(review): garbled
 * extraction; error-return tail is missing.  Code byte-identical.
 */
while ((bp
->b_flags
& B_DONE
) == 0)
sleep((caddr_t
)bp
, PRIBIO
);
* Pick up the device's error number and pass it to the user;
* if there is an error but the number is 0 set a generalized code.
if ((bp
->b_flags
& B_ERROR
) == 0)
* Mark I/O complete on a buffer.
* If someone should be called, e.g. the pageout
* daemon, do so. Otherwise, wake up anyone
/*
 * biodone() -- fragment.  NOTE(review): garbled extraction; header,
 * B_DONE setting, and the wakeup/brelse tail are missing.  Code
 * byte-identical below.
 */
register struct vnode
*vp
;
/* Double-completion is a bug (panic presumably followed here). */
if (bp
->b_flags
& B_DONE
)
if ((bp
->b_flags
& B_READ
) == 0) {
/* Write completed: the buffer carries no dirty region any more. */
bp
->b_dirtyoff
= bp
->b_dirtyend
= 0;
/* Wake a vflushbuf-style waiter when output drains to zero. */
if ((vp
->v_flag
& VBWAIT
) && vp
->v_numoutput
<= 0) {
panic("biodone: neg numoutput");
wakeup((caddr_t
)&vp
->v_numoutput
);
if (bp
->b_flags
& B_CALL
) {
bp
->b_flags
&= ~B_WANTED
;
* Make sure all write-behind blocks associated
* with mount point are flushed out (from sync).
/*
 * mntflushbuf() -- fragment: iterate the mount point's vnode list,
 * presumably calling vflushbuf() per vnode (call is missing from the
 * extraction).  Code byte-identical below.
 */
mntflushbuf(mountp
, flags
)
register struct vnode
*vp
;
for (vp
= mountp
->m_mounth
; vp
; vp
= nvp
) {
* Flush all dirty buffers associated with a vnode.
/*
 * vflushbuf() -- fragment.  NOTE(review): garbled extraction; header,
 * the bawrite/bwrite issue, and the loop restart are missing.  Code
 * byte-identical below.
 */
register struct vnode
*vp
;
for (bp
= vp
->v_dirtyblkhd
; bp
; bp
= nbp
) {
if ((bp
->b_flags
& B_BUSY
))
if ((bp
->b_flags
& B_DELWRI
) == 0)
panic("vflushbuf: not dirty");
* Wait for I/O associated with indirect blocks to complete,
* since there is no way to quickly wait for them below.
* NB - This is really specific to ufs, but is done here
* as it is easier and quicker.
if (bp
->b_vp
== vp
|| (flags
& B_SYNC
) == 0) {
if ((flags
& B_SYNC
) == 0)
/* Synchronous flush: drain outstanding writes on this vnode. */
while (vp
->v_numoutput
) {
sleep((caddr_t
)&vp
->v_numoutput
, PRIBIO
+1);
vprint("vflushbuf: dirty", vp
);
* Invalidate in core blocks belonging to closed or umounted filesystem
* Go through the list of vnodes associated with the file system;
* for each vnode invalidate any buffers that it holds. Normally
* this routine is preceeded by a bflush call, so that on a quiescent
* filesystem there will be no dirty buffers when we are done. Binval
* returns the count of dirty buffers when it is finished.
/*
 * mntinvalbuf() -- fragment.  NOTE(review): garbled extraction;
 * header and the return of the dirty count are missing.  Code
 * byte-identical below.
 */
register struct vnode
*vp
;
for (vp
= mountp
->m_mounth
; vp
; vp
= nvp
) {
dirty
+= vinvalbuf(vp
, 1);
* Flush out and invalidate all buffers associated with a vnode.
* Called with the underlying object locked.
/*
 * vinvalbuf() -- fragment.  NOTE(review): garbled extraction; header,
 * the outer restart loop, and the brelse/invalidate steps are missing.
 * Code byte-identical below.
 */
register struct vnode
*vp
;
/* Process the dirty list first, then the clean list. */
if (blist
= vp
->v_dirtyblkhd
)
else if (blist
= vp
->v_cleanblkhd
)
for (bp
= blist
; bp
; bp
= nbp
) {
if (bp
->b_flags
& B_BUSY
) {
sleep((caddr_t
)bp
, PRIBIO
+1);
/* 'save' mode: dirty buffers get written rather than discarded. */
if (save
&& (bp
->b_flags
& B_DELWRI
)) {
reassignbuf(bp
, bp
->b_vp
);
/* Both lists must be empty when we are done. */
if (vp
->v_dirtyblkhd
|| vp
->v_cleanblkhd
)
panic("vinvalbuf: flush failed");
* Associate a buffer with a vnode.
/*
 * bgetvp() -- fragment.  NOTE(review): garbled extraction; header,
 * reference hold, and part of the list splice are missing.  The two
 * b_blockb assignments below look like duplicated residue -- confirm
 * against the original.  Code byte-identical below.
 */
register struct vnode
*vp
;
panic("bgetvp: not free");
if (vp
->v_type
== VBLK
|| vp
->v_type
== VCHR
)
* Insert onto list for new vnode.
/* Head-insert into the vnode's clean-buffer list. */
bp
->b_blockf
= vp
->v_cleanblkhd
;
bp
->b_blockb
= &vp
->v_cleanblkhd
;
vp
->v_cleanblkhd
->b_blockb
= &bp
->b_blockf
;
bp
->b_blockb
= &vp
->v_cleanblkhd
;
* Disassociate a buffer from a vnode.
/*
 * brelvp() -- fragment.  NOTE(review): garbled extraction; header,
 * the forward-link patch, and the vnode release are missing.  Code
 * byte-identical below.
 */
if (bp
->b_vp
== (struct vnode
*) 0)
* Delete from old vnode list, if on one.
bq
->b_blockb
= bp
->b_blockb
;
bp
->b_vp
= (struct vnode
*) 0;
* Reassign a buffer from one vnode to another.
* Used to assign file specific control information
* (indirect blocks) to the vnode to which they belong.
/*
 * reassignbuf() -- fragment.  NOTE(review): garbled extraction; header
 * and parts of the unlink/relink are missing, and the final
 * "bp->b_blockb = listheadp;" appears twice -- likely duplicated
 * residue, confirm against the original.  Code byte-identical below.
 */
register struct vnode
*newvp
;
register struct buf
*bq
, **listheadp
;
panic("reassignbuf: NULL");
* Delete from old vnode list, if on one.
bq
->b_blockb
= bp
->b_blockb
;
* If dirty, put on list of dirty buffers;
* otherwise insert onto list of clean buffers.
if (bp
->b_flags
& B_DELWRI
)
listheadp
= &newvp
->v_dirtyblkhd
;
listheadp
= &newvp
->v_cleanblkhd
;
/* Head-insert onto the chosen list. */
bp
->b_blockf
= *listheadp
;
bp
->b_blockb
= listheadp
;
bp
->b_blockf
->b_blockb
= &bp
->b_blockf
;
bp
->b_blockb
= listheadp
;