/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)vfs_bio.c	7.3 (Berkeley) %G%
 */

#include "../machine/pte.h"

#include "param.h"
#include "systm.h"
#include "dir.h"
#include "user.h"
#include "buf.h"
#include "conf.h"
#include "trace.h"

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
#ifdef SECSIZE
bread(dev, blkno, size, secsize)
#else SECSIZE
bread(dev, blkno, size)
#endif SECSIZE
	dev_t dev;
	daddr_t blkno;
	int size;
#ifdef SECSIZE
	long secsize;
#endif SECSIZE
{
	register struct buf *bp;

#ifdef SECSIZE
	bp = getblk(dev, blkno, size, secsize);
#else SECSIZE
	bp = getblk(dev, blkno, size);
#endif SECSIZE
	if (bp->b_flags&(B_DONE|B_DELWRI)) {
		trace(TR_BREADHIT, pack(dev, size), blkno);
		return (bp);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	(*bdevsw[major(dev)].d_strategy)(bp);
	trace(TR_BREADMISS, pack(dev, size), blkno);
	u.u_ru.ru_inblock++;		/* pay for read */
	biowait(bp);
	return (bp);
}
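
/*
 * Usage sketch (not code from this file): a filesystem read path,
 * non-SECSIZE form, with "bn" and "bsize" supplied by the caller.
 *
 *	struct buf *bp;
 *
 *	bp = bread(dev, bn, bsize);
 *	if (u.u_error) {		(set by biowait above)
 *		brelse(bp);
 *		return;
 *	}
 *	... use bp->b_un.b_addr ...
 *	brelse(bp);
 */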

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
#ifdef SECSIZE
breada(dev, blkno, size, secsize, rablkno, rabsize)
#else SECSIZE
breada(dev, blkno, size, rablkno, rabsize)
#endif SECSIZE
	dev_t dev;
	daddr_t blkno; int size;
#ifdef SECSIZE
	long secsize;
#endif SECSIZE
	daddr_t rablkno; int rabsize;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	/*
	 * If the block isn't in core, then allocate
	 * a buffer and initiate i/o (getblk checks
	 * for a cache hit).
	 */
	if (!incore(dev, blkno)) {
#ifdef SECSIZE
		bp = getblk(dev, blkno, size, secsize);
#else SECSIZE
		bp = getblk(dev, blkno, size);
#endif SECSIZE
		if ((bp->b_flags&(B_DONE|B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breada");
			(*bdevsw[major(dev)].d_strategy)(bp);
			trace(TR_BREADMISS, pack(dev, size), blkno);
			u.u_ru.ru_inblock++;	/* pay for read */
		} else
			trace(TR_BREADHIT, pack(dev, size), blkno);
	}

	/*
	 * If there's a read-ahead block, start i/o
	 * on it also (as above).
	 */
	if (rablkno && !incore(dev, rablkno)) {
#ifdef SECSIZE
		rabp = getblk(dev, rablkno, rabsize, secsize);
#else SECSIZE
		rabp = getblk(dev, rablkno, rabsize);
#endif SECSIZE
		if (rabp->b_flags & (B_DONE|B_DELWRI)) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(dev, rabsize), blkno);
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breada");
			(*bdevsw[major(dev)].d_strategy)(rabp);
			trace(TR_BREADMISSRA, pack(dev, rabsize), rablkno);
			u.u_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was in core, let bread get it.
	 * If block wasn't in core, then the read was started
	 * above, and just wait for it.
	 */
	if (bp == NULL)
#ifdef SECSIZE
		return (bread(dev, blkno, size, secsize));
#else SECSIZE
		return (bread(dev, blkno, size));
#endif SECSIZE
	biowait(bp);
	return (bp);
}
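
/*
 * Usage sketch (not code from this file): sequential readers call
 * breada so the transfer of the next block overlaps computation on
 * the current one; the read-ahead buffer stays in the cache and is
 * not returned to the caller.
 *
 *	bp = breada(dev, bn, bsize, bn + btodb(bsize), bsize);
 */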

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
	register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if ((flag&B_DELWRI) == 0)
		u.u_ru.ru_oublock++;		/* no one paid yet */
	trace(TR_BWRITE, pack(bp->b_dev, bp->b_bcount), bp->b_blkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	/*
	 * If the write was synchronous, then await i/o completion.
	 * If the write was "delayed", then we put the buffer on
	 * the q of blocks awaiting i/o completion status.
	 */
	if ((flag&B_ASYNC) == 0) {
		biowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_ru.ru_oublock++;		/* no one paid yet */
	if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}
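
/*
 * Usage sketch (not code from this file): a write that fills only
 * part of a block is left dirty in the cache, on the expectation
 * that a neighboring write will soon be absorbed in core.
 *
 *	bp = bread(dev, bn, bsize);
 *	... copy the user's bytes into part of bp->b_un.b_addr ...
 *	bdwrite(bp);
 */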

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;

	trace(TR_BRELSE, pack(bp->b_dev, bp->b_bufsize), bp->b_blkno);
	/*
	 * If someone's waiting for the buffer, or
	 * is waiting for a buffer wake 'em up.
	 */
	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	if (bp->b_flags&B_ERROR)
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			bp->b_dev = NODEV;		/* no assoc */

	/*
	 * Stick the buffer back on a free list.
	 */
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bfreelist[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
}
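
/*
 * Free-list note: BQ_LOCKED buffers may not be reassigned; BQ_LRU
 * holds valid cached blocks in least-recently-used order; BQ_AGE
 * holds blocks judged unlikely to be reused again (and is reclaimed
 * from first by getnewbuf); BQ_EMPTY holds headers with no memory.
 */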

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada)
 */
incore(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(dev, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno == blkno && bp->b_dev == dev &&
		    (bp->b_flags & B_INVAL) == 0)
			return (1);
	return (0);
}
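
/*
 * The BUFHASH macro (defined with the buffer declarations in buf.h)
 * maps a (dev, blkno) pair to one of BUFHSZ chain heads, so a lookup
 * walks one short chain rather than the whole buffer pool.
 */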

/*
 * Return a block if it is in memory.
 */
struct buf *
#ifdef SECSIZE
baddr(dev, blkno, size, secsize)
#else SECSIZE
baddr(dev, blkno, size)
#endif SECSIZE
	dev_t dev;
	daddr_t blkno;
	int size;
#ifdef SECSIZE
	long secsize;
#endif SECSIZE
{

	if (incore(dev, blkno))
#ifdef SECSIZE
		return (bread(dev, blkno, size, secsize));
#else SECSIZE
		return (bread(dev, blkno, size));
#endif SECSIZE
	return (0);
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * If we find the buffer, but it is dirty (marked DELWRI) and
 * its size is changing, we must write it out first. When the
 * buffer is shrinking, the write is done by brealloc to avoid
 * losing the unwritten data. When the buffer is growing, the
 * write is done by getblk, so that bread will not read stale
 * disk data over the modified data in the buffer.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
#ifdef SECSIZE
getblk(dev, blkno, size, secsize)
#else SECSIZE
getblk(dev, blkno, size)
#endif SECSIZE
	dev_t dev;
	daddr_t blkno;
	int size;
#ifdef SECSIZE
	long secsize;
#endif SECSIZE
{
	register struct buf *bp, *dp;

panic("getblk: size too big");
* To prevent overflow of 32-bit ints when converting block
* numbers to byte offsets, blknos > 2^32 / DEV_BSIZE are set
* to the maximum number that can be converted to a byte offset
* without overflow. This is historic code; what bug it fixed,
* or whether it is still a reasonable thing to do is open to
if ((unsigned)blkno
>= 1 << (sizeof(int)*NBBY
-DEV_BSHIFT
))
blkno
= 1 << ((sizeof(int)*NBBY
-DEV_BSHIFT
) + 1);
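	/*
	 * Worked example of the clamp above: with 32-bit ints, NBBY == 8
	 * and DEV_BSHIFT == 9 (DEV_BSIZE == 512), the test fires for
	 * blknos of 1 << 23 and up, the first values whose byte offset
	 * (blkno << DEV_BSHIFT) no longer fits in 32 bits.
	 */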
	/*
	 * Search the cache for the block.  If we hit, but
	 * the buffer is in use for i/o, then we wait until
	 * the i/o has completed.
	 */
	dp = BUFHASH(dev, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_dev != dev ||
		    bp->b_flags&B_INVAL)
			continue;
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			goto loop;
		}
		notavail(bp);
		if (bp->b_bcount != size) {
			/*
			 * A dirty buffer that is growing is written out
			 * here; a shrinking one is written out by brealloc
			 * (see the comment above getblk).
			 */
			if (bp->b_bcount < size && (bp->b_flags&B_DELWRI)) {
				bp->b_flags &= ~B_ASYNC;
				bwrite(bp);
				goto loop;
			}
			if (brealloc(bp, size) == 0)
				goto loop;
		}
		return (bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	bp = getnewbuf();
	bfree(bp);
	bremhash(bp);
	binshash(bp, dp);
	bp->b_dev = dev;
#ifdef SECSIZE
	bp->b_blksize = secsize;
#endif SECSIZE
	bp->b_blkno = blkno;
	bp->b_error = 0;
	if (brealloc(bp, size) == 0)
		goto loop;
	return (bp);
}

/*
 * get an empty block,
 * not assigned to any particular device
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *flist;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
loop:
	bp = getnewbuf();
	bp->b_flags |= B_INVAL;
	bfree(bp);
	bremhash(bp);
	flist = &bfreelist[BQ_AGE];
	binshash(bp, flist);
	bp->b_dev = (dev_t)NODEV;
#ifdef SECSIZE
	bp->b_blksize = DEV_BSIZE;
#endif SECSIZE
	bp->b_error = 0;
	if (brealloc(bp, size) == 0)
		goto loop;
	return (bp);
}

/*
 * Allocate space associated with a buffer.
 * If can't get space, buffer is released
 */
brealloc(bp, size)
	register struct buf *bp;
	int size;
{
	daddr_t start, last;
	register struct buf *ep;
	struct buf *dp;

	/*
	 * First need to make sure that all overlapping previous I/O
	 * is dispatched with.
	 */
	if (size == bp->b_bcount)
		return (1);
	if (size < bp->b_bcount) {
		if (bp->b_flags & B_DELWRI) {
			bwrite(bp);
			return (0);
		}
		if (bp->b_flags & B_LOCKED)
			panic("brealloc");
		return (allocbuf(bp, size));
	}
	bp->b_flags &= ~B_DONE;
	if (bp->b_dev == NODEV)
		return (allocbuf(bp, size));

	trace(TR_BREALLOC, pack(bp->b_dev, size), bp->b_blkno);
	/*
	 * Search cache for any buffers that overlap the one that we
	 * are trying to allocate. Overlapping buffers must be marked
	 * invalid, after being written out if they are dirty. (indicated
	 * by B_DELWRI) A disk block must be mapped by at most one buffer
	 * at any point in time. Care must be taken to avoid deadlocking
	 * when two buffers are trying to get the same set of disk blocks.
	 */
	start = bp->b_blkno;
#ifdef SECSIZE
	last = start + size/bp->b_blksize - 1;
#else SECSIZE
	last = start + btodb(size) - 1;
#endif SECSIZE
	dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep == bp || ep->b_dev != bp->b_dev || (ep->b_flags&B_INVAL))
			continue;
		/* look for overlap */
		if (ep->b_bcount == 0 || ep->b_blkno > last ||
#ifdef SECSIZE
		    ep->b_blkno + ep->b_bcount/ep->b_blksize <= start)
#else SECSIZE
		    ep->b_blkno + btodb(ep->b_bcount) <= start)
#endif SECSIZE
			continue;
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			goto loop;
		}
		notavail(ep);
		if (ep->b_flags & B_DELWRI) {
			bwrite(ep);
			goto loop;
		}
		ep->b_flags |= B_INVAL;
		brelse(ep);
	}
	return (allocbuf(bp, size));
}
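
/*
 * Overlap arithmetic, worked for the non-SECSIZE case: growing a
 * buffer at b_blkno 32 to 8192 bytes with DEV_BSIZE 512 gives
 * last = 32 + btodb(8192) - 1 = 47; any buffer ep whose range
 * [ep->b_blkno, ep->b_blkno + btodb(ep->b_bcount) - 1] touches
 * [32, 47] is written out (if dirty) and invalidated first.
 */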

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf()
{
	register struct buf *bp, *dp;

loop:
	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		goto loop;
	}
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_dev, bp->b_bufsize), bp->b_blkno);
	bp->b_flags = B_BUSY;
	return (bp);
}
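
/*
 * Note: a dirty (B_DELWRI) victim is pushed to disk asynchronously
 * and the scan restarts rather than waiting; the ru_oublock charge
 * for that write was already made in bdwrite, by the process that
 * first dirtied the block.
 */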

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = splbio();
	while ((bp->b_flags&B_DONE)==0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	if (u.u_error == 0)			/* XXX */
		u.u_error = geterror(bp);
}

/*
 * Mark I/O complete on a buffer.
 * If someone should be called, e.g. the pageout
 * daemon, do so.  Otherwise, wake up anyone
 * waiting for it.
 */
biodone(bp)
	register struct buf *bp;
{

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
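
/*
 * Note: B_CALL is the asynchronous completion hook; the pageout
 * daemon sets it with b_iodone pointing at its cleanup routine, so
 * the work runs straight out of biodone (typically at interrupt
 * level) instead of waking a sleeping process.
 */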

/*
 * Insure that no part of a specified block is in an incore buffer.
 * "size" is given in device blocks (the units of b_blkno).
 */
blkflush(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	long size;
{
	register struct buf *ep;
	struct buf *dp;
	daddr_t start, last;

	start = blkno;
	last = start + btodb(size) - 1;
	dp = BUFHASH(dev, blkno);
loop:
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep->b_dev != dev || (ep->b_flags&B_INVAL))
			continue;
		/* look for overlap */
		if (ep->b_bcount == 0 || ep->b_blkno > last ||
#ifdef SECSIZE
		    ep->b_blkno + ep->b_bcount / ep->b_blksize <= start)
#else SECSIZE
		    ep->b_blkno + btodb(ep->b_bcount) <= start)
#endif SECSIZE
			continue;
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			goto loop;
		}
		if (ep->b_flags & B_DELWRI) {
			notavail(ep);
			bwrite(ep);
			goto loop;
		}
	}
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 */
bflush(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct buf *flist;

loop:
	for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++)
	for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
		if ((bp->b_flags & B_DELWRI) == 0)
			continue;
		if (dev == NODEV || dev == bp->b_dev) {
			bp->b_flags |= B_ASYNC;
			notavail(bp);
			bwrite(bp);
			goto loop;
		}
	}
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized code.
 */
geterror(bp)
	register struct buf *bp;
{
	int error = 0;

	if (bp->b_flags&B_ERROR)
		if ((error = bp->b_error)==0)
			error = EIO;
	return (error);
}

/*
 * Invalidate in core blocks belonging to closed or umounted filesystem
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o. Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues. Until that happy day, this suffices for
 * correctness.
 */
binval(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct bufhd *hp;
#define dp ((struct buf *)hp)

	/* walk every hash chain, marking all buffers on dev invalid */
	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
			if (bp->b_dev == dev)
				bp->b_flags |= B_INVAL;
}