/*
* Copyright (c) 1982, 1986, 1989 The Regents of the University of California.
* This module is believed to contain source code proprietary to AT&T.
* Use and redistribution is subject to the Berkeley Software License
* Agreement and your Software Agreement with AT&T (Western Electric).
*
* @(#)vfs_bio.c 7.50 (Berkeley) %G%
*/
#include <sys/resourcevar.h>
/*
 * NOTE(review): fragmentary, line-sampled extraction of what appears to be
 * bufinit() from BSD vfs_bio.c.  Statements are split across physical lines
 * and many lines (the function header, braces, and several declarations and
 * statements, e.g. "bp = &buf[i]") are missing, so this span does not
 * compile.  Restore the pristine function before making code changes.
 *
 * Visible logic: make each hash-chain head (bufhash[0..BUFHSZ-1]) and each
 * free-list head (bfreelist[0..BQUEUES-1]) point to itself; then distribute
 * bufpages of memory across nbuf headers (the first "residual" headers
 * presumably get one extra CLBYTES cluster -- TODO confirm against the
 * original), hash each buffer onto BQ_AGE, and queue it on BQ_AGE or
 * BQ_EMPTY depending on whether it received any memory.
 */
* Initialize buffers and hash links for buffers.
register struct buf
*bp
, *dp
;
register struct bufhd
*hp
;
for (hp
= bufhash
, i
= 0; i
< BUFHSZ
; i
++, hp
++)
hp
->b_forw
= hp
->b_back
= (struct buf
*)hp
;
for (dp
= bfreelist
; dp
< &bfreelist
[BQUEUES
]; dp
++) {
dp
->b_forw
= dp
->b_back
= dp
->av_forw
= dp
->av_back
= dp
;
residual
= bufpages
% nbuf
;
for (i
= 0; i
< nbuf
; i
++) {
bp
->b_un
.b_addr
= buffers
+ i
* MAXBSIZE
;
bp
->b_bufsize
= (base
+ 1) * CLBYTES
;
bp
->b_bufsize
= base
* CLBYTES
;
binshash(bp
, &bfreelist
[BQ_AGE
]);
dp
= bp
->b_bufsize
? &bfreelist
[BQ_AGE
] : &bfreelist
[BQ_EMPTY
];
/*
 * NOTE(review): fragmentary extraction of bread().  Lines are missing and
 * split; does not compile as-is.  Two revisions appear interleaved: an old
 * device-based call "getblk(dev, blkno, size, secsize)" alongside the
 * vnode-based "*bpp = bp = getblk(vp, blkno, size)" -- reconcile against
 * the pristine source.
 *
 * Visible logic: fetch the block via getblk(); if already valid
 * (B_DONE | B_DELWRI) record a cache hit (TR_BREADHIT); otherwise record a
 * miss (TR_BREADMISS), presumably start the read, associate the caller's
 * credential when the buffer has none (b_rcred == NOCRED && cred != NOCRED),
 * and charge the process one input block (ru_inblock).
 */
* Find the block in the buffer pool.
* If the buffer is not present, allocate a new buffer and load
* its contents according to the filesystem fill routine.
bread(vp
, blkno
, size
, cred
, bpp
)
struct proc
*p
= curproc
; /* XXX */
bp
= getblk(dev
, blkno
, size
, secsize
);
*bpp
= bp
= getblk(vp
, blkno
, size
);
if (bp
->b_flags
& (B_DONE
| B_DELWRI
)) {
trace(TR_BREADHIT
, pack(vp
, size
), blkno
);
if (bp
->b_bcount
> bp
->b_bufsize
)
if (bp
->b_rcred
== NOCRED
&& cred
!= NOCRED
) {
trace(TR_BREADMISS
, pack(vp
, size
), blkno
);
p
->p_stats
->p_ru
.ru_inblock
++; /* pay for read */
/*
 * NOTE(review): fragmentary extraction of breadn() (bread with read-ahead).
 * Many lines missing; does not compile as-is.  Two revisions of the final
 * return are interleaved -- "return (bread(dev, blkno, size, secsize))" vs
 * "return (bread(vp, blkno, size, cred, bpp))" -- reconcile against the
 * pristine source.
 *
 * Visible logic: if the primary block is not incore(), allocate it with
 * getblk() and (presumably) start the read, charging ru_inblock on a miss;
 * then for each of the num read-ahead blocks (rablkno[i]/rabsize[i]) that
 * is not resident, getblk() it and fire an asynchronous read
 * (B_ASYNC | B_READ), again charging ru_inblock in advance; finally fall
 * back to bread() to obtain/await the primary block.
 */
* Operates like bread, but also starts I/O on the N specified
breadn(vp
, blkno
, size
, rablkno
, rabsize
, num
, cred
, bpp
)
daddr_t rablkno
[]; int rabsize
[];
struct proc
*p
= curproc
; /* XXX */
register struct buf
*bp
, *rabp
;
* If the block is not memory resident,
* allocate a buffer and start I/O.
if (!incore(vp
, blkno
)) {
*bpp
= bp
= getblk(vp
, blkno
, size
);
if ((bp
->b_flags
& (B_DONE
| B_DELWRI
)) == 0) {
if (bp
->b_bcount
> bp
->b_bufsize
)
if (bp
->b_rcred
== NOCRED
&& cred
!= NOCRED
) {
trace(TR_BREADMISS
, pack(vp
, size
), blkno
);
p
->p_stats
->p_ru
.ru_inblock
++; /* pay for read */
trace(TR_BREADHIT
, pack(vp
, size
), blkno
);
* If there's read-ahead block(s), start I/O
* on them also (as above).
for (i
= 0; i
< num
; i
++) {
if (incore(vp
, rablkno
[i
]))
rabp
= getblk(vp
, rablkno
[i
], rabsize
[i
]);
if (rabp
->b_flags
& (B_DONE
| B_DELWRI
)) {
trace(TR_BREADHITRA
, pack(vp
, rabsize
[i
]), rablkno
[i
]);
rabp
->b_flags
|= B_ASYNC
| B_READ
;
if (rabp
->b_bcount
> rabp
->b_bufsize
)
if (rabp
->b_rcred
== NOCRED
&& cred
!= NOCRED
) {
trace(TR_BREADMISSRA
, pack(vp
, rabsize
[i
]), rablkno
[i
]);
p
->p_stats
->p_ru
.ru_inblock
++; /* pay in advance */
* If block was memory resident, let bread get it.
* If block was not memory resident, the read was
* started above, so just wait for the read to complete.
return (bread(dev
, blkno
, size
, secsize
));
return (bread(vp
, blkno
, size
, cred
, bpp
));
/*
 * NOTE(review): fragmentary extraction of bwrite().  Function header,
 * braces, and several statements are missing; does not compile as-is.
 *
 * Visible logic: clear the state bits (B_READ|B_DONE|B_ERROR|B_DELWRI),
 * charge ru_oublock unless the write was already delayed (the process
 * paid when it was first marked B_DELWRI), reassign the buffer via
 * reassignbuf(), trace TR_BWRITE, and (presumably) start the I/O.  A
 * synchronous caller ((flag & B_ASYNC) == 0) then awaits completion; the
 * second ru_oublock/reassignbuf pair under the synchronous branch looks
 * like the post-I/O accounting path for a formerly-delayed buffer --
 * confirm against the pristine source.
 */
* Release buffer on completion.
struct proc
*p
= curproc
; /* XXX */
bp
->b_flags
&= ~(B_READ
| B_DONE
| B_ERROR
| B_DELWRI
);
if ((flag
& B_DELWRI
) == 0)
p
->p_stats
->p_ru
.ru_oublock
++; /* no one paid yet */
reassignbuf(bp
, bp
->b_vp
);
trace(TR_BWRITE
, pack(bp
->b_vp
, bp
->b_bcount
), bp
->b_lblkno
);
if (bp
->b_bcount
> bp
->b_bufsize
)
* If the write was synchronous, then await I/O completion.
* If the write was "delayed", then we put the buffer on
* the queue of blocks awaiting I/O completion status.
if ((flag
& B_ASYNC
) == 0) {
if ((flag
&B_DELWRI
) == 0)
p
->p_stats
->p_ru
.ru_oublock
++; /* no one paid yet */
reassignbuf(bp
, bp
->b_vp
);
} else if (flag
& B_DELWRI
) {
/*
 * NOTE(review): fragment of vn_bwrite() -- the generic VOP_BWRITE
 * implementation that simply forwards the buffer in the argument
 * structure to bwrite().  The function header and braces are missing.
 */
struct vop_bwrite_args
*ap
;
return bwrite (ap
->a_bp
);
/*
 * NOTE(review): fragmentary extraction of bdwrite() (delayed write).
 * Function header and several statements missing; does not compile as-is.
 *
 * Visible logic: on the first delay of this buffer ((b_flags & B_DELWRI)
 * == 0) reassign it and charge ru_oublock now, so later flushes are not
 * billed again; if the underlying device is a tape
 * (bdevsw[major(bp->b_dev)].d_flags & B_TAPE) the write must be started
 * immediately (tapes are strictly sequential); otherwise mark the buffer
 * B_DONE | B_DELWRI and (presumably) release it without I/O.
 */
* The buffer is marked dirty, but is not queued for I/O.
* This routine should be used when the buffer is expected
* to be modified again soon, typically a small write that
* partially fills a buffer.
* NB: magnetic tapes cannot be delayed; they must be
* written in the order that the writes are requested.
struct proc
*p
= curproc
; /* XXX */
if ((bp
->b_flags
& B_DELWRI
) == 0) {
reassignbuf(bp
, bp
->b_vp
);
p
->p_stats
->p_ru
.ru_oublock
++; /* no one paid yet */
* If this is a tape drive, the write must be initiated.
if (bdevsw
[major(bp
->b_dev
)].d_flags
& B_TAPE
)
bp
->b_flags
|= (B_DONE
| B_DELWRI
);
/*
 * NOTE(review): the first four comment lines below appear to belong to
 * bawrite() (whose body is entirely missing); the code fragments that
 * follow are from brelse().  Many lines (function headers, braces,
 * wakeup of the buffer itself, spl protection) are missing; does not
 * compile as-is.
 *
 * Visible brelse() logic: wake anyone waiting on this buffer or on any
 * free buffer (bfreelist[0].b_flags & B_WANTED); retry rather than
 * invalidate errored-but-locked buffers; disassociate/invalidate buffers
 * marked B_NOCACHE or B_ERROR (clearing B_DELWRI on truly invalid ones);
 * then choose a free queue: BQ_EMPTY for headerless buffers, BQ_AGE for
 * error/invalid or aged data, BQ_LOCKED for locked buffers, BQ_LRU
 * otherwise; finally clear the transient flags
 * (B_WANTED|B_BUSY|B_ASYNC|B_AGE|B_NOCACHE).
 */
* Start I/O on a buffer, but do not wait for it to complete.
* The buffer is released when the I/O completes.
* Setting the ASYNC flag causes bwrite to return
* after starting the I/O.
* Even if the buffer is dirty, no I/O is started.
register struct buf
*flist
;
trace(TR_BRELSE
, pack(bp
->b_vp
, bp
->b_bufsize
), bp
->b_lblkno
);
* If a process is waiting for the buffer, or
* is waiting for a free buffer, awaken it.
if (bp
->b_flags
& B_WANTED
)
if (bfreelist
[0].b_flags
& B_WANTED
) {
bfreelist
[0].b_flags
&= ~B_WANTED
;
wakeup((caddr_t
)bfreelist
);
* Retry I/O for locked buffers rather than invalidating them.
if ((bp
->b_flags
& B_ERROR
) && (bp
->b_flags
& B_LOCKED
))
* Disassociate buffers that are no longer valid.
if (bp
->b_flags
& (B_NOCACHE
| B_ERROR
))
if ((bp
->b_bufsize
<= 0) || (bp
->b_flags
& (B_ERROR
| B_INVAL
))) {
bp
->b_flags
&= ~B_DELWRI
;
* Stick the buffer back on a free list.
if (bp
->b_bufsize
<= 0) {
/* block has no buffer ... put at front of unused buffer list */
flist
= &bfreelist
[BQ_EMPTY
];
} else if (bp
->b_flags
& (B_ERROR
| B_INVAL
)) {
/* block has no info ... put at front of most free list */
flist
= &bfreelist
[BQ_AGE
];
if (bp
->b_flags
& B_LOCKED
)
flist
= &bfreelist
[BQ_LOCKED
];
else if (bp
->b_flags
& B_AGE
)
flist
= &bfreelist
[BQ_AGE
];
flist
= &bfreelist
[BQ_LRU
];
bp
->b_flags
&= ~(B_WANTED
| B_BUSY
| B_ASYNC
| B_AGE
| B_NOCACHE
);
/*
 * NOTE(review): fragment of incore().  Function header, the hash-chain
 * head computation (dp = BUFHASH(...)), and the return statements are
 * missing; does not compile as-is.
 *
 * Visible logic: walk the circular hash chain headed by dp looking for a
 * buffer matching (b_lblkno == blkno && b_vp == vp) that is not marked
 * B_INVAL; presumably returns nonzero on a hit and zero otherwise.
 */
* Check to see if a block is currently memory resident.
for (bp
= dp
->b_forw
; bp
!= dp
; bp
= bp
->b_forw
)
if (bp
->b_lblkno
== blkno
&& bp
->b_vp
== vp
&&
(bp
->b_flags
& B_INVAL
) == 0)
/*
 * NOTE(review): fragmentary extraction of getblk() (identified by the
 * panic/printf strings).  Two revisions appear interleaved: the old
 * device-based parameter list "(dev, blkno, size, secsize)" alongside the
 * vnode-based declarations used by the loop body -- reconcile against the
 * pristine source.  The "stray size" path and the cache-miss allocation
 * path are missing entirely.
 *
 * Visible logic: reject oversized requests ("getblk: size too big"); scan
 * the hash chain for a (vp, blkno) match; if the matching buffer is
 * B_BUSY, sleep on it at PRIBIO + 1 and (presumably) rescan; a found
 * buffer whose b_bcount differs from the requested size is reported as a
 * stray size.
 */
* Check to see if a block is currently memory resident.
* If it is resident, return it. If it is not resident,
* allocate a new buffer and assign it to the block.
getblk(dev
, blkno
, size
, secsize
)
register struct vnode
*vp
;
register struct buf
*bp
, *dp
;
panic("getblk: size too big");
* Search the cache for the block. If the buffer is found,
* but it is currently locked, the we must wait for it to
for (bp
= dp
->b_forw
; bp
!= dp
; bp
= bp
->b_forw
) {
if (bp
->b_lblkno
!= blkno
|| bp
->b_vp
!= vp
||
if (bp
->b_flags
& B_BUSY
) {
sleep((caddr_t
)bp
, PRIBIO
+ 1);
if (bp
->b_bcount
!= size
) {
printf("getblk: stray size");
/*
 * NOTE(review): fragment of geteblk() (allocate an empty, unassociated
 * buffer; identified by the panic string).  Most of the body is missing.
 * Visible logic: reject oversized requests, take a buffer destined for
 * the BQ_AGE free list, and set a default block size of DEV_BSIZE.
 */
* The caller will assign it to a block.
register struct buf
*bp
, *flist
;
panic("geteblk: size too big");
flist
= &bfreelist
[BQ_AGE
];
bp
->b_blksize
= DEV_BSIZE
;
/*
 * NOTE(review): fragmentary extraction of allocbuf().  The function
 * header, braces, the donor-buffer selection inside the grow loop, and
 * the error/exit paths are missing; does not compile as-is.
 *
 * Visible logic: round the requested size up to a CLBYTES multiple.
 * No-op if the buffer already holds exactly that much.  Shrinking: move
 * the excess pages (pagemove) into a spare header taken from BQ_EMPTY so
 * the space stays usable; if no spare header exists, keep the excess.
 * Growing: repeatedly steal up to "take" bytes of pages from donor
 * buffers (bp), trimming the donor's b_bufsize/b_bcount and rehashing a
 * fully-drained donor onto BQ_EMPTY, until tp reaches sizealloc.
 */
* Expand or contract the actual memory allocated to a buffer.
* If no memory is available, release buffer and take error exit.
register struct buf
*bp
, *ep
;
sizealloc
= roundup(size
, CLBYTES
);
* Buffer size does not change
if (sizealloc
== tp
->b_bufsize
)
* Buffer size is shrinking.
* Place excess space in a buffer header taken from the
* BQ_EMPTY buffer list and placed on the "most free" list.
* If no extra buffer headers are available, leave the
* extra space in the present buffer.
if (sizealloc
< tp
->b_bufsize
) {
ep
= bfreelist
[BQ_EMPTY
].av_forw
;
if (ep
== &bfreelist
[BQ_EMPTY
])
pagemove(tp
->b_un
.b_addr
+ sizealloc
, ep
->b_un
.b_addr
,
(int)tp
->b_bufsize
- sizealloc
);
ep
->b_bufsize
= tp
->b_bufsize
- sizealloc
;
tp
->b_bufsize
= sizealloc
;
* More buffer space is needed. Get it out of buffers on
* the "most free" list, placing the empty headers on the
* BQ_EMPTY buffer header list.
while (tp
->b_bufsize
< sizealloc
) {
take
= sizealloc
- tp
->b_bufsize
;
if (take
>= bp
->b_bufsize
)
pagemove(&bp
->b_un
.b_addr
[bp
->b_bufsize
- take
],
&tp
->b_un
.b_addr
[tp
->b_bufsize
], take
);
bp
->b_bufsize
= bp
->b_bufsize
- take
;
if (bp
->b_bcount
> bp
->b_bufsize
)
bp
->b_bcount
= bp
->b_bufsize
;
if (bp
->b_bufsize
<= 0) {
binshash(bp
, &bfreelist
[BQ_EMPTY
]);
/*
 * NOTE(review): fragmentary extraction of getnewbuf().  The function
 * header, the queue-removal and flag-setting code, and the crfree()
 * calls that presumably follow the NOCRED checks are missing; does not
 * compile as-is.
 *
 * Visible logic: scan free lists from BQ_AGE downward for a nonempty
 * queue; if none, mark wanted and sleep at PRIBIO + 1; a chosen buffer
 * still carrying delayed-write data (B_DELWRI) is traced (TR_BRELSE) and
 * presumably pushed asynchronously rather than reused; stale read/write
 * credentials (b_rcred/b_wcred != NOCRED) are released; finally the
 * dirty- and valid-region bookkeeping (b_dirtyoff/b_dirtyend,
 * b_validoff/b_validend) is reset to zero.
 */
* Find a buffer which is available for use.
* Select something from a free list.
* Preference is to AGE list, then LRU list.
register struct buf
*bp
, *dp
;
register struct ucred
*cred
;
for (dp
= &bfreelist
[BQ_AGE
]; dp
> bfreelist
; dp
--)
if (dp
== bfreelist
) { /* no free blocks */
sleep((caddr_t
)dp
, PRIBIO
+ 1);
if (bp
->b_flags
& B_DELWRI
) {
trace(TR_BRELSE
, pack(bp
->b_vp
, bp
->b_bufsize
), bp
->b_lblkno
);
if (bp
->b_rcred
!= NOCRED
) {
if (bp
->b_wcred
!= NOCRED
) {
bp
->b_dirtyoff
= bp
->b_dirtyend
= 0;
bp
->b_validoff
= bp
->b_validend
= 0;
/*
 * NOTE(review): fragment of biowait().  The spl bracketing, the error
 * extraction (b_error / geterror), and the return statements are missing;
 * does not compile as-is.
 *
 * Visible logic: sleep on the buffer at PRIBIO until B_DONE is set, then
 * report success when B_ERROR is clear; on error, presumably return
 * b_error or a default (EIO) when no specific error was recorded.
 */
* Wait for I/O to complete.
* Extract and return any errors associated with the I/O.
* If the error flag is set, but no specific error is
while ((bp
->b_flags
& B_DONE
) == 0)
sleep((caddr_t
)bp
, PRIBIO
);
if ((bp
->b_flags
& B_ERROR
) == 0)
/*
 * NOTE(review): fragment of biodone().  The B_DONE set, the vwakeup()
 * for completed writes, the b_iodone callback invocation, and the brelse
 * vs. wakeup decision bodies are missing; does not compile as-is.
 *
 * Visible logic: guard against being called twice (panic expected if
 * B_DONE is already set -- TODO confirm); for completed writes
 * ((b_flags & B_READ) == 0) presumably notify the vnode; if B_CALL is
 * set invoke the registered completion callback; otherwise release an
 * async buffer via brelse or clear B_WANTED and wake any sleeper.
 */
* Mark I/O complete on a buffer.
* If a callback has been requested, e.g. the pageout
* daemon, do so. Otherwise, awaken waiting processes.
if (bp
->b_flags
& B_DONE
)
if ((bp
->b_flags
& B_READ
) == 0)
if (bp
->b_flags
& B_CALL
) {
if (bp
->b_flags
& B_ASYNC
)
bp
->b_flags
&= ~B_WANTED
;