/*-
 * Copyright (c) 1982, 1986, 1989 The Regents of the University of California.
 * All rights reserved.
 *
 * This module is believed to contain source code proprietary to AT&T.
 * Use and redistribution is subject to the Berkeley Software License
 * Agreement and your Software Agreement with AT&T (Western Electric).
 *
 *	@(#)vfs_bio.c	7.59 (Berkeley) %G%
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <libkern/libkern.h>
/*
 * Definitions for the buffer hash lists.
 */
#define	BUFHASH(dvp, lbn)	\
	(&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
struct	list_entry *bufhashtbl, invalhash;
u_long	bufhash;
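
/*
 * Illustrative example (not from the original source; sizes assumed):
 * with a table of 64 chains (bufhash == 63) and sizeof(struct vnode)
 * == 256, a vnode at 0xf1002100 gives 0xf1002100 / 256 == 0xf10021,
 * so logical block 0 hashes to chain 0xf10021 & 63 == 33 and block 1
 * to chain 34.  Dividing the vnode address by the vnode size strips
 * the low-order bits that carry no information, so successive blocks
 * of one file spread across adjacent chains.
 */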
/*
 * Insq/Remq for the buffer hash lists.
 */
#define	binshash(bp, dp)	list_enter_head(dp, bp, struct buf *, b_hash)
#define	bremhash(bp)		list_remove(bp, struct buf *, b_hash)
/*
 * Definitions for the buffer free lists.
 */
#define	BQUEUES		4		/* number of free buffer queues */

#define	BQ_LOCKED	0		/* super-blocks &c */
#define	BQ_LRU		1		/* lru, useful buffers */
#define	BQ_AGE		2		/* rubbish */
#define	BQ_EMPTY	3		/* buffer headers with no memory */

struct queue_entry bufqueues[BQUEUES];
int needbuffer;
/*
 * Insq/Remq for the buffer free lists.
 */
#define	binsheadfree(bp, dp) \
	queue_enter_head(dp, bp, struct buf *, b_freelist)
#define	binstailfree(bp, dp) \
	queue_enter_tail(dp, bp, struct buf *, b_freelist)
struct buf *cluster_newbuf __P((struct vnode *, struct buf *, long, daddr_t,
	    daddr_t, long, int));
struct buf *cluster_rbuild __P((struct vnode *, u_quad_t, struct buf *,
	    daddr_t, daddr_t, long, int, long));
void	    cluster_wbuild __P((struct vnode *, struct buf *, long size,
	    daddr_t start_lbn, int len, daddr_t lbn));
/*
 * Remove a buffer from the free list it's on.
 */
void
bremfree(bp)
	register struct buf *bp;
{
	struct queue_entry *dp;

	/*
	 * We only calculate the head of the freelist when removing
	 * the last element of the list as that is the only time that
	 * it is needed (e.g. to reset the tail pointer).
	 */
	if (bp->b_freelist.qe_next == NULL) {
		for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
			if (dp->qe_prev == &bp->b_freelist.qe_next)
				break;
		if (dp == &bufqueues[BQUEUES])
			panic("bremfree: lost tail");
	}
	queue_remove(dp, bp, struct buf *, b_freelist);
}
/*
 * Initialize buffers and hash links for buffers.
 */
void
bufinit()
{
	register struct buf *bp;
	struct queue_entry *dp;
	register int i;
	int base, residual;

	for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
		queue_init(dp);
	bufhashtbl = (struct list_entry *)hashinit(nbuf, M_CACHE, &bufhash);
	base = bufpages / nbuf;
	residual = bufpages % nbuf;
	for (i = 0; i < nbuf; i++) {
		bp = &buf[i];
		bzero((char *)bp, sizeof *bp);
		bp->b_dev = NODEV;
		bp->b_rcred = NOCRED;
		bp->b_wcred = NOCRED;
		bp->b_un.b_addr = buffers + i * MAXBSIZE;
		if (i < residual)
			bp->b_bufsize = (base + 1) * CLBYTES;
		else
			bp->b_bufsize = base * CLBYTES;
		dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY];
		binsheadfree(bp, dp);
		binshash(bp, &invalhash);
	}
}
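
/*
 * Worked example (illustrative, not in the original source): with
 * bufpages == 100 and nbuf == 32, base == 3 and residual == 4, so the
 * first 4 buffer headers start out with 4 clusters of memory apiece
 * and the remaining 28 with 3; every page is accounted for, since
 * 4 * 4 + 28 * 3 == 100.  A header left with zero clusters would go
 * on BQ_EMPTY instead of BQ_AGE.
 */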
/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
bread(vp, blkno, size, cred, bpp)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp;

#ifdef SECSIZE
	bp = getblk(dev, blkno, size, secsize);
#else /* SECSIZE */
	*bpp = bp = getblk(vp, blkno, size, 0, 0);
#endif /* SECSIZE */
	if (bp->b_flags & (B_DONE | B_DELWRI)) {
		trace(TR_BREADHIT, pack(vp, size), blkno);
		return (0);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	if (bp->b_rcred == NOCRED && cred != NOCRED) {
		crhold(cred);
		bp->b_rcred = cred;
	}
	VOP_STRATEGY(bp);
	trace(TR_BREADMISS, pack(vp, size), blkno);
	p->p_stats->p_ru.ru_inblock++;		/* pay for read */
	return (biowait(bp));
}
/*
 * Operates like bread, but also starts I/O on the N specified
 * read-ahead blocks.
 */
breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp)
	struct vnode *vp;
	daddr_t blkno; int size;
	daddr_t rablkno[]; int rabsize[];
	int num;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp, *rabp;
	register int i;

	bp = NULL;
	/*
	 * If the block is not memory resident,
	 * allocate a buffer and start I/O.
	 */
	if (!incore(vp, blkno)) {
		*bpp = bp = getblk(vp, blkno, size, 0, 0);
		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breadn");
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
			VOP_STRATEGY(bp);
			trace(TR_BREADMISS, pack(vp, size), blkno);
			p->p_stats->p_ru.ru_inblock++;	/* pay for read */
		} else {
			trace(TR_BREADHIT, pack(vp, size), blkno);
		}
	}

	/*
	 * If there's read-ahead block(s), start I/O
	 * on them also (as above).
	 */
	for (i = 0; i < num; i++) {
		if (incore(vp, rablkno[i]))
			continue;
		rabp = getblk(vp, rablkno[i], rabsize[i], 0, 0);
		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]);
		} else {
			rabp->b_flags |= B_ASYNC | B_READ;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				rabp->b_rcred = cred;
			}
			VOP_STRATEGY(rabp);
			trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]);
			p->p_stats->p_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was memory resident, let bread get it.
	 * If block was not memory resident, the read was
	 * started above, so just wait for the read to complete.
	 */
	if (bp == NULL)
#ifdef SECSIZE
		return (bread(dev, blkno, size, secsize));
#else /* SECSIZE */
		return (bread(vp, blkno, size, cred, bpp));
#endif /* SECSIZE */
	return (biowait(bp));
}
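
/*
 * Illustrative caller sketch (not part of this file; the function
 * name is hypothetical): a filesystem read routine typically asks
 * breadn for the current block plus one read-ahead block, in the
 * style of ffs_read().
 */
#ifdef notdef
int
example_fs_read(vp, lbn, bsize, cred, bpp)
	struct vnode *vp;
	daddr_t lbn;
	int bsize;
	struct ucred *cred;
	struct buf **bpp;
{
	daddr_t nextlbn = lbn + 1;
	int nextsize = bsize;

	/* read lbn synchronously, start lbn + 1 asynchronously */
	return (breadn(vp, lbn, bsize, &nextlbn, &nextsize, 1, cred, bpp));
}
#endif /* notdef */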
/*
 * We could optimize this by keeping track of where the last read-ahead
 * was, but it would involve adding fields to the vnode.  For now, let's
 * just get it working.
 *
 * This replaces bread.  If this is a bread at the beginning of a file and
 * lastr is 0, we assume this is the first read and we'll read up to two
 * blocks if they are sequential.  After that, we'll do regular read ahead
 * in clustered chunks.
 *
 * There are 4 or 5 cases depending on how you count:
 *	Desired block is in the cache:
 *	    1 Not sequential access (0 I/Os).
 *	    2 Access is sequential, do read-ahead (1 ASYNC).
 *	Desired block is not in cache:
 *	    3 Not sequential access (1 SYNC).
 *	    4 Sequential access, next block is contiguous (1 SYNC).
 *	    5 Sequential access, next block is not contiguous (1 SYNC, 1 ASYNC)
 *
 * There are potentially two buffers that require I/O.
 *	bp is the block requested.
 *	rbp is the read-ahead block.
 *	If either is NULL, then you don't have to do the I/O.
 */
cluster_read(vp, filesize, lblkno, size, cred, bpp)
	struct vnode *vp;
	u_quad_t filesize;
	daddr_t lblkno;
	long size;
	struct ucred *cred;
	struct buf **bpp;
{
	struct buf *bp, *rbp;
	daddr_t blkno, ioblkno;
	long flags;
	int error, num_ra, alreadyincore;

#ifdef DIAGNOSTIC
	if (size == 0)
		panic("cluster_read: size = 0");
#endif

	error = 0;
	flags = B_READ;
	*bpp = bp = getblk(vp, lblkno, size, 0, 0);
	if (bp->b_flags & (B_CACHE | B_DONE | B_DELWRI)) {
		/*
		 * Desired block is in cache; do any readahead ASYNC.
		 * Case 1, 2.
		 */
		trace(TR_BREADHIT, pack(vp, size), lblkno);
		flags |= B_ASYNC;
		ioblkno = lblkno +
		    (lblkno < vp->v_ralen ? vp->v_ralen >> 1 : vp->v_ralen);
		alreadyincore = (int)incore(vp, ioblkno);
		bp = NULL;
	} else {
		/* Block wasn't in cache, case 3, 4, 5. */
		trace(TR_BREADMISS, pack(vp, size), lblkno);
		bp->b_flags |= B_READ;
		ioblkno = lblkno;
		alreadyincore = 0;
		curproc->p_stats->p_ru.ru_inblock++;		/* XXX */
	}
	/*
	 * Replace 1 with a window size based on some permutation of
	 * maxcontig and rot_delay.  This will let you figure out how
	 * many blocks you should read-ahead (case 2, 4, 5).
	 *
	 * If the access isn't sequential, cut the window size in half.
	 */
	rbp = NULL;
	if (lblkno != vp->v_lastr + 1 && lblkno != 0)
		vp->v_ralen = max(vp->v_ralen >> 1, 1);
	else if ((ioblkno + 1) * size < filesize && !alreadyincore &&
	    !(error = VOP_BMAP(vp, ioblkno, NULL, &blkno, &num_ra))) {
		/*
		 * Reading sequentially, and the next block is not in the
		 * cache.  We are going to try reading ahead. If this is
		 * the first read of a file, then limit read-ahead to a
		 * single block, else read as much as we're allowed.
		 */
		if (num_ra > vp->v_ralen) {
			num_ra = vp->v_ralen;
			vp->v_ralen = min(MAXPHYS / size, vp->v_ralen << 1);
		} else
			vp->v_ralen = num_ra + 1;

		if (num_ra)			/* case 2, 4 */
			rbp = cluster_rbuild(vp, filesize,
			    bp, ioblkno, blkno, size, num_ra, flags);
		else if (lblkno != 0 && ioblkno == lblkno) {
			/* Case 5: check how many blocks to read ahead */
			++ioblkno;
			if ((ioblkno + 1) * size > filesize ||
			    (error = VOP_BMAP(vp,
				ioblkno, NULL, &blkno, &num_ra)))
				goto skip_readahead;
			flags |= B_ASYNC;
			if (num_ra)
				rbp = cluster_rbuild(vp, filesize,
				    NULL, ioblkno, blkno, size, num_ra, flags);
			else {
				rbp = getblk(vp, ioblkno, size, 0, 0);
				rbp->b_flags |= flags;
				rbp->b_blkno = blkno;
			}
		} else if (lblkno != 0) {
			/* case 2; read ahead single block */
			rbp = getblk(vp, ioblkno, size, 0, 0);
			rbp->b_flags |= flags;
			rbp->b_blkno = blkno;
		} else if (bp)			/* case 1, 3, block 0 */
			bp->b_blkno = blkno;
		/* Case 1 on block 0; not really doing sequential I/O */

		if (rbp == bp)			/* case 4 */
			rbp = NULL;
		else if (rbp) {			/* case 2, 5 */
			trace(TR_BREADMISSRA,
			    pack(vp, (num_ra + 1) * size), ioblkno);
			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
		}
	}

	/* XXX Kirk, do we need to make sure the bp has creds? */
skip_readahead:
	if (bp)
		if (bp->b_flags & (B_DONE | B_DELWRI))
			panic("cluster_read: DONE bp");
		else
			error = VOP_STRATEGY(bp);

	if (rbp)
		if (error || rbp->b_flags & (B_DONE | B_DELWRI)) {
			rbp->b_flags &= ~(B_ASYNC | B_READ);
			brelse(rbp);
		} else
			(void) VOP_STRATEGY(rbp);

	if (bp)
		return (biowait(bp));
	return (error);
}
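
/*
 * Worked example (illustrative, not from the original source):
 * assuming 8K blocks and MAXPHYS == 64K, a process reading a file
 * sequentially from block 0 sees v_ralen grow 1, 2, 4, 8 on
 * successive misses, capped at MAXPHYS / size == 8 blocks, provided
 * VOP_BMAP keeps reporting that many contiguous blocks (num_ra).
 * One out-of-order read halves the window again (case 1/3 above).
 */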
/*
 * If blocks are contiguous on disk, use this to provide clustered
 * read ahead.  We will read as many blocks as possible sequentially
 * and then parcel them up into logical blocks in the buffer hash table.
 */
struct buf *
cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags)
	struct vnode *vp;
	u_quad_t filesize;
	struct buf *bp;
	daddr_t lbn;
	daddr_t blkno;
	long size;
	int run;
	long flags;
{
	struct cluster_save *b_save;
	struct buf *tbp;
	daddr_t bn;
	int i, inc;

#ifdef DIAGNOSTIC
	if (size != vp->v_mount->mnt_stat.f_iosize)
		panic("cluster_rbuild: size %d != filesize %d\n",
			size, vp->v_mount->mnt_stat.f_iosize);
#endif
	if (size * (lbn + run + 1) > filesize)
		--run;
	if (run == 0) {
		if (!bp) {
			bp = getblk(vp, lbn, size, 0, 0);
			bp->b_blkno = blkno;
			bp->b_flags |= flags;
		}
		return (bp);
	}

	bp = cluster_newbuf(vp, bp, flags, blkno, lbn, size, run + 1);
	if (bp->b_flags & (B_DONE | B_DELWRI))
		return (bp);

	b_save = malloc(sizeof(struct buf *) * run + sizeof(struct cluster_save),
	    M_SEGMENT, M_WAITOK);
	b_save->bs_bufsize = b_save->bs_bcount = size;
	b_save->bs_nchildren = 0;
	b_save->bs_children = (struct buf **)(b_save + 1);
	b_save->bs_saveaddr = bp->b_saveaddr;
	bp->b_saveaddr = (caddr_t) b_save;

	inc = size / DEV_BSIZE;
	for (bn = blkno + inc, i = 1; i <= run; ++i, bn += inc) {
		if (incore(vp, lbn + i)) {
			if (i == 1) {
				bp->b_saveaddr = b_save->bs_saveaddr;
				bp->b_flags &= ~B_CALL;
				bp->b_iodone = NULL;
				allocbuf(bp, size);
				free(b_save, M_SEGMENT);
			} else
				allocbuf(bp, size * i);
			break;
		}
		tbp = getblk(vp, lbn + i, 0, 0, 0);
		tbp->b_bcount = tbp->b_bufsize = size;
		tbp->b_blkno = bn;
		tbp->b_flags |= flags | B_READ | B_ASYNC;
		++b_save->bs_nchildren;
		b_save->bs_children[i - 1] = tbp;
	}
	if (!(bp->b_flags & B_ASYNC))
		vp->v_ralen = max(vp->v_ralen - 1, 1);

	return (bp);
}
/*
 * Either get a new buffer or grow the existing one.
 */
struct buf *
cluster_newbuf(vp, bp, flags, blkno, lblkno, size, run)
	struct vnode *vp;
	struct buf *bp;
	long flags;
	daddr_t blkno;
	daddr_t lblkno;
	long size;
	int run;
{
	if (!bp) {
		bp = getblk(vp, lblkno, size, 0, 0);
		if (bp->b_flags & (B_DONE | B_DELWRI)) {
			bp->b_blkno = blkno;
			return (bp);
		}
	}
	allocbuf(bp, run * size);
	bp->b_blkno = blkno;
	bp->b_iodone = cluster_callback;
	bp->b_flags |= flags | B_CALL;
	return (bp);
}
/*
 * Cleanup after a clustered read or write.
 */
void
cluster_callback(bp)
	struct buf *bp;
{
	struct cluster_save *b_save;
	struct buf **tbp;
	caddr_t cp;

	b_save = (struct cluster_save *)(bp->b_saveaddr);
	bp->b_saveaddr = b_save->bs_saveaddr;

	cp = bp->b_un.b_addr + b_save->bs_bufsize;
	for (tbp = b_save->bs_children; b_save->bs_nchildren--; ++tbp) {
		pagemove(cp, (*tbp)->b_un.b_addr, (*tbp)->b_bufsize);
		cp += (*tbp)->b_bufsize;
		bp->b_bufsize -= (*tbp)->b_bufsize;
		biodone(*tbp);
	}
#ifdef DIAGNOSTIC
	if (bp->b_bufsize != b_save->bs_bufsize)
		panic("cluster_callback: more space to reclaim");
#endif
	bp->b_bcount = bp->b_bufsize;
	bp->b_iodone = NULL;
	free(b_save, M_SEGMENT);
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
/*
 * Synchronous write.
 * Release buffer on completion.
 */
bwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */
	register int flag;
	int s, error = 0;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if (flag & B_ASYNC) {
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
	}
	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	s = splbio();
	bp->b_vp->v_numoutput++;
	bp->b_flags |= B_WRITEINPROG;
	splx(s);
	VOP_STRATEGY(bp);

	/*
	 * If the write was synchronous, then await I/O completion.
	 * If the write was "delayed", then we put the buffer on
	 * the queue of blocks awaiting I/O completion status.
	 */
	if ((flag & B_ASYNC) == 0) {
		error = biowait(bp);
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
		if (bp->b_flags & B_EINTR) {
			bp->b_flags &= ~B_EINTR;
			error = EINTR;
		}
		brelse(bp);
	} else if (flag & B_DELWRI) {
		bp->b_flags |= B_AGE;
	}
	return (error);
}

int
vn_bwrite(ap)
	struct vop_bwrite_args *ap;
{

	return (bwrite(ap->a_bp));
}
/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
void
bdwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */

	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DELWRI;
		reassignbuf(bp, bp->b_vp);
		p->p_stats->p_ru.ru_oublock++;		/* no one paid yet */
	}
	/*
	 * If this is a tape drive, the write must be initiated.
	 */
	if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= (B_DONE | B_DELWRI);
		brelse(bp);
	}
}
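
/*
 * Illustrative sketch (not part of this file; the function name is
 * hypothetical): a write path chooses between the release strategies
 * by how much of the block it filled and whether it expects the block
 * to be written again soon.
 */
#ifdef notdef
void
example_release(bp, whole_block)
	struct buf *bp;
	int whole_block;
{
	if (whole_block)
		bawrite(bp);	/* full block: start the write, don't wait */
	else
		bdwrite(bp);	/* partial fill: expect another write soon */
}
#endif /* notdef */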
/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
bawrite(bp)
	register struct buf *bp;
{

	/*
	 * Setting the ASYNC flag causes bwrite to return
	 * after starting the I/O.
	 */
	bp->b_flags |= B_ASYNC;
	(void) VOP_BWRITE(bp);
}

/*
 * Do clustered write for FFS.
 *
 * Four cases:
 *	1. Write is not sequential (write asynchronously)
 *	Write is sequential:
 *	2. beginning of cluster - begin cluster
 *	3. middle of a cluster - add to cluster
 *	4. end of a cluster - asynchronously write cluster
 */
void
cluster_write(bp, filesize)
	struct buf *bp;
	u_quad_t filesize;
{
	struct vnode *vp;
	daddr_t lbn;
	int clen;

	vp = bp->b_vp;
	lbn = bp->b_lblkno;

	/* Initialize vnode to beginning of file. */
	if (lbn == 0)
		vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;

	if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 ||
	    (bp->b_blkno != vp->v_lasta + bp->b_bcount / DEV_BSIZE)) {
		if (vp->v_clen != 0)
			/*
			 * Write is not sequential.
			 */
			cluster_wbuild(vp, NULL, bp->b_bcount, vp->v_cstart,
			    vp->v_lastw - vp->v_cstart + 1, lbn);
		/*
		 * Consider beginning a cluster.
		 */
		if ((lbn + 1) * bp->b_bcount == filesize)
			/* End of file, make cluster as large as possible */
			clen = MAXBSIZE / vp->v_mount->mnt_stat.f_iosize - 1;
		else if (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &clen)) {
			bawrite(bp);
			vp->v_clen = 0;
			vp->v_lasta = bp->b_blkno;
			vp->v_cstart = lbn + 1;
			vp->v_lastw = lbn;
			return;
		}
		vp->v_clen = clen;
		if (clen == 0) {		/* I/O not contiguous */
			vp->v_cstart = lbn + 1;
			bawrite(bp);
		} else {			/* Wait for rest of cluster */
			vp->v_cstart = lbn;
			bdwrite(bp);
		}
	} else if (lbn == vp->v_cstart + vp->v_clen) {
		/*
		 * At end of cluster, write it out.
		 */
		cluster_wbuild(vp, bp, bp->b_bcount, vp->v_cstart,
		    vp->v_clen + 1, lbn);
		vp->v_clen = 0;
		vp->v_cstart = lbn + 1;
	} else
		/*
		 * In the middle of a cluster, so just delay the
		 * I/O for now.
		 */
		bdwrite(bp);
	vp->v_lastw = lbn;
	vp->v_lasta = bp->b_blkno;
}
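
/*
 * Worked example (illustrative, not in the original source): writing
 * blocks 0..4 of a file whose blocks are contiguous on disk, with
 * VOP_BMAP reporting clen == 4 at block 0.  Block 0 begins a cluster
 * (v_cstart = 0, v_clen = 4) and is delayed; blocks 1-3 fall in the
 * middle and are delayed too; block 4 == v_cstart + v_clen ends the
 * cluster, so cluster_wbuild() pushes blocks 0-4 out as one I/O.
 */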
/*
 * This is an awful lot like cluster_rbuild...wish they could be combined.
 * The last lbn argument is the current block on which I/O is being
 * performed.  Check to see that it doesn't fall in the middle of
 * the current block.
 */
void
cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn)
	struct vnode *vp;
	struct buf *last_bp;
	long size;
	daddr_t start_lbn;
	int len;
	daddr_t lbn;
{
	struct cluster_save *b_save;
	struct buf *bp, *tbp;
	caddr_t cp;
	int i, s;

#ifdef DIAGNOSTIC
	if (size != vp->v_mount->mnt_stat.f_iosize)
		panic("cluster_wbuild: size %d != filesize %d\n",
			size, vp->v_mount->mnt_stat.f_iosize);
#endif
redo:
	while ((!incore(vp, start_lbn) || start_lbn == lbn) && len) {
		++start_lbn;
		--len;
	}

	/* Get more memory for current buffer */
	if (len <= 1) {
		if (last_bp)
			bawrite(last_bp);
		else if (len) {
			bp = getblk(vp, start_lbn, size, 0, 0);
			bawrite(bp);
		}
		return;
	}

	bp = getblk(vp, start_lbn, size, 0, 0);
	if (!(bp->b_flags & B_DELWRI)) {
		++start_lbn;
		--len;
		brelse(bp);
		goto redo;
	}

	--len;
	b_save = malloc(sizeof(struct buf *) * len + sizeof(struct cluster_save),
	    M_SEGMENT, M_WAITOK);
	b_save->bs_bcount = bp->b_bcount;
	b_save->bs_bufsize = bp->b_bufsize;
	b_save->bs_nchildren = 0;
	b_save->bs_children = (struct buf **)(b_save + 1);
	b_save->bs_saveaddr = bp->b_saveaddr;
	bp->b_saveaddr = (caddr_t) b_save;

	bp->b_flags |= B_CALL;
	bp->b_iodone = cluster_callback;
	cp = bp->b_un.b_addr + bp->b_bufsize;
	for (++start_lbn, i = 0; i < len; ++i, ++start_lbn) {
		if (!incore(vp, start_lbn) || start_lbn == lbn)
			break;

		if (last_bp == NULL || start_lbn != last_bp->b_lblkno) {
			tbp = getblk(vp, start_lbn, size, 0, 0);
#ifdef DIAGNOSTIC
			if (tbp->b_bcount != tbp->b_bufsize)
				panic("cluster_wbuild: Buffer too big");
#endif
			if (!(tbp->b_flags & B_DELWRI)) {
				brelse(tbp);
				break;
			}
		} else
			tbp = last_bp;

		++b_save->bs_nchildren;

		/* Move memory from children to parent */
#ifdef DIAGNOSTIC
		if (tbp->b_blkno != (bp->b_blkno + bp->b_bufsize / DEV_BSIZE)) {
			printf("Clustered Block: %d addr %x bufsize: %d\n",
			    bp->b_lblkno, bp->b_blkno, bp->b_bufsize);
			printf("Child Block: %d addr: %x\n", tbp->b_lblkno,
			    tbp->b_blkno);
			panic("Clustered write to wrong blocks");
		}
#endif
		pagemove(tbp->b_un.b_daddr, cp, size);
		bp->b_bcount += size;
		bp->b_bufsize += size;

		tbp->b_bufsize -= size;
		tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
		tbp->b_flags |= B_ASYNC;
		s = splbio();
		reassignbuf(tbp, tbp->b_vp);		/* put on clean list */
		++tbp->b_vp->v_numoutput;
		splx(s);
		b_save->bs_children[i] = tbp;

		cp += size;
	}

	if (i == 0) {
		/* None to cluster */
		bp->b_saveaddr = b_save->bs_saveaddr;
		bp->b_flags &= ~B_CALL;
		bp->b_iodone = NULL;
		free(b_save, M_SEGMENT);
	}
	bawrite(bp);
	if (i < len) {
		len -= i + 1;
		start_lbn += 1;
		goto redo;
	}
}
/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
void
brelse(bp)
	register struct buf *bp;
{
	register struct queue_entry *flist;
	int s;

	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	/*
	 * If a process is waiting for the buffer, or
	 * is waiting for a free buffer, awaken it.
	 */
	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t)bp);
	if (needbuffer) {
		needbuffer = 0;
		wakeup((caddr_t)&needbuffer);
	}
	/*
	 * Retry I/O for locked buffers rather than invalidating them.
	 */
	s = splbio();
	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
		bp->b_flags &= ~B_ERROR;
	/*
	 * Disassociate buffers that are no longer valid.
	 */
	if (bp->b_flags & (B_NOCACHE | B_ERROR))
		bp->b_flags |= B_INVAL;
	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
		if (bp->b_vp)
			brelvp(bp);
		bp->b_flags &= ~B_DELWRI;
	}
	/*
	 * Stick the buffer back on a free list.
	 */
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bufqueues[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bufqueues[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bufqueues[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bufqueues[BQ_AGE];
		else
			flist = &bufqueues[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
	splx(s);
}
/*
 * Check to see if a block is currently memory resident.
 */
struct buf *
incore(vp, blkno)
	struct vnode *vp;
	daddr_t blkno;
{
	register struct buf *bp;

	for (bp = BUFHASH(vp, blkno)->le_next; bp; bp = bp->b_hash.qe_next)
		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
		    (bp->b_flags & B_INVAL) == 0)
			return (bp);
	return (NULL);
}
/*
 * Check to see if a block is currently memory resident.
 * If it is resident, return it. If it is not resident,
 * allocate a new buffer and assign it to the block.
 */
struct buf *
#ifdef SECSIZE
getblk(dev, blkno, size, secsize)
#else /* SECSIZE */
getblk(vp, blkno, size, slpflag, slptimeo)
#endif /* SECSIZE */
	register struct vnode *vp;
	daddr_t blkno;
	int size, slpflag, slptimeo;
{
	register struct buf *bp;
	struct list_entry *dp;
	int s, error;

	if (size > MAXBSIZE)
		panic("getblk: size too big");
	/*
	 * Search the cache for the block. If the buffer is found,
	 * but it is currently locked, then we must wait for it to
	 * become available.
	 */
	dp = BUFHASH(vp, blkno);
loop:
	for (bp = dp->le_next; bp; bp = bp->b_hash.qe_next) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp)
			continue;
		s = splbio();
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
				"getblk", slptimeo);
			splx(s);
			if (error)
				return (NULL);
			goto loop;
		}
		/*
		 * The test for B_INVAL is moved down here, since there
		 * are cases where B_INVAL is set before VOP_BWRITE() is
		 * called and for NFS, the process cannot be allowed to
		 * allocate a new buffer for the same block until the write
		 * back to the server has been completed. (ie. B_BUSY clears)
		 */
		if (bp->b_flags & B_INVAL) {
			splx(s);
			continue;
		}
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		if (bp->b_bcount != size) {
			printf("getblk: stray size");
			bp->b_flags |= B_INVAL;
			bwrite(bp);
			goto loop;
		}
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	/*
	 * The loop back to the top when getnewbuf() fails is because
	 * stateless filesystems like NFS have no node locks. Thus,
	 * there is a slight chance that more than one process will
	 * try and getnewbuf() for the same block concurrently when
	 * the first sleeps in getnewbuf(). So after a sleep, go back
	 * up to the top to check the hash lists again.
	 */
	if ((bp = getnewbuf(slpflag, slptimeo)) == 0)
		goto loop;
	bremhash(bp);
	bgetvp(vp, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = blkno;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, dp);
	allocbuf(bp, size);
	return (bp);
}
/*
 * Allocate a buffer.
 * The caller will assign it to a block.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
	while ((bp = getnewbuf(0, 0)) == NULL)
		/* void */;
	bp->b_flags |= B_INVAL;
	bremhash(bp);
	binshash(bp, &invalhash);
	bp->b_bcount = 0;
#ifdef SECSIZE
	bp->b_blksize = DEV_BSIZE;
#endif /* SECSIZE */
	bp->b_error = 0;
	bp->b_resid = 0;
	allocbuf(bp, size);
	return (bp);
}
/*
 * Expand or contract the actual memory allocated to a buffer.
 * If no memory is available, release buffer and take error exit.
 */
allocbuf(tp, size)
	register struct buf *tp;
	int size;
{
	register struct buf *bp, *ep;
	int sizealloc, take, s;

	sizealloc = roundup(size, CLBYTES);
	/*
	 * Buffer size does not change
	 */
	if (sizealloc == tp->b_bufsize)
		goto out;
	/*
	 * Buffer size is shrinking.
	 * Place excess space in a buffer header taken from the
	 * BQ_EMPTY buffer list and placed on the "most free" list.
	 * If no extra buffer headers are available, leave the
	 * extra space in the present buffer.
	 */
	if (sizealloc < tp->b_bufsize) {
		if ((ep = bufqueues[BQ_EMPTY].qe_next) == NULL)
			goto out;
		s = splbio();
		bremfree(ep);
		ep->b_flags |= B_BUSY;
		splx(s);
		pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
		    (int)tp->b_bufsize - sizealloc);
		ep->b_bufsize = tp->b_bufsize - sizealloc;
		tp->b_bufsize = sizealloc;
		ep->b_flags |= B_INVAL;
		ep->b_bcount = 0;
		brelse(ep);
		goto out;
	}
	/*
	 * More buffer space is needed. Get it out of buffers on
	 * the "most free" list, placing the empty headers on the
	 * BQ_EMPTY buffer header list.
	 */
	while (tp->b_bufsize < sizealloc) {
		take = sizealloc - tp->b_bufsize;
		while ((bp = getnewbuf(0, 0)) == NULL)
			/* void */;
		if (take >= bp->b_bufsize)
			take = bp->b_bufsize;
		pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
		    &tp->b_un.b_addr[tp->b_bufsize], take);
		tp->b_bufsize += take;
		bp->b_bufsize = bp->b_bufsize - take;
		if (bp->b_bcount > bp->b_bufsize)
			bp->b_bcount = bp->b_bufsize;
		if (bp->b_bufsize <= 0) {
			bremhash(bp);
			binshash(bp, &invalhash);
			bp->b_dev = NODEV;
			bp->b_error = 0;
			bp->b_flags |= B_INVAL;
		}
		brelse(bp);
	}
out:
	tp->b_bcount = size;
	return (1);
}
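
/*
 * Worked example (illustrative, not in the original source): with
 * CLBYTES == 4096, growing a buffer from b_bufsize == 4096 to
 * size == 6000 rounds up to sizealloc == 8192, so one 4096-byte
 * cluster is taken from a donor buffer off a free list and moved in
 * with pagemove(); the donor, now empty, is hashed into invalhash and
 * released to BQ_EMPTY.  Shrinking reverses the move: the excess goes
 * to a header from BQ_EMPTY, which brelse() then puts at the front of
 * the "most free" (AGE) list.
 */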
/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf(slpflag, slptimeo)
	int slpflag, slptimeo;
{
	register struct buf *bp;
	register struct queue_entry *dp;
	register struct ucred *cred;
	int s;

loop:
	s = splbio();
	for (dp = &bufqueues[BQ_AGE]; dp > bufqueues; dp--)
		if (dp->qe_next)
			break;
	if (dp == bufqueues) {		/* no free blocks */
		needbuffer = 1;
		(void) tsleep((caddr_t)&needbuffer, slpflag | (PRIBIO + 1),
			"getnewbuf", slptimeo);
		splx(s);
		return (NULL);
	}
	bp = dp->qe_next;
	bremfree(bp);
	bp->b_flags |= B_BUSY;
	splx(s);
	if (bp->b_flags & B_DELWRI) {
		(void) bawrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	if (bp->b_vp)
		brelvp(bp);
	if (bp->b_rcred != NOCRED) {
		cred = bp->b_rcred;
		bp->b_rcred = NOCRED;
		crfree(cred);
	}
	if (bp->b_wcred != NOCRED) {
		cred = bp->b_wcred;
		bp->b_wcred = NOCRED;
		crfree(cred);
	}
	bp->b_flags = B_BUSY;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	bp->b_validoff = bp->b_validend = 0;
	return (bp);
}
/*
 * Wait for I/O to complete.
 *
 * Extract and return any errors associated with the I/O.
 * If the error flag is set, but no specific error is
 * given, return EIO.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	if ((bp->b_flags & B_ERROR) == 0)
		return (0);
	if (bp->b_error)
		return (bp->b_error);
	return (EIO);
}
/*
 * Mark I/O complete on a buffer.
 *
 * If a callback has been requested, e.g. the pageout
 * daemon, do so. Otherwise, awaken waiting processes.
 */
void
biodone(bp)
	register struct buf *bp;
{

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
/*
 * Return a count of buffers on the "locked" queue.
 */
int
count_lock_queue()
{
	register struct buf *bp;
	register int ret;

	for (ret = 0, bp = (struct buf *)bufqueues[BQ_LOCKED].qe_next;
	    bp; bp = (struct buf *)bp->b_freelist.qe_next)
		++ret;
	return (ret);
}
#ifdef DIAGNOSTIC
/*
 * Print out statistics on the current allocation of the buffer pool.
 * Can be enabled to print out on every ``sync'' by setting "syncprt"
 * in vfs_syscalls.c using sysctl.
 */
void
vfs_bufstats()
{
	int s, i, j, count;
	register struct buf *bp;
	register struct queue_entry *dp;
	int counts[MAXBSIZE/CLBYTES+1];
	static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" };

	for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) {
		count = 0;
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			counts[j] = 0;
		s = splbio();
		for (bp = dp->qe_next; bp; bp = bp->b_freelist.qe_next) {
			counts[bp->b_bufsize/CLBYTES]++;
			count++;
		}
		splx(s);
		printf("%s: total-%d", bname[i], count);
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			if (counts[j] != 0)
				printf(", %d-%d", j * CLBYTES, counts[j]);
		printf("\n");
	}
}
#endif /* DIAGNOSTIC */
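
/*
 * Illustrative output (made-up numbers, derived only from the printf
 * formats above): each line gives a queue's total and a histogram
 * keyed by bytes of allocated buffer memory, e.g.
 *
 *	LRU: total-120, 4096-30, 8192-90
 *	AGE: total-10, 0-2, 4096-8
 */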