*
* %sccs.include.redist.c%
*
- * @(#)lfs_segment.c 5.6 (Berkeley) %G%
+ * @(#)lfs_segment.c 7.14 (Berkeley) %G%
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
-#include <sys/resourcevar.h>
#include <sys/kernel.h>
+#include <sys/resourcevar.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
-#include <ufs/quota.h>
-#include <ufs/inode.h>
-#include <ufs/dir.h>
-#include <ufs/ufsmount.h>
-
-#include <lfs/lfs.h>
-#include <lfs/lfs_extern.h>
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/dir.h>
+#include <ufs/ufs/ufsmount.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
+
+/*
+ * In-memory description of a segment about to be written.  lfs_vflush() and
+ * lfs_segwrite() each allocate one, and lfs_initseg() resets it every time a
+ * new partial segment is started.
+ */
+struct segment {
+ struct buf **bpp; /* start of the buffer pointer array */
+ struct buf **cbpp; /* next free slot in the bpp array */
+ struct buf *ibp; /* current inode page buffer, NULL if none */
+ struct finfo *fip; /* current fileinfo pointer */
+ void *segsum; /* segment summary info */
+ u_long ninodes; /* number of inodes in this segment */
+ u_long seg_bytes_left; /* bytes left in segment */
+ u_long sum_bytes_left; /* bytes left in summary block */
+ u_long seg_number; /* number of this segment */
+#define SEGM_CKP 0x01 /* doing a checkpoint */
+ u_long seg_flags; /* run-time flags for this segment */
+};
/*
- * Add a check so that if the segment is empty, you don't write it.
- *
- * Change lfs_ialloc to allocate a new page of inodes if you have to.
- *
- * Need to keep vnode v_numoutput up to date for pending writes? Could
- * actually fire off the datablock writes before you finish. This would give
- * them a chance to get started earlier.
+ * Determine if it's OK to start a partial in this segment, or if we need
+ * to go on to a new segment.
*/
-
-static int lfs_biocallback __P((BUF *));
-static void lfs_endsum __P((LFS *, SEGMENT *, int));
-static SEGMENT *lfs_gather
- __P((LFS *, SEGMENT *, VNODE *, int (*) __P((BUF *))));
-static BUF *lfs_newbuf __P((LFS *, daddr_t, size_t));
-static SEGMENT *lfs_newseg __P((LFS *));
-static SEGMENT *lfs_newsum __P((LFS *, SEGMENT *));
-static daddr_t lfs_nextseg __P((LFS *));
-static void lfs_updatemeta __P((LFS *, SEGMENT *, INODE *, daddr_t *,
- BUF **, int));
-static SEGMENT *lfs_writeckp __P((LFS *, SEGMENT *));
-static SEGMENT *lfs_writefile __P((LFS *, SEGMENT *, VNODE *, int));
-static SEGMENT *lfs_writeinode __P((LFS *, SEGMENT *, INODE *));
-static void lfs_writeseg __P((LFS *, SEGMENT *));
-static void lfs_writesum __P((LFS *));
-static void lfs_writesuper __P((LFS *));
-static int match_data __P((BUF *));
-static int match_dindir __P((BUF *));
-static int match_indir __P((BUF *));
-static daddr_t next __P((LFS *, SEGMENT *, int *));
-static void shellsort __P((BUF **, daddr_t *, register int));
+/* Non-zero while more than one file system block's worth of disk blocks is left. */
+#define LFS_PARTIAL_FITS(fs) \
+ (((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg)) > \
+ (1 << (fs)->lfs_fsbtodb))
+
+/* Forward declarations for the segment-writing routines used below. */
+int lfs_callback __P((struct buf *));
+void lfs_gather __P((struct lfs *, struct segment *,
+ struct vnode *, int (*) __P((struct lfs *, struct buf *))));
+void lfs_initseg __P((struct lfs *, struct segment *));
+void lfs_iset __P((struct inode *, daddr_t, time_t));
+int lfs_match_data __P((struct lfs *, struct buf *));
+int lfs_match_dindir __P((struct lfs *, struct buf *));
+int lfs_match_indir __P((struct lfs *, struct buf *));
+int lfs_match_tindir __P((struct lfs *, struct buf *));
+struct buf *
+ lfs_newbuf __P((struct lfs *, daddr_t, size_t));
+void lfs_newseg __P((struct lfs *));
+void lfs_shellsort __P((struct buf **, daddr_t *, register int));
+void lfs_updatemeta __P((struct lfs *,
+ struct segment *, struct vnode *, daddr_t *, struct buf **, int));
+void lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
+void lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
+void lfs_writeseg __P((struct lfs *, struct segment *));
+void lfs_writesuper __P((struct lfs *, struct segment *));
+
+int lfs_allclean_wakeup; /* Cleaner wakeup address; see lfs_initseg(). */
/*
- * XXX -- when we add fragments in here, we will need to allocate a larger
- * buffer pointer array (sp->bpp).
+ * Ifile and meta data blocks are not marked busy, so segment writes MUST be
+ * single threaded. Currently, there are two paths into lfs_segwrite, sync()
+ * and getnewbuf(). They both mark the file system busy. Lfs_vflush()
+ * explicitly marks the file system busy. So lfs_segwrite is safe. I think.
*/
+
+/*
+ * lfs_vflush --
+ * Write the dirty blocks and the inode of a single vnode as a
+ * checkpoint (SEGM_CKP) partial segment, then wait for the
+ * outstanding I/O count to reach zero.
+ *
+ * Returns 0 on success, or when vfs_busy() reports that the file system
+ * could not be marked busy; otherwise returns the error from tsleep().
+ * The interrupt level, the busy mark and the segment structure are
+ * released on every path that acquired them.
+ */
int
-lfs_segwrite(mp, do_ckp)
- MOUNT *mp;
- int do_ckp; /* do a checkpoint too */
+lfs_vflush(vp)
+ struct vnode *vp;
{
- INODE *ip;
- LFS *fs;
- VNODE *vp;
- SEGMENT *sp;
- int s;
-
+ struct inode *ip;
+ struct lfs *fs;
+ struct mount *mp;
+ struct segment *sp;
+ int error, s;
+
+#ifdef VERBOSE
+ printf("lfs_vflush\n");
+#endif
+ mp = vp->v_mount;
fs = VFSTOUFS(mp)->um_lfs;
-#ifdef DIAGNOSTIC
- if (fs->lfs_seglist != NULL)
- panic("lfs_segwrite: seglist not NULL");
+ /*
+ * XXX
+ * check flags?
+ * mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY) ||
+ */
+ if (vfs_busy(mp))
+ return (0);
+
+ /*
+ * Allocate a segment structure and enough space to hold pointers to
+ * the maximum possible number of buffers which can be described in a
+ * single summary block.
+ */
+ sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
+ sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
+ sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
+ sp->seg_flags = SEGM_CKP;
+ lfs_initseg(fs, sp);
+
+ /*
+ * Keep a cumulative count of the outstanding I/O operations. If the
+ * disk drive catches up with us it could go to zero before we finish,
+ * so we artificially increment it by one until we've scheduled all of
+ * the writes we intend to do.
+ */
+ s = splbio();
+ ++fs->lfs_iocount;
+ splx(s);
+
+ if (vp->v_dirtyblkhd != NULL)
+ lfs_writefile(fs, sp, vp);
+ ip = VTOI(vp);
+ lfs_writeinode(fs, sp, ip);
+ /*
+ * Clear the in-core modification bits. These bits are tested in
+ * i_flag elsewhere in this file, so they must be cleared there too.
+ */
+ ip->i_flag &= ~(IMOD | IACC | IUPD | ICHG);
+
+ lfs_writeseg(fs, sp);
+
+ /*
+ * If the I/O count is non-zero, sleep until it reaches zero. At the
+ * moment, the user's process hangs around so we can sleep.
+ *
+ * A tsleep() failure is remembered rather than returned immediately,
+ * so that the spl level, the busy mark and the segment memory are
+ * always released before returning.
+ */
+ s = splbio();
+ error = 0;
+ if (--fs->lfs_iocount)
+ error = tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs vflush", 0);
+ splx(s);
+ vfs_unbusy(mp);
+
+ free(sp->bpp, M_SEGMENT);
+ free(sp, M_SEGMENT);
+
+ return (error);
+}
+
+/*
+ * lfs_segwrite --
+ * Write every dirty inode/file on the mount point into the log and,
+ * when do_ckp is set, also write the ifile, wait for the I/O count
+ * to reach zero and write the superblock.
+ *
+ * Returns 0 on success or the error from tsleep(). The interrupt level
+ * and the segment structure are released on every path.
+ */
+int
+lfs_segwrite(mp, do_ckp)
+ struct mount *mp;
+ int do_ckp; /* Do a checkpoint. */
+{
+ struct inode *ip;
+ struct lfs *fs;
+ struct segment *sp;
+ struct vnode *vp;
+ int error, islocked, s;
+
+#ifdef VERBOSE
+ printf("lfs_segwrite\n");
#endif
+ fs = VFSTOUFS(mp)->um_lfs;
/*
- * LFS requires that the summary blocks be written after the rest of
- * the segment, and that the super blocks (on checkpoint) be written
- * last of all. We keep a cumulative count of the outstanding blocks
- * from all of the segments, and write these blocks when this count
- * goes to zero. If the disk drive catches up with us it could go
- * to zero before we finish, so we artificially increment it by one
- * until we've scheduled all of the writes we intend to do. At the
- * moment, the user's process hangs around so we can sleep; this should
- * probably be redone using a kernel thread.
+ * Allocate a segment structure and enough space to hold pointers to
+ * the maximum possible number of buffers which can be described in a
+ * single summary block.
+ */
+ sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
+ sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
+ sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
+ sp->seg_flags = do_ckp ? SEGM_CKP : 0;
+ lfs_initseg(fs, sp);
+
+ /*
+ * Keep a cumulative count of the outstanding I/O operations. If the
+ * disk drive catches up with us it could go to zero before we finish,
+ * so we artificially increment it by one until we've scheduled all of
+ * the writes we intend to do. If not a checkpoint, we never do the
+ * final decrement, avoiding the wakeup in the callback routine.
*/
s = splbio();
- fs->lfs_iocount = 1;
+ ++fs->lfs_iocount;
splx(s);
- sp = lfs_newseg(fs);
-loop:
- for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
+loop: for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
/*
* If the vnode that we are about to sync is no longer
* associated with this mount point, start over.
*/
if (vp->v_mount != mp)
goto loop;
- if (VOP_ISLOCKED(vp))
- continue;
- ip = VTOI(vp);
- if (ip->i_number == LFS_IFILE_INUM)
- continue;
- if ((ip->i_flag & (IMOD | IACC | IUPD | ICHG)) == 0 &&
- vp->v_dirtyblkhd == NULL)
- continue;
- if (vget(vp))
+
+ islocked = VOP_ISLOCKED(vp);
+
+ /*
+ * XXX
+ * This is wrong, I think -- we should just wait until we
+ * get the vnode and go on. Probably going to reschedule
+ * all of the writes we already scheduled...
+ */
+ if (islocked)
+ VREF(vp);
+ else if (vget(vp))
+{
+printf("lfs_segment: failed to get vnode (tell Keith)!\n");
goto loop;
- sp = lfs_writefile(fs, sp, vp, do_ckp);
+}
+ /*
+ * Write the inode/file if dirty and it's not the IFILE.
+ */
+ ip = VTOI(vp);
+ if ((ip->i_flag & (IMOD | IACC | IUPD | ICHG) ||
+ vp->v_dirtyblkhd != NULL) &&
+ ip->i_number != LFS_IFILE_INUM) {
+ if (vp->v_dirtyblkhd != NULL)
+ lfs_writefile(fs, sp, vp);
+ lfs_writeinode(fs, sp, ip);
+ /* Clear the same i_flag bits that were tested above. */
+ ip->i_flag &= ~(IMOD | IACC | IUPD | ICHG);
+ }
+ /* Drop the reference the same way it was taken. */
+ if (islocked)
+ vrele(vp);
+ else
+ vput(vp);
+ }
+ if (do_ckp) {
+ /* A checkpoint also writes the ifile itself, last. */
+ vp = fs->lfs_ivnode;
+ while (vget(vp));
+ ip = VTOI(vp);
+ if (vp->v_dirtyblkhd != NULL)
+ lfs_writefile(fs, sp, vp);
+ lfs_writeinode(fs, sp, ip);
+ ip->i_flag &= ~(IMOD | IACC | IUPD | ICHG);
vput(vp);
}
- if (do_ckp)
- sp = lfs_writeckp(fs, sp);
lfs_writeseg(fs, sp);
+ /*
+ * If the I/O count is non-zero, sleep until it reaches zero. At the
+ * moment, the user's process hangs around so we can sleep.
+ *
+ * A tsleep() failure is remembered rather than returned immediately,
+ * so that the spl level is restored and the segment memory is freed
+ * on every path. The superblock is only written after a clean wait.
+ */
s = splbio();
- if (--fs->lfs_iocount)
- sleep(&fs->lfs_iocount, PRIBIO + 1);
- splx(s);
- lfs_writesum(fs);
- if (do_ckp)
- lfs_writesuper(fs);
+ --fs->lfs_iocount;
+ error = 0;
+ if (do_ckp && fs->lfs_iocount)
+ error = tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs sync", 0);
+ splx(s);
+ if (do_ckp && error == 0)
+ lfs_writesuper(fs, sp);
+
+ free(sp->bpp, M_SEGMENT);
+ free(sp, M_SEGMENT);
+
- return (0);
+ return (error);
}
-static int /* XXX should be void */
-lfs_biocallback(bp)
- BUF *bp;
+/*
+ * Write the dirty blocks associated with a vnode.
+ *
+ * Reserves a FINFO in the summary block, fills in the inode number and the
+ * version taken from the file's ifile entry, then gathers the vnode's dirty
+ * data, indirect and double-indirect blocks (triple-indirect only when
+ * TRIPLE is defined). If no blocks were gathered, the reserved summary
+ * space is given back.
+ */
+void
+lfs_writefile(fs, sp, vp)
+ struct lfs *fs;
+ struct segment *sp;
+ struct vnode *vp;
{
- LFS *fs;
+ struct buf *bp;
+ struct finfo *fip;
+ IFILE *ifp;
- /*
- * XXX
- * Reset the flags (probably wrong). If the contents of the buffer
- * are valid, move back onto the clean list.
- */
- bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
- fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs;
- if (bp->b_flags & B_NOCACHE)
- bp->b_vp = NULL;
- else
- reassignbuf(bp, bp->b_vp);
+#ifdef VERBOSE
+ printf("lfs_writefile\n");
+#endif
+ /* Start a new partial segment if a block or a FINFO will not fit. */
+ if (sp->seg_bytes_left < fs->lfs_bsize ||
+ sp->sum_bytes_left < sizeof(struct finfo)) {
+ lfs_writeseg(fs, sp);
+ lfs_initseg(fs, sp);
+ }
+ /* Reserve the FINFO header; block slots are charged as they are added. */
+ sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t);
+
+ fip = sp->fip;
+ fip->fi_nblocks = 0;
+ fip->fi_ino = VTOI(vp)->i_number;
+ LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
+ fip->fi_version = ifp->if_version;
brelse(bp);
-#ifdef SEGWRITE
-printf("callback: buffer: %x iocount %d\n", bp, fs->lfs_iocount);
+ /*
+ * It may not be necessary to write the meta-data blocks at this point,
+ * as the roll-forward recovery code should be able to reconstruct the
+ * list.
+ */
+ lfs_gather(fs, sp, vp, lfs_match_data);
+ lfs_gather(fs, sp, vp, lfs_match_indir);
+ lfs_gather(fs, sp, vp, lfs_match_dindir);
+#ifdef TRIPLE
+ lfs_gather(fs, sp, vp, lfs_match_tindir);
#endif
- if (fs->lfs_iocount == 0)
- panic("lfs_biocallback: zero iocount\n");
- if (--fs->lfs_iocount == 0)
- wakeup(&fs->lfs_iocount);
+ /* Re-read sp->fip: lfs_gather() may have started a new segment. */
+ fip = sp->fip;
+#ifdef META
+ printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks);
+#endif
+ if (fip->fi_nblocks != 0) {
+ /* Count the FINFO and advance past it and its block list. */
+ ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
+ sp->fip =
+ (struct finfo *)((caddr_t)fip + sizeof(struct finfo) +
+ sizeof(daddr_t) * (fip->fi_nblocks - 1));
+ } else
+ sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t);
}
-/* Finish up a summary block. */
-static void
-lfs_endsum(fs, sp, calc_next)
- LFS *fs;
- SEGMENT *sp;
- int calc_next;
+/*
+ * lfs_writeinode --
+ * Copy an inode onto the segment's current inode page, allocating a
+ * new page (and a new partial segment) when needed, record the
+ * inode's new disk address in the ifile, and subtract the size of
+ * the old copy from the usage count of the segment that held it.
+ */
+void
+lfs_writeinode(fs, sp, ip)
+ struct lfs *fs;
+ struct segment *sp;
+ struct inode *ip;
{
- SEGSUM *ssp;
- int nsums_per_blk;
+ struct buf *bp, *ibp;
+ IFILE *ifp;
+ SEGUSE *sup;
+ daddr_t daddr;
+ ino_t ino;
+ int ndx;
- if (sp->sbp == NULL)
- return;
+#ifdef VERBOSE
+ printf("lfs_writeinode\n");
+#endif
+ /* Allocate a new inode block if necessary. */
+ if (sp->ibp == NULL) {
+ /* Allocate a new segment if necessary. */
+ if (sp->seg_bytes_left < fs->lfs_bsize ||
+ sp->sum_bytes_left < sizeof(daddr_t)) {
+ lfs_writeseg(fs, sp);
+ lfs_initseg(fs, sp);
+ }
- ssp = sp->segsum;
+ /* Get next inode block. */
+ daddr = fs->lfs_offset;
+ fs->lfs_offset += fsbtodb(fs, 1);
+ sp->ibp = *sp->cbpp++ =
+ lfs_newbuf(fs, daddr, fs->lfs_bsize);
+
+ /* Set remaining space counters. */
+ sp->seg_bytes_left -= fs->lfs_bsize;
+ sp->sum_bytes_left -= sizeof(daddr_t);
+ /* Inode block addresses are stored from the end of the summary. */
+ ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) -
+ sp->ninodes / INOPB(fs) - 1;
+ ((daddr_t *)(sp->segsum))[ndx] = daddr;
+ }
+
+ /* Update the inode times and copy the inode onto the inode page. */
+ ITIMES(ip, &time, &time);
+ bp = sp->ibp;
+ bp->b_un.b_dino[sp->ninodes % INOPB(fs)] = ip->i_din;
+
+ /* Increment inode count in segment summary block. */
+ ++((SEGSUM *)(sp->segsum))->ss_ninos;
+
+ /* If this page is full, set flag to allocate a new page. */
+ if (++sp->ninodes % INOPB(fs) == 0)
+ sp->ibp = NULL;
/*
- * Compute the address of the next summary block if calc_next is set,
- * otherwise end the chain. If the summary block is full, close it
- * by setting sp->sbp to NULL, so lfs_newsum will allocate a new one.
- * Calculate the checksum last.
+ * If updating the ifile, update the super-block. Update the disk
+ * address and access times for this inode in the ifile.
*/
- nsums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE;
- if (sp->nsums % nsums_per_blk == 0) {
- ssp->ss_nextsum =
- calc_next ? next(fs, sp, NULL) +
- (nsums_per_blk - 1) * LFS_SUMMARY_SIZE / DEV_BSIZE :
- (daddr_t)-1;
- sp->sbp = NULL;
- } else
- ssp->ss_nextsum = calc_next ?
- sp->sum_addr - LFS_SUMMARY_SIZE / DEV_BSIZE : (daddr_t)-1;
+ ino = ip->i_number;
+ if (ino == LFS_IFILE_INUM)
+ fs->lfs_idaddr = bp->b_blkno;
+
+ /* Remember the old address before overwriting it in the ifile. */
+ LFS_IENTRY(ifp, fs, ino, ibp);
+ daddr = ifp->if_daddr;
+ ifp->if_daddr = bp->b_blkno;
+ LFS_UBWRITE(ibp);
- ssp->ss_cksum =
- cksum(&ssp->ss_cksum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_cksum));
+ /* The old copy is now dead; subtract it from its segment's usage. */
+ if (daddr != LFS_UNUSED_DADDR) {
+ LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
+#ifdef DIAGNOSTIC
+ if (sup->su_nbytes < sizeof(struct dinode))
+ /* XXX -- Change to a panic. */
+ printf("lfs: negative bytes (segment %d)\n",
+ datosn(fs, daddr));
+#endif
+ sup->su_nbytes -= sizeof(struct dinode);
+ LFS_UBWRITE(bp);
+ }
}
-static SEGMENT *
+/*
+ * lfs_gather --
+ * Walk the vnode's dirty buffer list and add every non-busy buffer
+ * accepted by match() to the segment's buffer array and to the
+ * current FINFO. When the summary block or the segment fills up,
+ * the blocks collected so far get disk addresses, the segment is
+ * written and a new one is started for the same file.
+ */
+void
lfs_gather(fs, sp, vp, match)
- LFS *fs;
- SEGMENT *sp;
- VNODE *vp;
- int (*match) __P((BUF *));
+ struct lfs *fs;
+ struct segment *sp;
+ struct vnode *vp;
+ int (*match) __P((struct lfs *, struct buf *));
{
- BUF **bpp, *bp, *nbp;
- FINFO *fip;
- INODE *ip;
+ struct buf **bpp, *bp, *nbp;
+ struct finfo *fip;
+ struct inode *ip;
daddr_t *lbp, *start_lbp;
u_long version;
int s;
+#ifdef VERBOSE
+ printf("lfs_gather\n");
+#endif
ip = VTOI(vp);
bpp = sp->cbpp;
fip = sp->fip;
+ /*
+ * Start recording logical block numbers after any blocks already
+ * listed in this FINFO. Without this, lbp and start_lbp would be
+ * used uninitialized below.
+ */
+ start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks];
s = splbio();
for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
nbp = bp->b_blockf;
- if (bp->b_flags & B_BUSY)
+ /*
+ * XXX
+ * Should sleep on any BUSY buffer if doing an fsync?
+ */
+ if (bp->b_flags & B_BUSY || !match(fs, bp))
continue;
#ifdef DIAGNOSTIC
- if ((bp->b_flags & B_DELWRI) == 0)
- panic("lfs_gather: not dirty");
+ if (!(bp->b_flags & B_DELWRI))
+ panic("lfs_gather: bp not B_DELWRI");
+ if (!(bp->b_flags & B_LOCKED))
+ panic("lfs_gather: bp not B_LOCKED");
#endif
- if (!match(bp))
- continue;
-
- /* Remove the buffer from the free lists, prepare it for I/O. */
- bremfree(bp);
- bp->b_flags |= B_BUSY | B_CALL;
- bp->b_iodone = lfs_biocallback;
- bp->b_dev = VTOI(fs->lfs_ivnode)->i_dev;
-
- /* Insert into the buffer list, update the FINFO block. */
- *sp->cbpp++ = bp;
- ++fip->fi_nblocks;
- *lbp++ = bp->b_lblkno;
-
- sp->sum_bytes_left -= sizeof(daddr_t);
- sp->seg_bytes_left -= bp->b_bufsize;
-
/*
- * Allocate a new summary block (and, possibly, a new segment)
- * if necessary. In this case we sort the blocks we've done
- * so far and assign disk addresses so we can start the new
- * block correctly. We may be doing I/O, so we need to release
- * the splbio() before anything else.
+ * If full, finish this segment. We may be doing I/O, so
+ * release and reacquire the splbio().
*/
if (sp->sum_bytes_left < sizeof(daddr_t) ||
sp->seg_bytes_left < fs->lfs_bsize) {
splx(s);
lfs_updatemeta(fs,
- sp, ip, start_lbp, bpp, lbp - start_lbp);
+ sp, vp, start_lbp, bpp, lbp - start_lbp);
/* Add the current file to the segment summary. */
++((SEGSUM *)(sp->segsum))->ss_nfinfo;
version = fip->fi_version;
- if (sp->seg_bytes_left < fs->lfs_bsize) {
- lfs_writeseg(fs, sp);
- sp = lfs_newseg(fs);
- } else if (sp->sum_bytes_left < sizeof(daddr_t))
- sp = lfs_newsum(fs, sp);
+ lfs_writeseg(fs, sp);
+ lfs_initseg(fs, sp);
- /* A new FINFO either way. */
fip = sp->fip;
fip->fi_version = version;
fip->fi_ino = ip->i_number;
start_lbp = lbp = fip->fi_blocks;
+ sp->sum_bytes_left -=
+ sizeof(struct finfo) - sizeof(daddr_t);
+
bpp = sp->cbpp;
s = splbio();
}
+
+ /* Insert into the buffer list, update the FINFO block. */
+ *sp->cbpp++ = bp;
+ ++fip->fi_nblocks;
+ *lbp++ = bp->b_lblkno;
+
+ sp->sum_bytes_left -= sizeof(daddr_t);
+ sp->seg_bytes_left -= bp->b_bufsize;
}
splx(s);
- lfs_updatemeta(fs, sp, ip, start_lbp, bpp, lbp - start_lbp);
- return (sp);
+ lfs_updatemeta(fs, sp, vp, start_lbp, bpp, lbp - start_lbp);
}
/*
- * Allocate a new buffer header.
+ * Update the metadata that points to the blocks listed in the FINFO
+ * array.
+ *
+ * The nblocks buffers at bpp, with logical block numbers at lbp, are
+ * sorted and given consecutive disk addresses starting at fs->lfs_offset.
+ * For each block the pointer to it (in the inode or in an indirect block)
+ * is rewritten, and one block's worth of bytes is subtracted from the
+ * usage count of the segment that held the previous copy.
*/
-static BUF *
-lfs_newbuf(fs, daddr, size)
- LFS *fs;
- daddr_t daddr;
- size_t size;
+void
+lfs_updatemeta(fs, sp, vp, lbp, bpp, nblocks)
+ struct lfs *fs;
+ struct segment *sp;
+ struct vnode *vp;
+ daddr_t *lbp;
+ struct buf **bpp;
+ int nblocks;
{
- BUF *bp;
+ SEGUSE *sup;
+ struct buf *bp;
+ INDIR a[NIADDR], *ap;
+ struct inode *ip;
+ daddr_t daddr, lbn, off;
+ int db_per_fsb, error, i, num;
+
+#ifdef VERBOSE
+ printf("lfs_updatemeta\n");
+#endif
+ if (nblocks == 0)
+ return;
- bp = getnewbuf();
- bremhash(bp);
- bp->b_vp = fs->lfs_ivnode;
- bp->b_bcount = 0;
- bp->b_blkno = bp->b_lblkno = daddr;
- bp->b_error = 0;
- bp->b_resid = 0;
- bp->b_flags |= B_DELWRI | B_NOCACHE;
- bp->b_iodone = lfs_biocallback;
- bp->b_dev = VTOI(fs->lfs_ivnode)->i_dev;
- allocbuf(bp, size);
- return (bp);
+ /* Sort the blocks. */
+ lfs_shellsort(bpp, lbp, nblocks);
+
+ /*
+ * Assign disk addresses, and update references to the logical
+ * block and the segment usage information.
+ */
+ db_per_fsb = fsbtodb(fs, 1);
+ for (i = nblocks; i--; ++bpp) {
+ lbn = *lbp++;
+ (*bpp)->b_blkno = off = fs->lfs_offset;
+ fs->lfs_offset += db_per_fsb;
+
+ /* daddr receives the block's previous address; a/num the indirect path. */
+ if (error = lfs_bmaparray(vp, lbn, &daddr, a, &num))
+ panic("lfs_updatemeta: lfs_bmaparray %d", error);
+ ip = VTOI(vp);
+ switch (num) {
+ case 0:
+ /* No indirection: pointer lives in the inode's direct array. */
+ ip->i_db[lbn] = off;
+ break;
+ case 1:
+ /* One level: pointer lives in the inode's indirect array. */
+ ip->i_ib[a[0].in_off] = off;
+ break;
+ default:
+ /* Deeper: pointer lives in the last indirect block on the path. */
+ ap = &a[num - 1];
+ if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
+ panic("lfs_updatemeta: bread bno %d",
+ ap->in_lbn);
+ bp->b_un.b_daddr[ap->in_off] = off;
+ lfs_bwrite(bp);
+ }
+
+ /* Update segment usage information. */
+ if (daddr != UNASSIGNED) {
+ LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
+#ifdef DIAGNOSTIC
+ if (sup->su_nbytes < fs->lfs_bsize)
+ /* XXX -- Change to a panic. */
+ printf("lfs: negative bytes (segment %d)\n",
+ datosn(fs, daddr));
+#endif
+ sup->su_nbytes -= fs->lfs_bsize;
+ LFS_UBWRITE(bp);
+ }
+ }
}
/*
* Start a new segment.
*/
-static SEGMENT *
-lfs_newseg(fs)
- LFS *fs;
+/*
+ * lfs_initseg --
+ * Prepare sp for a new partial segment. If LFS_PARTIAL_FITS() says
+ * the current segment has no room, wake the cleaner sleepers, move
+ * to the next segment with lfs_newseg() and skip over a superblock
+ * if the segment holds one. Then reset the inode page state,
+ * allocate the summary buffer as the first entry of the buffer
+ * array and initialize the SEGSUM and the first FINFO.
+ */
+void
+lfs_initseg(fs, sp)
+ struct lfs *fs;
+ struct segment *sp;
{
- FINFO *fip;
- SEGMENT *sp;
SEGUSE *sup;
SEGSUM *ssp;
+ struct buf *bp;
daddr_t lbn, *lbnp;
- sp = malloc(sizeof(SEGMENT), M_SEGMENT, M_WAITOK);
- sp->nextp = NULL;
- sp->cbpp = sp->bpp =
- malloc(fs->lfs_ssize * sizeof(BUF *), M_SEGMENT, M_WAITOK);
- sp->ibp = sp->sbp = NULL;
- sp->seg_bytes_left = (fs->lfs_segmask + 1);
- sp->saddr = fs->lfs_nextseg;
- sp->sum_addr = sp->saddr + sp->seg_bytes_left / DEV_BSIZE;
- sp->ninodes = 0;
- sp->nsums = 0;
- sp->seg_number =
- (sp->saddr - fs->lfs_sboffs[0]) / fsbtodb(fs, fs->lfs_ssize);
-
+#ifdef VERBOSE
+ printf("lfs_initseg\n");
+#endif
/* Advance to the next segment. */
- fs->lfs_nextseg = lfs_nextseg(fs);
-
- /* Initialize the summary block. */
- sp = lfs_newsum(fs, sp);
-
- /*
- * If su_nbytes non-zero after the segment was cleaned, the segment
- * contains a super-block. Add segment summary information to not
- * allocate over it.
- */
- sup = fs->lfs_segtab + sp->seg_number;
- if (sup->su_nbytes != 0) {
- ssp = (SEGSUM *)sp->segsum;
- ++ssp->ss_nfinfo;
- fip = sp->fip;
- fip->fi_nblocks = LFS_SBPAD >> fs->lfs_bshift;
- fip->fi_version = 1;
- fip->fi_ino = LFS_UNUSED_INUM;
- lbnp = fip->fi_blocks;
- for (lbn = 0; lbn < fip->fi_nblocks; ++lbn)
- *lbnp++ = lbn;
- sp->saddr += fsbtodb(fs, fip->fi_nblocks);
- sp->seg_bytes_left -= sup->su_nbytes;
- sp->sum_bytes_left -=
- sizeof(FINFO) + (fip->fi_nblocks - 1) * sizeof(daddr_t);
- sp->fip = (FINFO *)lbnp;
- }
- return (sp);
-}
-
-static SEGMENT *
-lfs_newsum(fs, sp)
- LFS *fs;
- SEGMENT *sp;
-{
- SEGSUM *ssp;
- int nblocks;
-
- lfs_endsum(fs, sp, 1);
-
- /* Allocate a new buffer if necessary. */
- if (sp->sbp == NULL) {
- /* Allocate a new segment if necessary. */
- if (sp->seg_bytes_left < fs->lfs_bsize) {
- lfs_writeseg(fs, sp);
- sp = lfs_newseg(fs);
- }
-
- /* Get the next summary block. */
- sp->sum_addr = next(fs, sp, &nblocks);
-
- /*
- * Get a new buffer and enter into the buffer list from
- * the top of the list.
- */
- sp->sbp = sp->bpp[fs->lfs_ssize - (nblocks + 1)] =
- lfs_newbuf(fs, sp->sum_addr, fs->lfs_bsize);
+ if (!LFS_PARTIAL_FITS(fs)) {
+ /* Wake up any cleaning procs waiting on this file system. */
+ wakeup(&fs->lfs_nextseg);
+ wakeup(&lfs_allclean_wakeup);
- sp->seg_bytes_left -= fs->lfs_bsize;
+ /* Writing restarts at the beginning of the new current segment. */
+ lfs_newseg(fs);
+ fs->lfs_offset = fs->lfs_curseg;
+ sp->seg_number = datosn(fs, fs->lfs_curseg);
+ sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE;
/*
- * Do a callback for all but the very last summary block in
- * the segment, for which we wait.
+ * If the segment contains a superblock, update the offset
+ * and summary address to skip over it.
*/
- if (sp->nsums != 0)
- sp->sbp->b_flags |= B_CALL;
- /*
- * Fill in the block from the end. The summary block is filled
- * in from the end to the beginning so that the last summary
- * is the last thing written, verifying the entire block. This
- * should go away when fragments are available.
- */
- sp->segsum =
- sp->sbp->b_un.b_addr + fs->lfs_bsize - LFS_SUMMARY_SIZE;
- sp->sum_addr += (fs->lfs_bsize - LFS_SUMMARY_SIZE) / DEV_BSIZE;
-
-#ifdef SEGWRITE
- printf("alloc summary: bp %x, lblkno %x, bp index %d\n",
- sp->sbp, sp->sbp->b_lblkno, fs->lfs_ssize - nblocks);
-#endif
+ LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
+ if (sup->su_flags & SEGUSE_SUPERBLOCK) {
+ fs->lfs_offset += LFS_SBPAD / DEV_BSIZE;
+ sp->seg_bytes_left -= LFS_SBPAD;
+ }
+ brelse(bp);
} else {
- sp->segsum -= LFS_SUMMARY_SIZE;
- sp->sum_addr -= LFS_SUMMARY_SIZE / DEV_BSIZE;
+ /* Same segment: what is left is the total minus what is used. */
+ sp->seg_number = datosn(fs, fs->lfs_curseg);
+ sp->seg_bytes_left = (fs->lfs_dbpseg -
+ (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE;
}
- ++sp->nsums;
+
+ /* No inode page yet in this partial segment. */
+ sp->ibp = NULL;
+ sp->ninodes = 0;
+
+ /* Get a new buffer for SEGSUM and enter it into the buffer list. */
+ sp->cbpp = sp->bpp;
+ *sp->cbpp = lfs_newbuf(fs, fs->lfs_offset, LFS_SUMMARY_SIZE);
+ sp->segsum = (*sp->cbpp)->b_un.b_addr;
+ ++sp->cbpp;
+ fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE;
/* Set point to SEGSUM, initialize it. */
ssp = sp->segsum;
ssp->ss_next = fs->lfs_nextseg;
- ssp->ss_prev = fs->lfs_lastseg;
- ssp->ss_nextsum = (daddr_t)-1;
- ssp->ss_create = time.tv_sec;
ssp->ss_nfinfo = ssp->ss_ninos = 0;
/* Set pointer to first FINFO, initialize it. */
- sp->fip = (FINFO *)(sp->segsum + sizeof(SEGSUM));
+ sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM));
+ sp->fip->fi_nblocks = 0;
+ /* Charge the summary block itself against the segment. */
+ sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM);
- return (sp);
}
-#define seginc(fs, sn) /* increment segment number */ \
- (((sn) + 1) % (fs)->lfs_nseg)
/*
* Return the next segment to write.
*/
-static daddr_t
-lfs_nextseg(fs)
- LFS *fs;
-{
- int segnum, sn;
-
- segnum = sn = datosn(fs, fs->lfs_nextseg);
- while ((sn = seginc(fs, sn)) != segnum &&
- fs->lfs_segtab[sn].su_flags & SEGUSE_DIRTY);
-
- if (sn == segnum)
- panic("lfs_nextseg: file system full"); /* XXX */
- return (sntoda(fs, sn));
-}
-
-/*
- * Update the metadata that points to the blocks listed in the FINFO
- * array.
- */
-static void
-lfs_updatemeta(fs, sp, ip, lbp, bpp, nblocks)
- LFS *fs;
- SEGMENT *sp;
- INODE *ip;
- daddr_t *lbp;
- BUF **bpp;
- int nblocks;
-{
- SEGUSE *segup;
- BUF **lbpp, *bp;
- daddr_t daddr, iblkno;
- int db_per_fsb, error, i;
- long lbn;
-
- if (nblocks == 0)
- return;
-
- /* Sort the blocks and add disk addresses */
- shellsort(bpp, lbp, nblocks);
-
- db_per_fsb = 1 << fs->lfs_fsbtodb;
- for (lbpp = bpp, i = 0; i < nblocks; ++i, ++lbpp) {
- (*lbpp)->b_blkno = sp->saddr;
- sp->saddr += db_per_fsb;
- }
-
- for (lbpp = bpp, i = 0; i < nblocks; ++i, ++lbpp) {
- lbn = lbp[i];
- if (error = lfs_bmap(ip, lbn, &daddr))
- panic("lfs_updatemeta: lfs_bmap");
-
- /* Update in-core copy of old segment usage information. */
- if (daddr != UNASSIGNED) {
- segup = fs->lfs_segtab + datosn(fs, daddr);
- segup->su_lastmod = time.tv_sec;
-#ifdef DIAGNOSTIC
- if (segup->su_nbytes < fs->lfs_bsize)
- panic("lfs: negative bytes (segment %d)\n",
- segup - fs->lfs_segtab);
-#endif
- segup->su_nbytes -= fs->lfs_bsize;
- }
-
- /*
- * Now change whomever points to lbn. We could start with the
- * smallest (most negative) block number in these if clauses,
- * but we assume that indirect blocks are least common, and
- * handle them separately. The test for < 0 is correct and
- * minimizes the path in the common case.
- */
-#define BREAD(bno) \
- if (error = bread(ITOV(ip), (bno), fs->lfs_bsize, NOCRED, &bp)) \
- panic("lfs_updatemeta: bread");
-
- if (lbn < 0)
- if (lbn < -NIADDR) {
-#ifdef META
- printf("meta: update indirect block %d\n",
- D_INDIR);
-#endif
- BREAD(D_INDIR);
- bp->b_un.b_daddr[-lbn % NINDIR(fs)] =
- (*lbpp)->b_blkno;
- lfs_bwrite(bp);
- } else {
- ip->i_ib[-lbn-1] = (*lbpp)->b_blkno;
- } else if (lbn < NDADDR) {
- ip->i_db[lbn] = (*lbpp)->b_blkno;
- } else if ((lbn -= NDADDR) < NINDIR(fs)) {
-#ifdef META
- printf("meta: update indirect block %d\n", S_INDIR);
-#endif
- BREAD(S_INDIR);
- bp->b_un.b_daddr[lbn] = (*lbpp)->b_blkno;
- lfs_bwrite(bp);
- } else if ((lbn =
- (lbn - NINDIR(fs)) / NINDIR(fs)) < NINDIR(fs)) {
- iblkno = -(lbn + NIADDR + 1);
-#ifdef META
- printf("meta: update indirect block %d\n", iblkno);
-#endif
- BREAD(iblkno);
- bp->b_un.b_daddr[lbn % NINDIR(fs)] = (*lbpp)->b_blkno;
- lfs_bwrite(bp);
- } else
- panic("lfs_updatemeta: logical block number too large");
- }
-}
-
-static SEGMENT *
-lfs_writeckp(fs, sp)
- LFS *fs;
- SEGMENT *sp;
+void
+lfs_newseg(fs)
+ struct lfs *fs;
{
- BUF *bp;
- FINFO *fip;
- INODE *ip;
+ CLEANERINFO *cip;
SEGUSE *sup;
- void *xp;
- daddr_t *lbp;
- int bytes_needed, i;
-
- /*
- * This will write the dirty ifile blocks, but not the segusage
- * table nor the ifile inode.
- */
- sp = lfs_writefile(fs, sp, fs->lfs_ivnode, 1);
+ struct buf *bp;
+ int curseg, isdirty, sn;
- /*
- * If the segment usage table and the ifile inode won't fit in this
- * segment, put them in the next one.
- */
- bytes_needed = fs->lfs_segtabsz << fs->lfs_bshift;
- if (sp->ninodes % INOPB(fs) == 0)
- bytes_needed += fs->lfs_bsize;
-
- if (sp->seg_bytes_left < bytes_needed) {
- lfs_writeseg(fs, sp);
- sp = lfs_newseg(fs);
- ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
- } else if (sp->sum_bytes_left < fs->lfs_segtabsz * sizeof(daddr_t)) {
- sp = lfs_newsum(fs, sp);
- ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
- }
-
-#ifdef DEBUG
- if (sp->seg_bytes_left < bytes_needed)
- panic("lfs_writeckp: unable to write checkpoint");
+#ifdef VERBOSE
+ printf("lfs_newseg\n");
#endif
/*
- * Update the segment usage information and the ifile inode
- * and write it out.
- */
- sup = fs->lfs_segtab + sp->seg_number;
- sup->su_nbytes =
- (fs->lfs_segmask + 1) - sp->seg_bytes_left + bytes_needed;
- sup->su_lastmod = time.tv_sec;
- sup->su_flags = SEGUSE_DIRTY;
-
- /*
- * Get buffers for the segusage table and write it out. Don't
- * bother updating the FINFO pointer, it's not used after this.
+ * Turn off the active bit for the current segment, turn on the
+ * active and dirty bits for the next segment, update the cleaner
+ * info. Set the current segment to the next segment, get a new
+ * next segment.
*/
- ip = VTOI(fs->lfs_ivnode);
- fip = sp->fip;
- lbp = &fip->fi_blocks[fip->fi_nblocks];
- for (xp = fs->lfs_segtab, i = 0; i < fs->lfs_segtabsz;
- xp += fs->lfs_bsize, ++i, ++lbp) {
- *sp->cbpp++ = bp = lfs_newbuf(fs, sp->saddr, fs->lfs_bsize);
- bp->b_flags |= B_CALL;
- bcopy(xp, bp->b_un.b_addr, fs->lfs_bsize);
- ip->i_db[i] = sp->saddr;
- sp->saddr += (1 << fs->lfs_fsbtodb);
- *lbp = i;
- ++fip->fi_nblocks;
- }
- return (lfs_writeinode(fs, sp, VTOI(fs->lfs_ivnode)));
-}
-
-/*
- * Write the dirty blocks associated with a vnode.
- */
-static SEGMENT *
-lfs_writefile(fs, sp, vp, do_ckp)
- LFS *fs;
- SEGMENT *sp;
- VNODE *vp;
- int do_ckp;
-{
- FINFO *fip;
- ino_t inum;
-
- inum = VTOI(vp)->i_number;
-
- if (vp->v_dirtyblkhd != NULL) {
- if (sp->seg_bytes_left < fs->lfs_bsize) {
- lfs_writeseg(fs, sp);
- sp = lfs_newseg(fs);
- } else if (sp->sum_bytes_left < sizeof(FINFO))
- sp = lfs_newsum(fs, sp);
- sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
-
- fip = sp->fip;
- fip->fi_nblocks = 0;
- fip->fi_version =
- inum == LFS_IFILE_INUM ? 1 : lfs_getversion(fs, inum);
- fip->fi_ino = inum;
-
- sp = lfs_gather(fs, sp, vp, match_data);
- if (do_ckp) {
- sp = lfs_gather(fs, sp, vp, match_indir);
- sp = lfs_gather(fs, sp, vp, match_dindir);
- }
-
- fip = sp->fip;
-
-#ifdef META
- printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks);
-#endif
- /*
- * If this is the ifile, always update the file count as we'll
- * be adding the segment usage information even if we didn't
- * write any blocks. Also, don't update the FINFO pointer for
- * the ifile as the segment usage information hasn't yet been
- * added.
- */
- if (inum == LFS_IFILE_INUM)
- ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
- else if (fip->fi_nblocks != 0) {
- ++((SEGSUM *)(sp->segsum))->ss_nfinfo;
- sp->fip = (FINFO *)((caddr_t)fip + sizeof(FINFO) +
- sizeof(daddr_t) * (fip->fi_nblocks - 1));
- }
- }
-
- /* If this isn't the ifile, update the inode. */
- if (inum != LFS_IFILE_INUM)
- sp = lfs_writeinode(fs, sp, VTOI(vp));
- return (sp);
-}
-
-static SEGMENT *
-lfs_writeinode(fs, sp, ip)
- LFS *fs;
- SEGMENT *sp;
- INODE *ip;
-{
- BUF *bp;
- daddr_t next_addr;
- int nblocks;
-
- /* Allocate a new inode block if necessary. */
- if (sp->ibp == NULL) {
- /* Allocate a new segment if necessary. */
- if (sp->seg_bytes_left < fs->lfs_bsize) {
- lfs_writeseg(fs, sp);
- sp = lfs_newseg(fs);
- }
-
- /* Get next inode block. */
- next_addr = next(fs, sp, &nblocks);
-
- /*
- * Get a new buffer and enter into the buffer list from
- * the top of the list.
- */
- sp->ibp = sp->bpp[fs->lfs_ssize - (nblocks + 1)] =
- lfs_newbuf(fs, next_addr, fs->lfs_bsize);
- sp->ibp->b_flags |= B_CALL;
-
- /* Set remaining space counter. */
- sp->seg_bytes_left -= fs->lfs_bsize;
+ LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_curseg), bp);
+ sup->su_flags &= ~SEGUSE_ACTIVE;
+ LFS_UBWRITE(bp);
+
+ LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp);
+ sup->su_flags |= SEGUSE_ACTIVE | SEGUSE_DIRTY;
+ LFS_UBWRITE(bp);
+
+ LFS_CLEANERINFO(cip, fs, bp);
+ --cip->clean;
+ ++cip->dirty;
+ LFS_UBWRITE(bp);
+
+ fs->lfs_lastseg = fs->lfs_curseg;
+ fs->lfs_curseg = fs->lfs_nextseg;
+ for (sn = curseg = datosn(fs, fs->lfs_curseg);;) {
+ sn = (sn + 1) % fs->lfs_nseg;
+ if (sn == curseg)
+ panic("lfs_nextseg: no clean segments");
+ LFS_SEGENTRY(sup, fs, sn, bp);
+ isdirty = sup->su_flags & SEGUSE_DIRTY;
+ brelse(bp);
+ if (!isdirty)
+ break;
}
-
- /* Copy the new inode onto the inode page. */
- bp = sp->ibp;
- bcopy(&ip->i_din,
- bp->b_un.b_dino + (sp->ninodes % INOPB(fs)), sizeof(DINODE));
-
- /* Increment inode count in segment summary block. */
- ++((SEGSUM *)(sp->segsum))->ss_ninos;
-
- /* If this page is full, set flag to allocate a new page. */
- if (++sp->ninodes % INOPB(fs) == 0)
- sp->ibp = NULL;
-
- /*
- * If updating the ifile, update the super-block; otherwise, update
- * the ifile itself. In either case, turn of inode update flags.
- */
- if (ip->i_number == LFS_IFILE_INUM)
- fs->lfs_idaddr = bp->b_blkno;
- else
- lfs_iset(ip, bp->b_blkno, ip->i_atime);
- ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG);
- return (sp);
+ fs->lfs_nextseg = sntoda(fs, sn);
}
-static void
+void
lfs_writeseg(fs, sp)
- LFS *fs;
- SEGMENT *sp;
+ struct lfs *fs;
+ struct segment *sp;
{
- BUF **bpp;
+ struct buf **bpp, *bp, *cbp;
SEGUSE *sup;
- int i, nblocks, s, (*strategy) __P((BUF *));
- void *pmeta;
-
- /* Update superblock segment address. */
- fs->lfs_lastseg = sntoda(fs, sp->seg_number);
+ SEGSUM *ssp;
+ dev_t i_dev;
+ u_long *datap, *dp;
+ size_t size;
+ int ch_per_blk, i, nblocks, num, s, (*strategy)__P((struct buf *));
+ char *p;
- /* Finish up any summary block. */
- lfs_endsum(fs, sp, 0);
+#ifdef VERBOSE
+ printf("lfs_writeseg\n");
+#endif
+ if ((nblocks = sp->cbpp - sp->bpp) == 0)
+ return;
/*
- * Copy inode and summary block buffer pointers down so they are
- * contiguous with the page buffer pointers.
+ * Compute checksum across data and then across summary; the first
+ * block (the summary block) is skipped. Set the create time here
+ * so that it's guaranteed to be later than the inode mod times.
+ *
+ * XXX
+ * Fix this to do it inline, instead of malloc/copy.
*/
- (void)next(fs, sp, &nblocks);
- pmeta = (sp->bpp + fs->lfs_ssize) - nblocks;
- if (pmeta != sp->cbpp)
- bcopy(pmeta, sp->cbpp, sizeof(BUF *) * nblocks);
- sp->cbpp += nblocks;
- nblocks = sp->cbpp - sp->bpp;
-
- sup = fs->lfs_segtab + sp->seg_number;
- sup->su_nbytes = nblocks << fs->lfs_bshift;
- sup->su_lastmod = time.tv_sec;
- sup->su_flags = SEGUSE_DIRTY;
+ datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
+ for (bpp = sp->bpp, i = nblocks - 1; i--;)
+ *dp++ = (*++bpp)->b_un.b_words[0];
+ ssp = (SEGSUM *)sp->segsum;
+ ssp->ss_create = time.tv_sec;
+ ssp->ss_datasum = cksum(datap, nblocks * sizeof(u_long));
+ ssp->ss_sumsum =
+ cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum));
+ free(datap, M_SEGMENT);
+
+ i_dev = VTOI(fs->lfs_ivnode)->i_dev;
+ strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy;
/*
- * Since we need to guarantee that the summary block gets written last,
- * we issue the writes in two sets. The first n-1 buffers first, and
- * then, after they've completed, the summary buffer. Only when that
- * final write completes is the segment valid.
+ * When we simply write the blocks we lose a rotation for every block
+ * written. To avoid this problem, we allocate memory in chunks, copy
+ * the buffers into the chunk and write the chunk. 56K was chosen as
+ * some driver/controllers can't handle unsigned 16 bit transfers.
+ * When the data is copied to the chunk, turn off the B_LOCKED bit
+ * and brelse the buffer (which will move them to the LRU list). Add
+ * the B_CALL flag to the buffer header so we can count I/O's for the
+ * checkpoints and so we can release the allocated memory.
+ *
+ * XXX
+ * This should be removed if the new virtual memory system allows us to
+ * easily make the buffers contiguous in kernel memory and if that's
+ * fast enough.
*/
- --nblocks; /* Don't count last summary block. */
-
- sp->nextp = fs->lfs_seglist;
- fs->lfs_seglist = sp;
-
- s = splbio();
- fs->lfs_iocount += nblocks;
- splx(s);
-
- strategy =
- VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp->v_op->vop_strategy;
- for (bpp = sp->bpp, i = 0; i < nblocks; ++i, ++bpp)
- (strategy)(*bpp);
-}
-
-static void
-lfs_writesum(fs)
- LFS *fs;
-{
- BUF *bp;
- SEGMENT *next_sp, *sp;
- int (*strategy) __P((BUF *));
-
- strategy =
- VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp->v_op->vop_strategy;
- for (sp = fs->lfs_seglist; sp; sp = next_sp) {
- bp = *(sp->cbpp - 1);
- (strategy)(bp);
- biowait(bp);
- bp->b_vp = NULL; /* No associated vnode. */
- brelse(bp);
-
- next_sp = sp->nextp;
- free(sp->bpp, M_SEGMENT);
- free(sp, M_SEGMENT);
+#define LFS_CHUNKSIZE (56 * 1024)
+ ch_per_blk = LFS_CHUNKSIZE / fs->lfs_bsize;
+ for (bpp = sp->bpp, i = nblocks; i;) {
+ num = ch_per_blk;
+ if (num > i)
+ num = i;
+ i -= num;
+ size = num * fs->lfs_bsize;
+
+ cbp = lfs_newbuf(fs, (*bpp)->b_blkno, 0);
+ cbp->b_dev = i_dev;
+ cbp->b_flags = B_ASYNC | B_BUSY | B_CALL;
+ cbp->b_iodone = lfs_callback;
+ cbp->b_saveaddr = cbp->b_un.b_addr;
+ cbp->b_un.b_addr = malloc(size, M_SEGMENT, M_WAITOK);
+
+ s = splbio();
+ ++fs->lfs_iocount;
+ for (p = cbp->b_un.b_addr; num--;) {
+ bp = *bpp++;
+ bcopy(bp->b_un.b_addr, p, bp->b_bcount);
+ p += bp->b_bcount;
+ bp->b_flags &=
+ ~(B_DONE | B_ERROR | B_READ | B_DELWRI | B_LOCKED);
+ if (!(bp->b_flags & B_NOCACHE)) {
+ bremfree(bp);
+ reassignbuf(bp, bp->b_vp);
+ }
+ brelse(bp);
+ }
+ splx(s);
+ cbp->b_bcount = p - cbp->b_un.b_addr;
+ (strategy)(cbp);
}
- /* Segment list is done. */
- fs->lfs_seglist = NULL;
+
+ /* Update the segment usage information. */
+ LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
+ sup->su_nbytes += nblocks - 1 -
+ (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs) << fs->lfs_bshift;
+ sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode);
+ sup->su_lastmod = time.tv_sec;
+ LFS_UBWRITE(bp);
}
-static void
-lfs_writesuper(fs)
- LFS *fs;
+void
+lfs_writesuper(fs, sp)
+ struct lfs *fs;
+ struct segment *sp;
{
- BUF *bp;
- int (*strategy) __P((BUF *));
+ struct buf *bp;
+ dev_t i_dev;
+ int (*strategy) __P((struct buf *));
- strategy =
- VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp->v_op->vop_strategy;
+#ifdef VERBOSE
+ printf("lfs_writesuper\n");
+#endif
+ i_dev = VTOI(fs->lfs_ivnode)->i_dev;
+ strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy;
/* Checksum the superblock and copy it into a buffer. */
- fs->lfs_cksum = cksum(fs, sizeof(LFS) - sizeof(fs->lfs_cksum));
+ fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum));
bp = lfs_newbuf(fs, fs->lfs_sboffs[0], LFS_SBPAD);
- bcopy(fs, bp->b_un.b_lfs, sizeof(LFS));
+ *bp->b_un.b_lfs = *fs;
/* Write the first superblock (wait). */
+ bp->b_dev = i_dev;
+ bp->b_flags |= B_BUSY;
+ bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
(strategy)(bp);
biowait(bp);
/* Write the second superblock (don't wait). */
- bp->b_flags &= ~B_DONE;
- bp->b_flags |= B_ASYNC;
- bp->b_vp = NULL; /* No associated vnode. */
bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1];
+ bp->b_flags |= B_ASYNC | B_BUSY;
+ bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
(strategy)(bp);
}
* Logical block number match routines used when traversing the dirty block
* chain.
*/
-static int
-match_data(bp)
- BUF *bp;
+int
+lfs_match_data(fs, bp)
+ struct lfs *fs;
+ struct buf *bp;
{
return (bp->b_lblkno >= 0);
}
-static int
-match_dindir(bp)
- BUF *bp;
+int
+lfs_match_indir(fs, bp)
+ struct lfs *fs;
+ struct buf *bp;
{
- return (bp->b_lblkno == D_INDIR);
+ int lbn;
+
+ lbn = bp->b_lblkno;
+ return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0);
}
-/*
- * These are single indirect blocks. There are three types:
- *
- * the one in the inode (lblkno == S_INDIR, or -1).
- * the ones that hang off of the double indirect in the inode (D_INDIR);
- * these all have addresses in the range -2NINDIR to -(3NINDIR-1).
- * the ones that hang off of the double indirect that hangs off of the
- * triple indirect. These all have addresses < -(NINDIR^2).
- *
- * Since we currently don't support triple indirect blocks, this gets
- * simpler, and we just look for block numbers less than -NIADDR.
- */
-static int
-match_indir(bp)
- BUF *bp;
+int
+lfs_match_dindir(fs, bp)
+ struct lfs *fs;
+ struct buf *bp;
{
- return (bp->b_lblkno == S_INDIR || bp->b_lblkno < -NIADDR);
+ int lbn;
+
+ lbn = bp->b_lblkno;
+ return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1);
}
-/* Get the next inode/summary block. */
-static daddr_t
-next(fs, sp, nbp)
- LFS *fs;
- SEGMENT *sp;
- int *nbp;
+int
+lfs_match_tindir(fs, bp)
+ struct lfs *fs;
+ struct buf *bp;
{
- int nblocks, nino_blocks, nseg_blocks, sums_per_block;
+ int lbn;
- /* Fs blocks allocated to summary blocks. */
- sums_per_block = fs->lfs_bsize / LFS_SUMMARY_SIZE;
- nseg_blocks = (sp->nsums + sums_per_block - 1) / sums_per_block;
+ lbn = bp->b_lblkno;
+ return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2);
+}
- /* Fs blocks allocated to inodes. */
- nino_blocks = (sp->ninodes + INOPB(fs) - 1) / INOPB(fs);
+/*
+ * Allocate a new buffer header.
+ */
+struct buf *
+lfs_newbuf(fs, daddr, size)
+ struct lfs *fs;
+ daddr_t daddr;
+ size_t size;
+{
+ struct buf *bp;
- /* Total number of fs blocks allocated. */
- nblocks = nseg_blocks + nino_blocks;
+#ifdef VERBOSE
+ printf("lfs_newbuf\n");
+#endif
+ bp = getnewbuf();
+ bremhash(bp);
+ bgetvp(fs->lfs_ivnode, bp);
+ bp->b_bcount = 0;
+ bp->b_lblkno = daddr;
+ bp->b_blkno = daddr;
+ bp->b_error = 0;
+ bp->b_resid = 0;
+ if (size)
+ allocbuf(bp, size);
+ bp->b_flags |= B_NOCACHE;
+ bp->b_saveaddr = NULL;
+ binshash(bp, &bfreelist[BQ_AGE]);
+ return (bp);
+}
- if (nbp)
- *nbp = nblocks;
+int /* XXX should be void */
+lfs_callback(bp)
+ struct buf *bp;
+{
+ struct lfs *fs;
- /*
- * The disk address of the new inode/summary block is the address of
- * the start of the segment after this one minus the number of blocks
- * that we've already used.
- */
- return (sntoda(fs, sp->seg_number + 1) - fsbtodb(fs, nblocks + 1));
+ fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs;
+#ifdef DIAGNOSTIC
+ if (fs->lfs_iocount == 0)
+ panic("lfs_callback: zero iocount\n");
+#endif
+ if (--fs->lfs_iocount == 0)
+ wakeup(&fs->lfs_iocount);
+
+ if (bp->b_saveaddr) {
+ free(bp->b_un.b_addr, M_SEGMENT);
+ bp->b_un.b_addr = bp->b_saveaddr;
+ bp->b_saveaddr = NULL;
+ }
+ brelse(bp);
}
/*
* of logical block numbers to a unsigned in this routine so that the
* negative block numbers (meta data blocks) sort AFTER the data blocks.
*/
-static void
-shellsort(bp_array, lb_array, nmemb)
- BUF **bp_array;
+void
+lfs_shellsort(bp_array, lb_array, nmemb)
+ struct buf **bp_array;
daddr_t *lb_array;
register int nmemb;
{
static int __rsshell_increments[] = { 4, 1, 0 };
register int incr, *incrp, t1, t2;
- BUF *bp_temp;
+ struct buf *bp_temp;
u_long lb_temp;
for (incrp = __rsshell_increments; incr = *incrp++;)