From 275ca4f06eee4dda4f550bc95075e3bf3bd355c9 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 2 Oct 1991 17:00:38 -0800 Subject: [PATCH] checkpoint SCCS-vsn: sys/ufs/lfs/lfs.h 5.5 SCCS-vsn: sys/ufs/lfs/lfs_alloc.c 7.30 SCCS-vsn: sys/ufs/lfs/lfs_bio.c 5.2 SCCS-vsn: sys/ufs/lfs/lfs_balloc.c 7.16 SCCS-vsn: sys/ufs/lfs/lfs_cksum.c 5.2 SCCS-vsn: sys/ufs/lfs/lfs_debug.c 5.2 SCCS-vsn: sys/ufs/lfs/lfs_extern.h 5.2 SCCS-vsn: sys/ufs/lfs/lfs_inode.c 7.43 SCCS-vsn: sys/ufs/lfs/lfs_segment.c 5.2 SCCS-vsn: sys/ufs/lfs/lfs_vfsops.c 7.59 SCCS-vsn: sys/ufs/lfs/lfs_vnops.c 7.68 --- usr/src/sys/ufs/lfs/lfs.h | 6 +- usr/src/sys/ufs/lfs/lfs_alloc.c | 114 +++-- usr/src/sys/ufs/lfs/lfs_balloc.c | 6 +- usr/src/sys/ufs/lfs/lfs_bio.c | 21 +- usr/src/sys/ufs/lfs/lfs_cksum.c | 8 +- usr/src/sys/ufs/lfs/lfs_debug.c | 13 +- usr/src/sys/ufs/lfs/lfs_extern.h | 7 +- usr/src/sys/ufs/lfs/lfs_inode.c | 14 +- usr/src/sys/ufs/lfs/lfs_segment.c | 735 +++++++++++++++++++++--------- usr/src/sys/ufs/lfs/lfs_vfsops.c | 141 ++++-- usr/src/sys/ufs/lfs/lfs_vnops.c | 8 +- 11 files changed, 759 insertions(+), 314 deletions(-) diff --git a/usr/src/sys/ufs/lfs/lfs.h b/usr/src/sys/ufs/lfs/lfs.h index c0e83065ee..8dc9e2c3b6 100644 --- a/usr/src/sys/ufs/lfs/lfs.h +++ b/usr/src/sys/ufs/lfs/lfs.h @@ -4,7 +4,7 @@ * * %sccs.include.redist.c% * - * @(#)lfs.h 5.4 (Berkeley) %G% + * @(#)lfs.h 5.5 (Berkeley) %G% */ typedef struct buf BUF; @@ -33,7 +33,7 @@ typedef struct finfo FINFO; struct finfo { u_long fi_nblocks; /* number of blocks */ u_long fi_version; /* version number */ - ino_t fi_ino; /* inode number */ + u_long fi_ino; /* inode number */ long fi_blocks[1]; /* array of logical block numbers */ }; @@ -43,6 +43,8 @@ struct segment { SEGMENT *nextp; /* Links segments together */ BUF **bpp; /* Pointer to buffer array */ BUF **cbpp; /* Pointer to next available bp */ + BUF *ibp; /* Buffer pointer to inode page */ + BUF *sbp; /* Segment summary buffer pointer */ void *segsum; /* Segment Summary info */ u_long 
sum_bytes_left; /* Bytes left in summary */ u_long seg_bytes_left; /* Bytes left in segment */ diff --git a/usr/src/sys/ufs/lfs/lfs_alloc.c b/usr/src/sys/ufs/lfs/lfs_alloc.c index 465c9bdb50..f1693b56e1 100644 --- a/usr/src/sys/ufs/lfs/lfs_alloc.c +++ b/usr/src/sys/ufs/lfs/lfs_alloc.c @@ -4,27 +4,32 @@ * * %sccs.include.redist.c% * - * @(#)lfs_alloc.c 7.29 (Berkeley) %G% + * @(#)lfs_alloc.c 7.30 (Berkeley) %G% */ +#ifdef LOGFS #include "param.h" #include "kernel.h" #include "buf.h" #include "vnode.h" #include "syslog.h" +#include "mount.h" #include "../ufs/quota.h" #include "../ufs/inode.h" -#include "mount.h" #include "../ufs/ufsmount.h" #include "lfs.h" #include "lfs_extern.h" +/* Read in the block containing a specific inode from the ifile. */ #define LFS_IENTRY(I, F, IN, BP) \ if (bread((F)->lfs_ivnode, (IN) / IFPB(F) + (F)->lfs_segtabsz, \ (F)->lfs_bsize, NOCRED, &BP)) \ - panic("ifile read"); \ + panic("lfs_ientry: read"); \ (I) = (IFILE *)BP->b_un.b_addr + IN % IFPB(F); +/* + * Allocate a new inode. + */ ino_t lfs_ialloc(fs, pip, ipp, cred) LFS *fs; @@ -38,47 +43,53 @@ lfs_ialloc(fs, pip, ipp, cred) ino_t new_ino; int error; + /* Get the head of the freelist. */ new_ino = fs->lfs_free; -printf("lfs_ialloc: next free %d\n", new_ino); - if (new_ino == LFS_UNUSED_INUM) { /* XXX -- allocate more */ + if (new_ino == LFS_UNUSED_INUM) { + /* + * XXX + * Currently, no more inodes are allocated if the ifile fills + * up. The ifile should be extended instead. + */ uprintf("\n%s: no inodes left\n", fs->lfs_fsmnt); log(LOG_ERR, "uid %d on %s: out of inodes\n", cred->cr_uid, fs->lfs_fsmnt); return (ENOSPC); } +printf("lfs_ialloc: allocate inode %d\n", new_ino); - /* Read the appropriate block from the ifile */ - vp = fs->lfs_ivnode; + /* Read the appropriate block from the ifile. 
*/ LFS_IENTRY(ifp, fs, new_ino, bp); if (ifp->if_daddr != LFS_UNUSED_DADDR) - panic("lfs_ialloc: corrupt free list"); + panic("lfs_ialloc: inuse inode on the free list"); - /* Remove from free list, set the access time. */ + /* Remove from the free list, set the access time, write it back. */ fs->lfs_free = ifp->if_nextfree; ifp->if_st_atime = time.tv_sec; - brelse(bp); + lfs_bwrite(bp); + /* Create a vnode to associate with the inode. */ error = lfs_vcreate(ITOV(pip)->v_mount, new_ino, &vp); if (error) return (error); + *ipp = ip = VTOI(vp); - ip = VTOI(vp); - VREF(ip->i_devvp); - - /* - * Set up a new generation number for this inode. - */ + /* Set a new generation number for this inode. */ if (++nextgennumber < (u_long)time.tv_sec) nextgennumber = time.tv_sec; ip->i_gen = nextgennumber; + /* Insert into the inode hash table. */ lfs_hqueue(ip); - *ipp = ip; + /* Set superblock modified bit and increment file count. */ + fs->lfs_fmod = 1; + ++fs->lfs_nfiles; return (0); } +/* Free an inode. */ void lfs_ifree(ip) INODE *ip; @@ -89,18 +100,28 @@ lfs_ifree(ip) ino_t ino; printf("lfs_ifree: free %d\n", ip->i_number); + /* Get the inode number and file system. */ fs = ip->i_lfs; ino = ip->i_number; - LFS_IENTRY(ifp, fs, ino, bp); + /* + * Read the appropriate block from the ifile. Set the inode entry to + * unused, increment its version number and link it into the free chain. + */ + LFS_IENTRY(ifp, fs, ino, bp); ifp->if_daddr = LFS_UNUSED_DADDR; ++ifp->if_version; ifp->if_nextfree = fs->lfs_free; - brelse(bp); fs->lfs_free = ino; + + lfs_bwrite(bp); + + /* Set superblock modified bit and decrement file count. */ fs->lfs_fmod = 1; + --fs->lfs_nfiles; } +/* Translate an inode number to a disk address. */ daddr_t itod(fs, ino) LFS *fs; @@ -110,16 +131,17 @@ itod(fs, ino) IFILE *ifp; daddr_t iaddr; -printf("itod: ino %d\n", ino); + /* Read the appropriate block from the ifile. 
*/ LFS_IENTRY(ifp, fs, ino, bp); if (ifp->if_daddr == LFS_UNUSED_DADDR) - panic("itod: unused daddr"); + panic("itod: unused disk address"); iaddr = ifp->if_daddr; brelse(bp); return (iaddr); } +/* Search a block for a specific dinode. */ DINODE * lfs_ifind(fs, ino, page) LFS *fs; @@ -135,14 +157,11 @@ printf("lfs_ifind: inode %d\n", ino); if (dip->di_inum == ino) return (dip); - (void)printf("lfs_ifind: dinode %u not found", ino); - panic("lfs_ifind: inode not found"); + panic("lfs_ifind: dinode %%u not found", ino); /* NOTREACHED */ } -/* - * Create a new vnode/inode and initialize the fields we can. - */ +/* Create a new vnode/inode pair and initialize what fields we can. */ lfs_vcreate(mp, ino, vpp) MOUNT *mp; ino_t ino; @@ -153,10 +172,11 @@ lfs_vcreate(mp, ino, vpp) int error, i; printf("lfs_vcreate: ino %d\n", ino); - error = getnewvnode(VT_LFS, mp, &lfs_vnodeops, vpp); - if (error) + /* Create the vnode. */ + if (error = getnewvnode(VT_LFS, mp, &lfs_vnodeops, vpp)) return(error); + /* Get a pointer to the private mount structure. */ ump = VFSTOUFS(mp); /* Initialize the inode. */ @@ -174,24 +194,50 @@ printf("lfs_vcreate: ino %d\n", ino); for (i = 0; i < MAXQUOTAS; i++) ip->i_dquot[i] = NODQUOT; #endif + VREF(ip->i_devvp); /* XXX: Why? */ return (0); } -/* - * Return the current version number for a specific inode. - */ +/* Return the current version number for a specific inode. */ u_long lfs_getversion(fs, ino) LFS *fs; ino_t ino; { - IFILE *ifp; BUF *bp; - int version; + IFILE *ifp; + u_long version; -printf("lfs_getversion: %d\n", ino); + /* + * Read the appropriate block from the ifile. Return the version + * number. + */ LFS_IENTRY(ifp, fs, ino, bp); version = ifp->if_version; brelse(bp); return(version); } + +/* Set values in the ifile for the inode. 
*/ +void +lfs_iset(ip, daddr, atime) + INODE *ip; + daddr_t daddr; + time_t atime; +{ + BUF *bp; + IFILE *ifp; + LFS *fs; + ino_t ino; + +printf("lfs_iset: setting ino %d daddr %lx time %lx\n", ip->i_number, daddr, atime); + + fs = ip->i_lfs; + ino = ip->i_number; + LFS_IENTRY(ifp, fs, ino, bp); + + ifp->if_daddr = daddr; + ifp->if_st_atime = atime; + lfs_bwrite(bp); +} +#endif /* LOGFS */ diff --git a/usr/src/sys/ufs/lfs/lfs_balloc.c b/usr/src/sys/ufs/lfs/lfs_balloc.c index b086a46256..8daeacf679 100644 --- a/usr/src/sys/ufs/lfs/lfs_balloc.c +++ b/usr/src/sys/ufs/lfs/lfs_balloc.c @@ -4,9 +4,10 @@ * * %sccs.include.redist.c% * - * @(#)lfs_balloc.c 7.15 (Berkeley) %G% + * @(#)lfs_balloc.c 7.16 (Berkeley) %G% */ +#ifdef LOGFS #include "param.h" #include "systm.h" #include "buf.h" @@ -112,7 +113,7 @@ printf("lfs_bmap: block number %d, inode %d\n", bn, ip->i_number); bp->b_blkno = daddr; bp->b_flags |= B_READ; bp->b_dev = devvp->v_rdev; - (*(devvp->v_op->vop_strategy))(bp); + (devvp->v_op->vop_strategy)(bp); curproc->p_stats->p_ru.ru_inblock++; /* XXX */ if (error = biowait(bp)) { brelse(bp); @@ -130,3 +131,4 @@ printf("lfs_bmap: block number %d, inode %d\n", bn, ip->i_number); *bnp = daddr; return (0); } +#endif /* LOGFS */ diff --git a/usr/src/sys/ufs/lfs/lfs_bio.c b/usr/src/sys/ufs/lfs/lfs_bio.c index 0d5f25949f..f7fb4118f5 100644 --- a/usr/src/sys/ufs/lfs/lfs_bio.c +++ b/usr/src/sys/ufs/lfs/lfs_bio.c @@ -4,9 +4,10 @@ * * %sccs.include.redist.c% * - * @(#)lfs_bio.c 5.1 (Berkeley) %G% + * @(#)lfs_bio.c 5.2 (Berkeley) %G% */ +#ifdef LOGFS #include "param.h" #include "proc.h" #include "buf.h" @@ -18,17 +19,21 @@ #include "lfs.h" /* - * lfs_bwrite -- - * LFS version of bawrite, bdwrite, bwrite. Set the delayed write flag - * and use reassignbuf to move the buffer from the clean list to the - * dirty one. Then unlock the buffer. + * LFS version of bawrite, bdwrite, bwrite. 
Set the delayed write flag and + * use reassignbuf to move the buffer from the clean list to the dirty one, + * then unlock the buffer. */ lfs_bwrite(bp) register BUF *bp; { +#ifdef DO_ACCOUNTING + Not included as this gets called from lots of places where the + current proc structure is probably wrong. Ignore for now. curproc->p_stats->p_ru.ru_oublock++; /* XXX: no one paid yet */ - bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); - bp->b_flags |= B_DELWRI; - reassignbuf(bp, bp->b_vp); /* XXX: do this inline */ +#endif + bp->b_flags &= ~(B_READ | B_DONE | B_ERROR); + bp->b_flags |= B_WRITE | B_DELWRI; + reassignbuf(bp, bp->b_vp); /* XXX: do this inline? */ brelse(bp); } +#endif /* LOGFS */ diff --git a/usr/src/sys/ufs/lfs/lfs_cksum.c b/usr/src/sys/ufs/lfs/lfs_cksum.c index 2c4a26bfc0..c83949d73b 100644 --- a/usr/src/sys/ufs/lfs/lfs_cksum.c +++ b/usr/src/sys/ufs/lfs/lfs_cksum.c @@ -4,14 +4,15 @@ * * %sccs.include.redist.c% * - * @(#)lfs_cksum.c 5.1 (Berkeley) %G% + * @(#)lfs_cksum.c 5.2 (Berkeley) %G% */ +#ifdef LOGFS #include /* - * cksum -- - * Simple, general purpose, fast checksum. + * Simple, general purpose, fast checksum. Data must be short-aligned. + * Returns a u_long in case we ever want to do something more rigorous. 
*/ u_long cksum(str, len) @@ -25,3 +26,4 @@ cksum(str, len) sum ^= *((u_short *)str)++; return (sum); } +#endif /* LOGFS */ diff --git a/usr/src/sys/ufs/lfs/lfs_debug.c b/usr/src/sys/ufs/lfs/lfs_debug.c index 6de5e8f18c..85ae7cab2d 100644 --- a/usr/src/sys/ufs/lfs/lfs_debug.c +++ b/usr/src/sys/ufs/lfs/lfs_debug.c @@ -4,9 +4,10 @@ * * %sccs.include.redist.c% * - * @(#)lfs_debug.c 5.1 (Berkeley) %G% + * @(#)lfs_debug.c 5.2 (Berkeley) %G% */ +#ifdef LOGFS #include "param.h" #include "namei.h" #include "vnode.h" @@ -104,3 +105,13 @@ lfs_print_inumber(vp) { (void)printf("%d\n", VTOI(vp)->i_number); } + +void +lfs_spin() +{ + u_long i, j; + + for (i = 0; i < 10; ++i) + for (j = 0; j < 1000000; ++j); +} +#endif /* LOGFS */ diff --git a/usr/src/sys/ufs/lfs/lfs_extern.h b/usr/src/sys/ufs/lfs/lfs_extern.h index e814f9be43..01f08b02dc 100644 --- a/usr/src/sys/ufs/lfs/lfs_extern.h +++ b/usr/src/sys/ufs/lfs/lfs_extern.h @@ -4,12 +4,13 @@ * * %sccs.include.redist.c% * - * @(#)lfs_extern.h 5.1 (Berkeley) %G% + * @(#)lfs_extern.h 5.2 (Berkeley) %G% */ struct fid; struct inode; struct mount; +struct nameidata; struct statfs; struct proc; struct ucred; @@ -19,6 +20,7 @@ daddr_t itod __P((LFS *, ino_t)); int lfs_balloc __P((LFS *, VNODE *, daddr_t, int, BUF **)); int lfs_bmap __P((struct inode *, daddr_t, daddr_t *)); void lfs_bwrite __P((struct buf *)); +void lfs_cleaner __P((void)); int lfs_fhtovp __P((struct mount *, struct fid *, struct vnode **)); u_long lfs_getversion __P((LFS *fs, ino_t)); ino_t lfs_ialloc __P((LFS *, struct inode *, struct inode **, @@ -29,9 +31,11 @@ struct dinode * void lfs_ifree __P((struct inode *)); int lfs_inactive __P((struct vnode *, struct proc *)); int lfs_init __P((void)); +void lfs_iset __P((INODE *, daddr_t, time_t)); int lfs_lookup __P((struct vnode *, struct nameidata *, struct proc *)); int lfs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); int lfs_root __P((struct mount *, struct vnode **)); +int lfs_segwrite 
__P((MOUNT *, int)); int lfs_statfs __P((struct mount *, struct statfs *, struct proc *)); int lfs_sync __P((struct mount *, int)); int lfs_unmount __P((struct mount *, int, struct proc *)); @@ -41,6 +45,7 @@ int lfs_vcreate __P((struct mount *, ino_t, struct vnode **)); void dump_super __P((LFS *)); void dump_dinode __P((struct dinode *)); void lfs_print_inumber __P((struct vnode *)); +void lfs_spin __P((void)); #endif extern struct vnodeops lfs_vnodeops; diff --git a/usr/src/sys/ufs/lfs/lfs_inode.c b/usr/src/sys/ufs/lfs/lfs_inode.c index 30bf84db7f..12b22dc1ad 100644 --- a/usr/src/sys/ufs/lfs/lfs_inode.c +++ b/usr/src/sys/ufs/lfs/lfs_inode.c @@ -4,9 +4,10 @@ * * %sccs.include.redist.c% * - * @(#)lfs_inode.c 7.42 (Berkeley) %G% + * @(#)lfs_inode.c 7.43 (Berkeley) %G% */ +#ifdef LOGFS #include "param.h" #include "systm.h" #include "mount.h" @@ -20,6 +21,8 @@ #include "../ufs/quota.h" #include "../ufs/inode.h" #include "../ufs/ufsmount.h" +#include "../vm/vm_param.h" +#include "../vm/lock.h" #include "lfs.h" #include "lfs_extern.h" @@ -38,6 +41,8 @@ union lfsihead { /* LFS */ /* LFS */ extern int prtactive; /* 1 => print out reclaim of active vnodes */ +lock_data_t lfs_sync_lock; + /* * Initialize hash links for inodes. 
*/ @@ -47,10 +52,13 @@ lfs_init() register union lfsihead *ih = lfsihead; printf("lfs_init\n"); + #ifndef lint if (VN_MAXPRIVATE < sizeof(struct inode)) panic("ihinit: too small"); -#endif /* not lint */ +#endif + lock_init(&lfs_sync_lock, 1); + for (i = INOHSZ; --i >= 0; ih++) { ih->ih_head[0] = ih; ih->ih_head[1] = ih; @@ -67,7 +75,6 @@ lfs_hqueue(ip) { union lfsihead *ih; -printf("lfs_hqueue ino %d\n", ip->i_number); ih = &lfsihead[INOHASH(ip->i_dev, ip->i_number)]; insque(ip, ih); ILOCK(ip); @@ -561,3 +568,4 @@ lfs_indirtrunc(ip, bn, lastbn, level, countp) panic("lfs_indirtrunc not implemented"); #endif } +#endif /* LOGFS */ diff --git a/usr/src/sys/ufs/lfs/lfs_segment.c b/usr/src/sys/ufs/lfs/lfs_segment.c index b9efcf18bc..baf38c6793 100644 --- a/usr/src/sys/ufs/lfs/lfs_segment.c +++ b/usr/src/sys/ufs/lfs/lfs_segment.c @@ -4,9 +4,10 @@ * * %sccs.include.redist.c% * - * @(#)lfs_segment.c 5.1 (Berkeley) %G% + * @(#)lfs_segment.c 5.2 (Berkeley) %G% */ +#ifdef LOGFS #include "param.h" #include "systm.h" #include "namei.h" @@ -31,31 +32,53 @@ #include "lfs_extern.h" /* -Need to write the inodes out. -The indirect buffers need to be marked dirty -What about sync? How do you wait on the last I/O? -Need to keep vnode v_numoutput up to date for pending writes. +Add a check so that if the segment is empty, you don't write it. +Write the code with lfs_ialloc to allocate a new page of inodes if you have to. +Make an incoming sync wait until the previous one finishes. Keith + will write this. When this happens, we no longer have to be + able to chain superblocks together and handle multiple segments + writing -- Seems like we can call biowait to wait for an io. + However, I don't think we want to wait on the summary I/O + necessarily, because if we've got lots of dirty buffers piling + up, it would be nice to process them and get the segment all + ready to write. 
Perhaps we can just wait before firing up the + next set of writes, rather than waiting to start doing anything. + Also -- my lfs_writesuper should wait until all the segment writes + are done (I added a biowait, but we need to make sure that the SEGMENT + structure hasn't been freed before we get there). +Need to keep vnode v_numoutput up to date for pending writes? +???Could actually fire off the datablock writes before you finish. This +would give them a chance to get started earlier... */ static int lfs_biocallback __P((BUF *)); static void lfs_endsum __P((LFS *, SEGMENT *, int)); +static SEGMENT *lfs_gather + __P((LFS *, SEGMENT *, VNODE *, int (*) __P((BUF *)))); static BUF *lfs_newbuf __P((LFS *, daddr_t, size_t)); static SEGMENT *lfs_newseg __P((LFS *)); static void lfs_newsum __P((LFS *, SEGMENT *)); static daddr_t lfs_nextseg __P((LFS *)); -static int lfs_updatemeta __P((LFS *, INODE *, FINFO *, BUF **)); -static SEGMENT *lfs_writefile __P((SEGMENT *, LFS *, VNODE *)); -static void lfs_writemeta __P((void)); +static void lfs_updatemeta __P((LFS *, SEGMENT *, INODE *, daddr_t *, + BUF **, int)); +static void lfs_writeckp __P((LFS *, SEGMENT *)); +static SEGMENT *lfs_writefile __P((SEGMENT *, LFS *, VNODE *, int)); +static SEGMENT *lfs_writeinode __P((LFS *, SEGMENT *, VNODE *)); static void lfs_writeseg __P((LFS *, SEGMENT *)); -static void shellsort __P((BUF **, u_long *, register int)); +static void lfs_writesuper __P((LFS *, SEGMENT *)); +static int match_data __P((BUF *)); +static int match_dindir __P((BUF *)); +static int match_indir __P((BUF *)); +static void shellsort __P((BUF **, daddr_t *, register int)); /* * XXX -- when we add fragments in here, we will need to allocate a larger * buffer pointer array (sp->bpp). 
*/ int -lfs_segwrite(mp) +lfs_segwrite(mp, do_ckp) MOUNT *mp; + int do_ckp; /* do a checkpoint too */ { FINFO *fip; /* current file info structure */ INODE *ip; @@ -63,9 +86,7 @@ lfs_segwrite(mp) VNODE *vp; SEGMENT *sp; -printf("lfs_segwrite: %s %s\n", mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname); fs = VFSTOUFS(mp)->um_lfs; - sp = lfs_newseg(fs); loop: for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) { @@ -73,7 +94,6 @@ loop: * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ -printf("lfs_segwrite: processing inum %d\n", VTOI(vp)->i_number); if (vp->v_mount != mp) goto loop; if (VOP_ISLOCKED(vp)) @@ -86,18 +106,16 @@ printf("lfs_segwrite: processing inum %d\n", VTOI(vp)->i_number); continue; if (vget(vp)) goto loop; - sp = lfs_writefile(sp, fs, vp); - - /* Need to take care of inode now */ -printf("lfs_segwrite: need to add dinode %d to seg\n", ip->i_din.di_inum); + sp = lfs_writefile(sp, fs, vp, do_ckp); vput(vp); } - /* - * Force stale file system control information to be flushed. - */ - lfs_writeseg(fs, sp); -/* vflushbuf(ump->um_devvp, waitfor == MNT_WAIT ? B_SYNC : 0); */ -printf("lfs_segwrite: returning from segwrite\n"); + if (do_ckp) + lfs_writeckp(fs, sp); + else + lfs_writeseg(fs, sp); +#ifdef NOTLFS + vflushbuf(ump->um_devvp, waitfor == MNT_WAIT ? B_SYNC : 0); +#endif return (0); } @@ -110,25 +128,45 @@ lfs_biocallback(bp) UFSMOUNT *ump; VNODE *devvp; + /* + * Grab the mount point for later (used to find the file system and + * block device) and, if the contents are valid, move the buffer back + * onto the clean list. 
+ */ +printf("lfs_biocallback: buffer %x\n", bp, bp->b_lblkno); ump = VFSTOUFS(bp->b_vp->v_mount); + if (bp->b_flags & B_NOCACHE) + bp->b_vp = NULL; + else { + bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); + reassignbuf(bp, bp->b_vp); + } + fs = ump->um_lfs; devvp = ump->um_devvp; - /* XXX splbio(); */ -printf("lfs_biocallback: iocount: %d\n", fs->lfs_iocount); - if (--fs->lfs_iocount) { + brelse(bp); /* move up... XXX */ + +printf("\nlfs_biocallback: iocount %d\n", fs->lfs_iocount); + if (fs->lfs_iocount == 0) { + /* Wake up any other syncs waiting on this file system. */ + return; + } + --fs->lfs_iocount; + if (fs->lfs_iocount == 0) { +printf("\nlfs_biocallback: doing summary write\n"); /* Fire off summary writes */ for (sp = fs->lfs_seglist; sp; sp = next_sp) { next_sp = sp->nextp; - (*(devvp->v_op->vop_strategy))(*(sp->cbpp - 1)); -printf("free: segsum %x bpp %x sp %x\n", sp->segsum, sp->bpp, sp); - free(sp->segsum, M_SEGMENT); +#ifdef MOVETONEWBUF + (*(sp->cbpp - 1))->b_dev = bp->b_dev; +#endif + (devvp->v_op->vop_strategy)(*(sp->cbpp - 1)); free(sp->bpp, M_SEGMENT); free(sp, M_SEGMENT); } } } - static void lfs_endsum(fs, sp, calc_next) LFS *fs; @@ -138,50 +176,117 @@ lfs_endsum(fs, sp, calc_next) BUF *bp; SEGSUM *ssp; daddr_t next_addr; - int npages, nseg_pages; + int npages, nseg_pages, nsums_per_blk; + +/* printf("lfs_endsum\n"); /**/ + if (sp->sbp == NULL) + return; -printf("lfs_endsum\n"); ssp = sp->segsum; if (!calc_next) ssp->ss_nextsum = (daddr_t) -1; + else + ssp->ss_nextsum = sp->sum_addr - LFS_SUMMARY_SIZE / DEV_BSIZE; - nseg_pages = sp->sum_num / (fs->lfs_bsize / LFS_SUMMARY_SIZE); - if ((sp->sum_num % (fs->lfs_bsize / LFS_SUMMARY_SIZE)) == 0) { + if ((sp->sum_num % (fs->lfs_bsize / LFS_SUMMARY_SIZE)) == (nsums_per_blk - 1)) { /* - * May need to change the nextsum field on the previous - * summary header in which case we need to recompute the - * checksum as well. + * This buffer is now full. 
Compute the next address if appropriate + * and the checksum, and close the buffer by setting sp->sbp NULL. */ - npages = nseg_pages + (sp->ninodes + INOPB(fs) - 1) / INOPB(fs); - next_addr = fs->lfs_sboffs[0] + - (sp->seg_number + 1) * fsbtodb(fs, fs->lfs_ssize) - - fsbtodb(fs, npages) - LFS_SUMMARY_SIZE / DEV_BSIZE; - if (calc_next) + if (calc_next) { + nsums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE; + nseg_pages = 1 + sp->sum_num / nsums_per_blk; + npages = nseg_pages + (sp->ninodes + INOPB(fs) - 1) / INOPB(fs); + next_addr = fs->lfs_sboffs[0] + + (sp->seg_number + 1) * fsbtodb(fs, fs->lfs_ssize) + - fsbtodb(fs, (npages - 1)) - LFS_SUMMARY_SIZE / DEV_BSIZE; ssp->ss_nextsum = next_addr; - ssp->ss_cksum = cksum(&ssp->ss_cksum, - LFS_SUMMARY_SIZE - sizeof(ssp->ss_cksum)); - bp = lfs_newbuf(fs, sp->sum_addr, fs->lfs_bsize); - bcopy(sp->segsum, bp->b_un.b_words, fs->lfs_bsize); - bp->b_flags |= B_BUSY; - if (nseg_pages != 1) { - bp->b_flags |= B_CALL; - bp->b_iodone = lfs_biocallback; } - brelse(bp); - sp->bpp[fs->lfs_ssize - npages] = bp; - sp->segsum = (SEGSUM *)(sp->segsum + fs->lfs_bsize - - LFS_SUMMARY_SIZE); - sp->sum_addr = next_addr; - } else { - sp->sum_addr -= LFS_SUMMARY_SIZE / DEV_BSIZE; - ssp->ss_nextsum = sp->sum_addr; + ssp->ss_cksum = cksum(&ssp->ss_cksum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_cksum)); + sp->sbp = NULL; + } else /* Calculate cksum on previous segment summary */ ssp->ss_cksum = cksum(&ssp->ss_cksum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_cksum)); - sp->segsum -= LFS_SUMMARY_SIZE; +} + +static SEGMENT * +lfs_gather(fs, sp, vp, match) + LFS *fs; + SEGMENT *sp; + VNODE *vp; + int (*match) __P((BUF *)); +{ + BUF **bpp, *bp, *nbp; + FINFO *fip; + INODE *ip; + int count, s, version; + daddr_t *lbp, *start_lbp; + + ip = VTOI(vp); + bpp = sp->cbpp; + fip = sp->fip; + version = fip->fi_version; + start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks]; + count = 0; + + s = splbio(); + for (bp = vp->v_dirtyblkhd; bp; bp = nbp) { + nbp = bp->b_blockf; + if 
((bp->b_flags & B_BUSY)) + continue; + if ((bp->b_flags & B_DELWRI) == 0) + panic("lfs_write: not dirty"); + if (!match(bp)) + continue; + bremfree(bp); + bp->b_flags |= B_BUSY | B_CALL; + bp->b_dev = VTOI(fs->lfs_ivnode)->i_dev; + bp->b_iodone = lfs_biocallback; + + *lbp++ = bp->b_lblkno; + *sp->cbpp++ = bp; + fip->fi_nblocks++; + sp->sum_bytes_left -= sizeof(daddr_t); + sp->seg_bytes_left -= bp->b_bufsize; + if (sp->sum_bytes_left < sizeof(daddr_t) || + sp->seg_bytes_left < fs->lfs_bsize) { + /* + * We are about to allocate a new summary block + * and possibly a new segment. So, we need to + * sort the blocks we've done so far, and assign + * the disk addresses, so we can start a new block + * correctly. We may be doing I/O so we need to + * release the s lock before doing anything. + */ + splx(s); + lfs_updatemeta(fs, sp, ip, start_lbp, bpp, + lbp - start_lbp); + + /* Put this file in the segment summary */ + ((SEGSUM *)(sp->segsum))->ss_nfinfo++; + + if (sp->seg_bytes_left < fs->lfs_bsize) { + lfs_writeseg(fs, sp); + sp = lfs_newseg(fs); + } else if (sp->sum_bytes_left < sizeof(daddr_t)) + lfs_newsum(fs, sp); + fip = sp->fip; + fip->fi_ino = ip->i_number; + fip->fi_version = version; + bpp = sp->cbpp; + /* You know that you have a new FINFO either way */ + start_lbp = lbp = fip->fi_blocks; + s = splbio(); + } } + splx(s); + lfs_updatemeta(fs, sp, ip, start_lbp, bpp, lbp - start_lbp); + + return(sp); } + static BUF * lfs_newbuf(fs, daddr, size) LFS *fs; @@ -191,23 +296,33 @@ lfs_newbuf(fs, daddr, size) BUF *bp; VNODE *devvp; -printf("lfs_newbuf\n"); bp = getnewbuf(); bremhash(bp); /* * XXX * Need a devvp, but this isn't a particularly clean way to get one. 
+ * devvp = VTOI(fs->lfs_ivnode)->i_devvp; */ - devvp = VTOI(fs->lfs_ivnode)->i_devvp; +#ifdef NOTWORKING + devvp = VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp; bgetvp(devvp, bp); +#endif + bp->b_vp = fs->lfs_ivnode; + bp->b_dev = VTOI(fs->lfs_ivnode)->i_dev; bp->b_bcount = 0; - bp->b_lblkno = daddr; - bp->b_blkno = daddr; + bp->b_blkno = bp->b_lblkno = daddr; bp->b_error = 0; bp->b_resid = 0; + bp->b_flags |= B_CALL | B_DELWRI | B_NOCACHE | B_WRITE; + bp->b_iodone = lfs_biocallback; +#ifdef PROBABLYWRONG binshash(bp, BUFHASH(devvp, daddr)); +#endif allocbuf(bp, size); +#ifdef PROBABLYWRONG + reassignbuf(bp, devvp); +#endif return (bp); } @@ -225,17 +340,20 @@ lfs_newseg(fs) printf("lfs_newseg\n"); /* Get buffer space to write out a segment */ sp = malloc(sizeof(SEGMENT), M_SEGMENT, M_WAITOK); + sp->ibp = NULL; + sp->sbp = NULL; sp->cbpp = sp->bpp = malloc(fs->lfs_ssize * sizeof(BUF *), M_SEGMENT, M_WAITOK); sp->nextp = NULL; sp->sum_bytes_left = LFS_SUMMARY_SIZE; sp->seg_bytes_left = (fs->lfs_segmask + 1) - LFS_SUMMARY_SIZE; sp->saddr = fs->lfs_nextseg; +printf("lfs_newseg: About to write segment %lx\n", sp->saddr); sp->sum_addr = sp->saddr + sp->seg_bytes_left / DEV_BSIZE; sp->ninodes = 0; sp->sum_num = -1; - sp->seg_number = (sp->saddr - fs->lfs_sboffs[0]) / - fsbtodb(fs, fs->lfs_ssize); + sp->seg_number = + (sp->saddr - fs->lfs_sboffs[0]) / fsbtodb(fs, fs->lfs_ssize); /* initialize segment summary info */ lfs_newsum(fs, sp); @@ -245,7 +363,10 @@ printf("lfs_newseg\n"); /* This is a segment containing a super block */ FINFO *fip; daddr_t lbn, *lbnp; + SEGSUM *ssp; + ssp = (SEGSUM *)sp->segsum; + ssp->ss_nfinfo++; fip = sp->fip; fip->fi_nblocks = LFS_SBPAD >> fs->lfs_bshift; fip->fi_version = 1; @@ -269,25 +390,42 @@ lfs_newsum(fs, sp) SEGMENT *sp; { SEGSUM *ssp; - void *sum; + int npages, nseg_pages, sums_per_blk; printf("lfs_newsum\n"); - sp->sum_num++; - if (sp->sum_num == 0) { - sum = malloc(fs->lfs_bsize, M_SEGMENT, M_WAITOK); - sp->segsum = sum + 
fs->lfs_bsize - LFS_SUMMARY_SIZE; - ssp = sp->segsum; - ssp->ss_next = fs->lfs_nextseg = lfs_nextseg(fs); - ssp->ss_prev = fs->lfs_lastseg; + lfs_endsum(fs, sp, 1); + ++sp->sum_num; + if (sp->sbp == NULL) { + /* Allocate a new buffer. */ + if (sp->seg_bytes_left < fs->lfs_bsize) { + lfs_writeseg(fs, sp); + sp = lfs_newseg(fs); + } + sums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE; + nseg_pages = 1 + sp->sum_num / sums_per_blk; + npages = nseg_pages + (sp->ninodes + INOPB(fs) - 1) / INOPB(fs); + sp->sum_addr = fs->lfs_sboffs[0] + + (sp->seg_number + 1) * fsbtodb(fs, fs->lfs_ssize) + - fsbtodb(fs, npages); + sp->sbp = lfs_newbuf(fs, sp->sum_addr, fs->lfs_bsize); + sp->bpp[fs->lfs_ssize - npages] = sp->sbp; +printf("Inserting summary block, address %x at index %d\n", +sp->sbp->b_lblkno, fs->lfs_ssize - npages); + sp->seg_bytes_left -= fs->lfs_bsize; + sp->segsum = sp->sbp->b_un.b_addr + fs->lfs_bsize - LFS_SUMMARY_SIZE; + sp->sum_addr += (fs->lfs_bsize - LFS_SUMMARY_SIZE) / DEV_BSIZE; } else { - lfs_endsum(fs, sp, 1); - ssp = sp->segsum; - ssp->ss_next = ssp->ss_next; - ssp->ss_prev = ssp->ss_prev; + sp->segsum -= LFS_SUMMARY_SIZE; + sp->sum_addr -= LFS_SUMMARY_SIZE / DEV_BSIZE; } + ssp = sp->segsum; + ssp->ss_next = fs->lfs_nextseg = lfs_nextseg(fs); + ssp->ss_prev = fs->lfs_lastseg; + /* Initialize segment summary info. 
*/ sp->fip = (FINFO *)(sp->segsum + sizeof(SEGSUM)); + sp->fip->fi_nblocks = 0; ssp->ss_nextsum = (daddr_t)-1; ssp->ss_create = time.tv_sec; @@ -305,13 +443,11 @@ lfs_nextseg(fs) int segnum, sn; SEGUSE *sup; -printf("lfs_nextseg\n"); segnum = satosn(fs, fs->lfs_nextseg); - for (sn = seginc(fs, sn); sn != segnum; sn = seginc(fs, sn)) { - sup = &fs->lfs_segtab[sn]; - if (!(sup->su_flags & SEGUSE_DIRTY)) + for (sn = seginc(fs, segnum); sn != segnum; sn = seginc(fs, sn)) + if (!(fs->lfs_segtab[sn].su_flags & SEGUSE_DIRTY)) break; - } + if (sn == segnum) panic("lfs_nextseg: file system full"); /* XXX */ return(sntosa(fs, sn)); @@ -321,27 +457,42 @@ printf("lfs_nextseg\n"); * Update the metadata that points to the blocks listed in the FIP * array. */ -static -lfs_updatemeta(fs, ip, fip, bpp) +static void +lfs_updatemeta(fs, sp, ip, lbp, bpp, nblocks) LFS *fs; + SEGMENT *sp; INODE *ip; - FINFO *fip; + daddr_t *lbp; BUF **bpp; + int nblocks; { SEGUSE *segup; - BUF **lbpp, *bp; + BUF **lbpp, *bp, *mbp; daddr_t da, iblkno; - int error, i, oldsegnum; - long lbn, *lbp; + int db_per_fsb, error, i, oldsegnum; + long lbn; -printf("lfs_updatemeta\n"); - for (lbpp= bpp, lbp = fip->fi_blocks, i = 0; - i < fip->fi_nblocks; i++, lbp++, bp++) { - lbn = *lbp; +printf("lfs_updatemeta of %d blocks\n", nblocks); + if ((nblocks == 0) && (ip->i_flag & (IMOD|IACC|IUPD|ICHG)) == 0) + return; + + /* First sort the blocks and add disk addresses */ + shellsort(bpp, lbp, nblocks); + + db_per_fsb = 1 << fs->lfs_fsbtodb; + for (lbpp = bpp, i = 0; i < nblocks; i++, lbpp++) { + (*lbpp)->b_blkno = sp->saddr; + sp->saddr += db_per_fsb; + } + + for (lbpp = bpp, i = 0; i < nblocks; i++, lbpp++) { + lbn = lbp[i]; +printf("lfs_updatemeta: block %d\n", lbn); if (error = lfs_bmap(ip, lbn, &da)) - return(error); + panic("lfs_updatemeta: lfs_bmap returned error"); if (da) { + /* Update segment usage information */ oldsegnum = (da - fs->lfs_sboffs[0]) / fsbtodb(fs, fs->lfs_ssize); segup = 
fs->lfs_segtab+oldsegnum; @@ -351,136 +502,222 @@ printf("lfs_updatemeta\n"); "in segment", oldsegnum); } - /* Now change whoever points to lbn */ - if (lbn < NDADDR) + /* + * Now change whoever points to lbn. We could start with the + * smallest (most negative) block number in these if clauses, + * but we assume that indirect blocks are least common, and + * handle them separately. + */ + bp = NULL; + if (lbn < 0) { + if (lbn < -NIADDR) { +printf("lfs_updatemeta: changing indirect block %d\n", D_INDIR); + if (error = bread(ITOV(ip), D_INDIR, + fs->lfs_bsize, NOCRED, &bp)) + panic("lfs_updatemeta: error on bread"); + + bp->b_un.b_daddr[-lbn % NINDIR(fs)] = + (*lbpp)->b_blkno; + } else + ip->i_din.di_ib[-lbn-1] = (*lbpp)->b_blkno; + + } else if (lbn < NDADDR) ip->i_din.di_db[lbn] = (*lbpp)->b_blkno; else if ((lbn -= NDADDR) < NINDIR(fs)) { printf("lfs_updatemeta: changing indirect block %d\n", S_INDIR); - error = bread(ITOV(ip), S_INDIR, fs->lfs_bsize, - NOCRED, &bp); - if (error) - return(error); + if (error = bread(ITOV(ip), S_INDIR, fs->lfs_bsize, + NOCRED, &bp)) + panic("lfs_updatemeta: bread returned error"); + bp->b_un.b_daddr[lbn] = (*lbpp)->b_blkno; - brelse(bp); } else if ( (lbn = (lbn - NINDIR(fs)) / NINDIR(fs)) < NINDIR(fs)) { iblkno = - (lbn + NIADDR + 1); printf("lfs_updatemeta: changing indirect block %d\n", iblkno); - error = bread(ITOV(ip), iblkno, fs->lfs_bsize, NOCRED, - &bp); - if (error) - return(error); + if (error = bread(ITOV(ip), iblkno, fs->lfs_bsize, + NOCRED, &bp)) + panic("lfs_updatemeta: bread returned error"); + bp->b_un.b_daddr[lbn % NINDIR(fs)] = (*lbpp)->b_blkno; } else - return(EFBIG); + panic("lfs_updatemeta: logical block number too large"); + if (bp) + lfs_bwrite(bp); } - return(0); +} + +static void +lfs_writeckp(fs, sp) + LFS *fs; + SEGMENT *sp; +{ + BUF *bp; + FINFO *fip; + INODE *ip; + SEGUSE *sup; + daddr_t *lbp; + int bytes_needed, i; + void *xp; + +printf("lfs_writeckp\n"); + /* + * This will write the dirty ifile blocks, 
but not the segusage + * table nor the ifile inode. + */ + sp = lfs_writefile(sp, fs, fs->lfs_ivnode, 1); + + /* + * Make sure that the segment usage table and the ifile inode will + * fit in this segment. If they won't, put them in the next segment + */ + bytes_needed = fs->lfs_segtabsz << fs->lfs_bshift; + if (sp->ninodes % INOPB(fs) == 0) + bytes_needed += fs->lfs_bsize; + + if (sp->seg_bytes_left < bytes_needed) { + lfs_writeseg(fs, sp); + sp = lfs_newseg(fs); + } else if (sp->sum_bytes_left < (fs->lfs_segtabsz * sizeof(daddr_t))) + lfs_newsum(fs, sp); + +#ifdef DEBUG + if (sp->seg_bytes_left < bytes_needed) + panic("lfs_writeckp: unable to write checkpoint"); +#endif + + /* + * Now, update the segment usage information and the ifile inode and + * write it out + */ + + sup = fs->lfs_segtab + sp->seg_number; + sup->su_nbytes = (fs->lfs_segmask + 1) - sp->seg_bytes_left + + bytes_needed; + sup->su_lastmod = time.tv_sec; + sup->su_flags = SEGUSE_DIRTY; + + /* Get buffers for the segusage table and write it out */ + ip = VTOI(fs->lfs_ivnode); + fip = sp->fip; + lbp = &fip->fi_blocks[fip->fi_nblocks]; + for (xp = fs->lfs_segtab, i = 0; i < fs->lfs_segtabsz; + i++, xp += fs->lfs_bsize, lbp++) { + bp = lfs_newbuf(fs, sp->saddr, fs->lfs_bsize); + *sp->cbpp++ = bp; + bcopy(xp, bp->b_un.b_words, fs->lfs_bsize); + ip->i_din.di_db[i] = sp->saddr; + sp->saddr += (1 << fs->lfs_fsbtodb); + *lbp = i; + fip->fi_nblocks++; + } + sp = lfs_writeinode(fs, sp, fs->lfs_ivnode); + lfs_writeseg(fs, sp); + lfs_writesuper(fs, sp); } /* - * Returns 0 if the entire file fit into the current segment and - * summary region, 1 if not. * XXX -- I think we need to figure out what to do if we write * the segment and find more dirty blocks when we're done. 
*/ static SEGMENT * -lfs_writefile(sp, fs, vp) +lfs_writefile(sp, fs, vp, do_ckp) SEGMENT *sp; LFS *fs; VNODE *vp; + int do_ckp; { - register BUF *bp; - BUF **bpp, *nbp; FINFO *fip; INODE *ip; - int db_per_fsb, error, i; - int ret_val, s; - long *lbp; /* initialize the FINFO structure */ ip = VTOI(vp); printf("lfs_writefile: node %d\n", ip->i_number); loop: - fip = sp->fip; - fip->fi_nblocks = 0; - fip->fi_ino = ip->i_number; - fip->fi_version = lfs_getversion(fs, ip->i_number); - lbp = fip->fi_blocks; - - bpp = sp->cbpp; - s = splbio(); - for (bp = vp->v_dirtyblkhd; bp; bp = nbp) { - nbp = bp->b_blockf; -printf("lfs_writefile: disk block num %d flags %x\n", bp->b_blkno, bp->b_flags); - if ((bp->b_flags & B_BUSY)) - continue; - if ((bp->b_flags & B_DELWRI) == 0) - panic("lfs_write: not dirty"); - bremfree(bp); - bp->b_flags |= (B_BUSY | B_CALL); - bp->b_iodone = lfs_biocallback; - - /* UFS does the bawrites and bwrites here; we don't */ - *lbp++ = bp->b_lblkno; /* UPDATE META HERE */ - *sp->cbpp++ = bp; - fip->fi_nblocks++; - sp->sum_bytes_left -= sizeof(daddr_t); - sp->seg_bytes_left -= bp->b_bufsize; - if (sp->sum_bytes_left < sizeof(daddr_t) || - sp->seg_bytes_left < fs->lfs_bsize) { - /* - * We are about to allocate a new summary block - * and possibly a new segment. So, we need to - * sort the blocks we've done so far, and assign - * the disk addresses, so we can start a new block - * correctly. We may be doing I/O so we need to - * release the s lock before doing anything. 
- */ - splx(s); - if (error = lfs_updatemeta(fs, ip, fip, bpp)) - panic("lfs_writefile: error from lfs_updatemeta\n"); + sp->fip->fi_nblocks = 0; + sp->fip->fi_ino = ip->i_number; + if (ip->i_number != LFS_IFILE_INUM) + sp->fip->fi_version = lfs_getversion(fs, ip->i_number); + else + sp->fip->fi_version = 1; + + sp = lfs_gather(fs, sp, vp, match_data); + if (do_ckp) { + sp = lfs_gather(fs, sp, vp, match_indir); + sp = lfs_gather(fs, sp, vp, match_dindir); + } - /* Put this file in the segment summary */ +(void)printf("lfs_writefile: adding %d blocks to segment\n", +sp->fip->fi_nblocks); + /* + * Update the inode for this file and reflect new inode + * address in the ifile. If this is the ifile, don't update + * the inode, because we're checkpointing and will update the + * inode with the segment usage information (so we mustn't + * bump the finfo pointer either). + */ + if (ip->i_number != LFS_IFILE_INUM) { + sp = lfs_writeinode(fs, sp, vp); + fip = sp->fip; + if (fip->fi_nblocks) { ((SEGSUM *)(sp->segsum))->ss_nfinfo++; - - if (sp->seg_bytes_left < fs->lfs_bsize) { - lfs_writeseg(fs, sp); - sp = lfs_newseg(fs); - } else if (sp->sum_bytes_left < sizeof(daddr_t)) - lfs_newsum(fs, sp); - fip = sp->fip; - s = splbio(); + sp->fip = (FINFO *)((u_long)fip + sizeof(FINFO) + + sizeof(u_long) * fip->fi_nblocks - 1); } - - } - splx(s); - db_per_fsb = 1 << fs->lfs_fsbtodb; - shellsort(bpp, (u_long *)fip->fi_blocks, fip->fi_nblocks); - for (bp = *bpp, i = 0; i < fip->fi_nblocks; i++, bp++) { - bp->b_blkno = sp->saddr; - sp->saddr += db_per_fsb; - /* - * Update the meta data now for this file. If we've filled - * a segment, then we'll have to wait until the next segment - * to write out the updated metadata. 
- */ - lfs_writemeta(); - } -(void)printf("lfs_writefile: adding %d blocks to segment\n", fip->fi_nblocks); - if (fip->fi_nblocks) { - ((SEGSUM *)(sp->segsum))->ss_nfinfo++; - sp->fip = (FINFO *)((u_long)fip + sizeof(FINFO) + - sizeof(u_long) * (fip->fi_nblocks - 1)); } return(sp); } -static void -lfs_writemeta() +static SEGMENT * +lfs_writeinode(fs, sp, vp) + LFS *fs; + SEGMENT *sp; + VNODE *vp; { - printf("lfs_writemeta (STUB)\n"); + BUF *bp; + INODE *ip; + SEGSUM *ssp; + daddr_t iaddr, next_addr; + int npages, nseg_pages, sums_per_blk; + struct dinode *dip; + +printf("lfs_writeinode\n"); + sums_per_blk = fs->lfs_bsize / LFS_SUMMARY_SIZE; + if (sp->ibp == NULL) { + /* Allocate a new buffer. */ + if (sp->seg_bytes_left < fs->lfs_bsize) { + lfs_writeseg(fs, sp); + sp = lfs_newseg(fs); + } + nseg_pages = (sp->sum_num + sums_per_blk) / sums_per_blk; + npages = nseg_pages + (sp->ninodes + INOPB(fs)) / INOPB(fs); + next_addr = fs->lfs_sboffs[0] + + (sp->seg_number + 1) * fsbtodb(fs, fs->lfs_ssize) + - fsbtodb(fs, npages); + sp->ibp = lfs_newbuf(fs, next_addr, fs->lfs_bsize); + sp->ibp->b_flags |= B_BUSY; + sp->bpp[fs->lfs_ssize - npages] = sp->ibp; + sp->seg_bytes_left -= fs->lfs_bsize; +printf("alloc inode block @ daddr %x, bp = %x inserted at %d\n", +next_addr, sp->ibp, fs->lfs_ssize - npages); + } + ip = VTOI(vp); + bp = sp->ibp; + dip = bp->b_un.b_dino + (sp->ninodes % INOPB(fs)); + bcopy(&ip->i_din, dip, sizeof(struct dinode)); + iaddr = bp->b_blkno; + ++sp->ninodes; + ssp = sp->segsum; + ++ssp->ss_ninos; + if (sp->ninodes % INOPB(fs) == 0) + sp->ibp = NULL; + if (ip->i_number == LFS_IFILE_INUM) + fs->lfs_idaddr = iaddr; + else + lfs_iset(ip, iaddr, ip->i_atime); /* Update ifile */ + ip->i_flags &= ~(IMOD|IACC|IUPD|ICHG); /* make inode clean */ + return(sp); } static void @@ -488,67 +725,128 @@ lfs_writeseg(fs, sp) LFS *fs; SEGMENT *sp; { - BUF **bpp, *bp; + BUF **bpp; SEGSUM *ssp; SEGUSE *sup; VNODE *devvp; int nblocks, nbuffers, ninode_blocks, nsegsums, nsum_pb; 
int i, metaoff, nmeta; +struct buf **xbp; int xi; printf("lfs_writeseg\n"); - ssp = sp->segsum; - nsum_pb = fs->lfs_bsize / LFS_SUMMARY_SIZE; - /* - * This is a hack because we're currently allocating summary segments - * in full blocks. It will go away when we do fragments, when we'll - * allocate fragment sized summary blocks. - */ - do { - sp->sum_num++; - lfs_endsum(fs, sp, 0); - } while (sp->sum_num % nsum_pb); - nbuffers = sp->cbpp - sp->bpp; - nsegsums = (sp->sum_num + nsum_pb - 1) / nsum_pb; - ninode_blocks = (sp->ninodes + INOPB(fs) - 1)/INOPB(fs); - - /* Do checksum for last segment summary */ - ssp->ss_cksum = cksum(&ssp->ss_cksum, - LFS_SUMMARY_SIZE - sizeof(ssp->ss_cksum)); + fs->lfs_lastseg = sntosa(fs, sp->seg_number); + lfs_endsum(fs, sp, 0); +#ifdef HELLNO /* Finish off any inodes */ + if (sp->ibp) + brelse(sp->ibp); +#endif /* * Copy inode and summary block buffer pointers down so they are - * contiguous with the page buffer pointers + * contiguous with the page buffer pointers. */ - nmeta = 1 + ninode_blocks + nsegsums; + ssp = sp->segsum; + nsum_pb = fs->lfs_bsize / LFS_SUMMARY_SIZE; + nbuffers = sp->cbpp - sp->bpp; + nsegsums = 1 + sp->sum_num / nsum_pb; + ninode_blocks = (sp->ninodes + INOPB(fs) - 1) / INOPB(fs); + nmeta = ninode_blocks + nsegsums; metaoff = fs->lfs_ssize - nmeta; + nblocks = nbuffers + nmeta; if (sp->bpp + metaoff != sp->cbpp) - bcopy(sp->bpp+metaoff, sp->cbpp, sizeof(BUF *) * nmeta); + bcopy(sp->bpp + metaoff, sp->cbpp, sizeof(BUF *) * nmeta); + sp->cbpp += nmeta; - nblocks = nbuffers + ninode_blocks + nsegsums; - sup = fs->lfs_segtab + sp->seg_number; sup->su_nbytes = nblocks << fs->lfs_bshift; sup->su_lastmod = time.tv_sec; sup->su_flags = SEGUSE_DIRTY; /* - * Since we need to guarantee that our last buffer gets written last, + * Since we need to guarantee that the summary block gets written last, * we issue the writes in two sets. The first n-1 buffers first, and * then, after they've completed, the last 1 buffer. 
Only when that - * final write completes is the segment actually written. + * final write completes is the segment valid. */ devvp = VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp; -/* MIS -- THIS COULD BE BAD IF WE GOT INTERRUPTED IN THE MIDDLE OF THIS */ - fs->lfs_iocount += nblocks - 1; + /* + * Since no writes are yet scheduled, no need to block here; if we + * scheduled the writes at multiple points, we'd need an splbio() + * here. + */ + fs->lfs_iocount = nblocks - 1; sp->nextp = fs->lfs_seglist; fs->lfs_seglist = sp; - for (bpp = sp->bpp, i = 0; i < (nblocks - 1); i++) { - bp = *bpp; -printf("lfs_writeseg: buffer: ino %d lbn %d flags %lx\n", VTOI(bp->b_vp)->i_number, bp->b_lblkno, bp->b_flags); - (*(devvp->v_op->vop_strategy))(*bpp++); - } + + for (bpp = sp->bpp, i = 0; i < (nblocks - 1); i++, ++bpp) + /* (*(devvp->v_op->vop_strategy)) */ sdstrategy(*bpp); +} + +static void +lfs_writesuper(fs, sp) + LFS *fs; + SEGMENT *sp; +{ + BUF *bp; + VNODE *devvp; + +printf("lfs_writesuper\n"); + /* Wait for segment write to complete */ + /* XXX probably should do this biowait(*(sp->cbpp - 1)); */ + + /* Get a buffer for the super block */ + fs->lfs_cksum = cksum(fs, sizeof(LFS) - sizeof(fs->lfs_cksum)); + bp = lfs_newbuf(fs, fs->lfs_sboffs[0], LFS_SBPAD); + bp->b_flags &= ~B_CALL; + bp->b_vp = NULL; + bp->b_iodone = NULL; + bcopy(fs, bp->b_un.b_lfs, sizeof(LFS)); + + /* Write the first superblock; wait. */ + devvp = VFSTOUFS(fs->lfs_ivnode->v_mount)->um_devvp; +#ifdef MOVETONEWBUF + bp->b_dev = devvp->v_rdev; +#endif + (*devvp->v_op->vop_strategy)(bp); + biowait(bp); + + /* Now, write the second one for which we don't have to wait */ + bp->b_flags &= ~B_DONE; + bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1]; + (*devvp->v_op->vop_strategy)(bp); + brelse(bp); +} + +/* Block match routines used when traversing the dirty block chain. 
*/ +match_data(bp) + BUF *bp; +{ + return(bp->b_lblkno >= 0); +} + + +match_dindir(bp) + BUF *bp; +{ + return(bp->b_lblkno == D_INDIR); +} + +/* + * These are single indirect blocks. There are three types: + * the one in the inode (address S_INDIR = -1). + * the ones that hang off of D_INDIR the double indirect in the inode. + * these all have addresses in the range -2NINDIR to -(3NINDIR-1) + * the ones that hang off of double indirect that hang off of the + * triple indirect. These all have addresses < -(NINDIR^2). + * Since we currently don't support triple indirect blocks, this gets simpler. + * We just need to look for block numbers less than -NIADDR. + */ +match_indir(bp) + BUF *bp; +{ + return(bp->b_lblkno == S_INDIR || bp->b_lblkno < -NIADDR); } /* @@ -567,7 +865,7 @@ printf("lfs_writeseg: buffer: ino %d lbn %d flags %lx\n", VTOI(bp->b_vp)->i_numb static void shellsort(bp_array, lb_array, nmemb) BUF **bp_array; - u_long *lb_array; + daddr_t *lb_array; register int nmemb; { static int __rsshell_increments[] = { 4, 1, 0 }; @@ -589,3 +887,4 @@ shellsort(bp_array, lb_array, nmemb) } else break; } +#endif /* LOGFS */ diff --git a/usr/src/sys/ufs/lfs/lfs_vfsops.c b/usr/src/sys/ufs/lfs/lfs_vfsops.c index 87a41ae6eb..d4390a9fb7 100644 --- a/usr/src/sys/ufs/lfs/lfs_vfsops.c +++ b/usr/src/sys/ufs/lfs/lfs_vfsops.c @@ -4,9 +4,10 @@ * * %sccs.include.redist.c% * - * @(#)lfs_vfsops.c 7.58 (Berkeley) %G% + * @(#)lfs_vfsops.c 7.59 (Berkeley) %G% */ +#ifdef LOGFS #include "param.h" #include "systm.h" #include "namei.h" @@ -28,12 +29,16 @@ #include "../ufs/quota.h" #include "../ufs/inode.h" #include "../ufs/ufsmount.h" +#include "../vm/vm_param.h" +#include "../vm/lock.h" #include "lfs.h" #include "lfs_extern.h" static int lfs_mountfs __P((struct vnode *, struct mount *, struct proc *)); -static int sbupdate __P((struct ufsmount *, int)); + +static int lfs_umountdebug __P((struct mount *)); +static int lfs_vinvalbuf __P((register struct vnode *)); struct vfsops lfs_vfsops = 
{ lfs_mount, @@ -78,7 +83,6 @@ lfs_mount(mp, path, data, ndp, p) u_int size; int error; -printf("lfs_mount\n"); if (error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args))) return (error); /* @@ -310,6 +314,7 @@ lfs_unmount(mp, mntflags, p) register struct ufsmount *ump; register LFS *fs; /* LFS */ int i, error, ronly, flags = 0; + int ndirty; /* LFS */ printf("lfs_unmount\n"); if (mntflags & MNT_FORCE) { @@ -317,7 +322,12 @@ printf("lfs_unmount\n"); return (EINVAL); flags |= FORCECLOSE; } - mntflushbuf(mp, 0); + if (error = lfs_segwrite(mp, 1)) + return(error); + +ndirty = lfs_umountdebug(mp); +printf("lfs_umountdebug: returned %d dirty\n", ndirty); +return(0); if (mntinvalbuf(mp)) return (EBUSY); ump = VFSTOUFS(mp); @@ -357,7 +367,6 @@ lfs_root(mp, vpp) struct vnode tvp; int error; -printf("lfs_root\n"); tvp.v_mount = mp; ip = VTOI(&tvp); ip->i_vnode = &tvp; @@ -380,7 +389,6 @@ lfs_statfs(mp, sbp, p) register LFS *fs; register struct ufsmount *ump; -printf("lfs_statfs\n"); ump = VFSTOUFS(mp); #ifdef NOTLFS /* LFS */ fs = ump->um_fs; @@ -420,6 +428,7 @@ printf("lfs_statfs\n"); } extern int syncprt; /* LFS */ +extern lock_data_t lfs_sync_lock; /* * Go through the disk queues to initiate sandbagged IO; @@ -428,51 +437,34 @@ extern int syncprt; /* LFS */ * * Note: we are always called with the filesystem marked `MPBUSY'. */ +int STOPNOW; lfs_sync(mp, waitfor) struct mount *mp; int waitfor; { - register struct vnode *vp; - register struct inode *ip; - register struct ufsmount *ump = VFSTOUFS(mp); - register struct fs *fs; - int error, allerror = 0; + int error; printf("lfs_sync\n"); + + /* + * Concurrent syncs aren't possible because the meta data blocks are + * only marked dirty, not busy! + */ + lock_write(&lfs_sync_lock); + if (syncprt) bufstats(); -#ifdef NOTLFS /* LFS */ - fs = ump->um_fs; - /* - * Write back modified superblock. - * Consistency check that the superblock - * is still in the buffer cache. 
+ /* + * If we do roll forward, then all syncs do not have to be checkpoints. + * Until then, make sure they are. */ - if (fs->fs_fmod != 0) { - if (fs->fs_ronly != 0) { /* XXX */ - printf("fs = %s\n", fs->fs_fsmnt); - panic("update: rofs mod"); - } - fs->fs_fmod = 0; - fs->fs_time = time.tv_sec; - allerror = sbupdate(ump, waitfor); - } -#else - allerror = lfs_segwrite(mp); -#endif +STOPNOW=1; + error = lfs_segwrite(mp, 1); + lock_done(&lfs_sync_lock); #ifdef QUOTA qsync(mp); #endif - return (allerror); -} - -static int -sbupdate(mp, waitfor) - struct ufsmount *mp; - int waitfor; -{ - /* LFS IMPLEMENT -- sbupdate */ - panic("sbupdate not implemented"); + return (error); } /* @@ -535,3 +527,74 @@ lfs_fhtovp(mp, fhp, vpp) *vpp = ITOV(ip); return (0); } + +static int +lfs_umountdebug(mp) + struct mount *mp; +{ + struct vnode *vp; + int dirty; + + dirty = 0; + if ((mp->mnt_flag & MNT_MPBUSY) == 0) + panic("umountdebug: not busy"); +loop: + for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) { + if (vget(vp)) + goto loop; + dirty += lfs_vinvalbuf(vp); + vput(vp); + if (vp->v_mount != mp) + goto loop; + } + return (dirty); +} +static int +lfs_vinvalbuf(vp) + register struct vnode *vp; +{ + register struct buf *bp; + struct buf *nbp, *blist; + int s, dirty = 0; + + for (;;) { + if (blist = vp->v_dirtyblkhd) + /* void */; + else if (blist = vp->v_cleanblkhd) + /* void */; + else + break; + for (bp = blist; bp; bp = nbp) { +printf("lfs_vinvalbuf: ino %d, lblkno %d, blkno %lx flags %xl\n", +VTOI(vp)->i_number, bp->b_lblkno, bp->b_blkno, bp->b_flags); + nbp = bp->b_blockf; + s = splbio(); + if (bp->b_flags & B_BUSY) { +printf("lfs_vinvalbuf: buffer busy, would normally sleep\n"); +/* + bp->b_flags |= B_WANTED; + sleep((caddr_t)bp, PRIBIO + 1); +*/ + splx(s); + break; + } + bremfree(bp); + bp->b_flags |= B_BUSY; + splx(s); + if (bp->b_flags & B_DELWRI) { + dirty++; /* XXX */ +printf("lfs_vinvalbuf: buffer dirty (DELWRI). 
would normally write\n"); + break; + } + if (bp->b_vp != vp) + reassignbuf(bp, bp->b_vp); + else + bp->b_flags |= B_INVAL; + brelse(bp); + } + } + if (vp->v_dirtyblkhd || vp->v_cleanblkhd) + panic("lfs_vinvalbuf: flush failed"); + return (dirty); +} +#endif /* LOGFS */ diff --git a/usr/src/sys/ufs/lfs/lfs_vnops.c b/usr/src/sys/ufs/lfs/lfs_vnops.c index 3f3b1ba711..a7083f6a19 100644 --- a/usr/src/sys/ufs/lfs/lfs_vnops.c +++ b/usr/src/sys/ufs/lfs/lfs_vnops.c @@ -4,9 +4,10 @@ * * %sccs.include.redist.c% * - * @(#)lfs_vnops.c 7.67 (Berkeley) %G% + * @(#)lfs_vnops.c 7.68 (Berkeley) %G% */ +#ifdef LOGFS #include "param.h" #include "systm.h" #include "namei.h" @@ -1080,7 +1081,7 @@ lfs_strategy(bp) int error; printf("lfs_strategy: type: %d lblk %d pblk %d\n", bp->b_vp->v_type, - bp->b_lblkno, bp->b_blkno); +bp->b_lblkno, bp->b_blkno); if (bp->b_vp->v_type == VBLK || bp->b_vp->v_type == VCHR) panic("ufs_strategy: spec"); if (bp->b_blkno == bp->b_lblkno) { /* LFS */ @@ -1095,7 +1096,7 @@ printf("lfs_strategy: type: %d lblk %d pblk %d\n", bp->b_vp->v_type, } vp = ip->i_devvp; bp->b_dev = vp->v_rdev; - (*(vp->v_op->vop_strategy))(bp); + (vp->v_op->vop_strategy)(bp); return (0); } @@ -1220,3 +1221,4 @@ struct vnodeops lfs_vnodeops = { ufs_islocked, /* islocked */ ufs_advlock, /* advlock */ }; +#endif /* LOGFS */ -- 2.20.1