Use balloc to extend Ifile.
[unix-history] / usr / src / sys / ufs / lfs / lfs_syscalls.c
index 699d3a4..0ec75be 100644 (file)
@@ -4,7 +4,7 @@
  *
  * %sccs.include.redist.c%
  *
  *
  * %sccs.include.redist.c%
  *
- *     @(#)lfs_syscalls.c      7.13 (Berkeley) %G%
+ *     @(#)lfs_syscalls.c      7.27 (Berkeley) %G%
  */
 
 #include <sys/param.h>
  */
 
 #include <sys/param.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/ufsmount.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
 
 #include <ufs/lfs/lfs.h>
 #include <ufs/lfs/lfs_extern.h>
 
 #include <ufs/lfs/lfs.h>
 #include <ufs/lfs/lfs_extern.h>
+/* Advance (SP)->fip to the first byte past the current FINFO's fi_blocks. */
+#define BUMP_FIP(SP) \
+       ((SP)->fip = (FINFO *) (&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks]))
+
+/* Adjust the FINFO count in the summary that (SP)->segsum points to. */
+#define INC_FINFO(SP) (++((SEGSUM *)((SP)->segsum))->ss_nfinfo)
+#define DEC_FINFO(SP) (--((SEGSUM *)((SP)->segsum))->ss_nfinfo)
+
+/*
+ * Before committing to add something to a segment summary, make sure there
+ * is enough room.  S is the bytes added to the summary.
+ *
+ * The macro uses the caller's local variables `fs' and `sp'.  It is wrapped
+ * in do/while (0) so that it behaves as one statement, even as the body of
+ * an unbraced if/else; every invocation must end with a semicolon.
+ */
+#define        CHECK_SEG(s)                            \
+do {                                           \
+       if (sp->sum_bytes_left < (s)) {         \
+               (void) lfs_writeseg(fs, sp);    \
+               lfs_initseg(fs, sp);            \
+       }                                       \
+} while (0)
+struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
 
 /*
  * lfs_markv:
 
 /*
  * lfs_markv:
@@ -42,126 +59,168 @@ lfs_markv(p, uap, retval)
                fsid_t fsid;            /* file system */
                BLOCK_INFO *blkiov;     /* block array */
                int blkcnt;             /* count of block array entries */
                fsid_t fsid;            /* file system */
                BLOCK_INFO *blkiov;     /* block array */
                int blkcnt;             /* count of block array entries */
-               INODE_INFO *inoiov;     /* inode array */
-               int inocnt;             /* count of inode array entries */
        } *uap;
        int *retval;
 {
        } *uap;
        int *retval;
 {
-       USES_VOP_BMAP;
-       USES_VOP_BWRITE;
-       USES_VOP_VGET;
+       struct segment *sp;
        BLOCK_INFO *blkp;
        IFILE *ifp;
        BLOCK_INFO *blkp;
        IFILE *ifp;
-       INODE_INFO *inop;
-       struct buf *bp;
+       struct buf *bp, **bpp;
        struct inode *ip;
        struct lfs *fs;
        struct mount *mntp;
        struct vnode *vp;
        void *start;
        ino_t lastino;
        struct inode *ip;
        struct lfs *fs;
        struct mount *mntp;
        struct vnode *vp;
        void *start;
        ino_t lastino;
-       daddr_t daddr;
+       daddr_t b_daddr, v_daddr;
        u_long bsize;
        int cnt, error;
 
        u_long bsize;
        int cnt, error;
 
-#ifdef VERBOSE
-       printf("lfs_markv\n");
-#endif
        if (error = suser(p->p_ucred, &p->p_acflag))
                return (error);
        if (error = suser(p->p_ucred, &p->p_acflag))
                return (error);
-
        if ((mntp = getvfs(&uap->fsid)) == NULL)
                return (EINVAL);
        if ((mntp = getvfs(&uap->fsid)) == NULL)
                return (EINVAL);
+       /* Initialize a segment. */
+       sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
+       sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
+           sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
+       sp->seg_flags = SEGM_CKP;
+       sp->vp = NULL;
 
        cnt = uap->blkcnt;
        start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
 
        cnt = uap->blkcnt;
        start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
-       if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO))) {
-               free(start, M_SEGMENT);
-               return (error);
-       }
+       if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO)))
+               goto err1;
 
 
-       /*
-        * Mark blocks/inodes dirty.  Note that errors are mostly ignored.  If
-        * we can't get the info, the block is probably not all that useful,
-        * and hopefully subsequent calls from the cleaner will fix everything.
-        */
+       /* Mark blocks/inodes dirty.  */
        fs = VFSTOUFS(mntp)->um_lfs;
        bsize = fs->lfs_bsize;
        fs = VFSTOUFS(mntp)->um_lfs;
        bsize = fs->lfs_bsize;
-       for (lastino = LFS_UNUSED_INUM, blkp = start; cnt--; ++blkp) {
+       error = 0;
+
+       lfs_seglock(fs);
+       lfs_initseg(fs, sp);
+       sp->seg_flags |= SEGM_CLEAN;
+       for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
+           blkp = start; cnt--; ++blkp) {
                /*
                 * Get the IFILE entry (only once) and see if the file still
                 * exists.
                 */
                if (lastino != blkp->bi_inode) {
                /*
                 * Get the IFILE entry (only once) and see if the file still
                 * exists.
                 */
                if (lastino != blkp->bi_inode) {
+                       if (lastino != LFS_UNUSED_INUM) {
+                               /* Finish up last file */
+                               lfs_updatemeta(sp);
+                               lfs_writeinode(fs, sp, ip);
+                               vput(vp);
+                               if (sp->fip->fi_nblocks)
+                                       BUMP_FIP(sp);
+                               else  {
+                                       DEC_FINFO(sp);
+                                       sp->sum_bytes_left +=
+                                               sizeof(FINFO) - sizeof(daddr_t);
+
+                               }
+                       }
+
+                       /* Start a new file */
+                       CHECK_SEG(sizeof(FINFO));
+                       sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
+                       INC_FINFO(sp);
+                       sp->start_lbp = &sp->fip->fi_blocks[0];
+                       sp->vp = NULL;
+                       sp->fip->fi_version = blkp->bi_version;
+                       sp->fip->fi_nblocks = 0;
+                       sp->fip->fi_ino = blkp->bi_inode;
                        lastino = blkp->bi_inode;
                        lastino = blkp->bi_inode;
-                       LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
-                       daddr = ifp->if_daddr;
-                       brelse(bp);
-                       if (daddr == LFS_UNUSED_DADDR)
+                       if (blkp->bi_inode == LFS_IFILE_INUM)
+                               v_daddr = fs->lfs_idaddr;
+                       else {
+                               LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
+                               v_daddr = ifp->if_daddr;
+                               brelse(bp);
+                       }
+                       if (v_daddr == LFS_UNUSED_DADDR)
                                continue;
                                continue;
-               }
 
 
-               /*
-                * Get the vnode/inode.  If the inode modification time is
-                * earlier than the segment in which the block was found then
-                * they have to be valid, skip other checks.
-                */
-               if (LFS_VGET(mntp, blkp->bi_inode, &vp))
+                       /* Get the vnode/inode. */
+                       if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
+                           blkp->bi_lbn == LFS_UNUSED_LBN ? 
+                           blkp->bi_bp : NULL)) {
+#ifdef DIAGNOSTIC
+                               printf("lfs_markv: VFS_VGET failed (%d)\n",
+                                   blkp->bi_inode);
+#endif
+                               lastino = LFS_UNUSED_INUM;
+                               v_daddr = LFS_UNUSED_DADDR;
+                               continue;
+                       }
+                       sp->vp = vp;
+                       ip = VTOI(vp);
+               } else if (v_daddr == LFS_UNUSED_DADDR)
                        continue;
                        continue;
-               ip = VTOI(vp);
 
 
+               /* If this BLOCK_INFO didn't contain a block, keep going. */
+               if (blkp->bi_lbn == LFS_UNUSED_LBN)
+                       continue;
+               if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
+                   b_daddr != blkp->bi_daddr)
+                       continue;
                /*
                /*
-                * If modify time later than segment create time, see if the
-                * block has been replaced.
+                * If we got to here, then we are keeping the block.  If it
+                * is an indirect block, we want to actually put it in the
+                * buffer cache so that it can be updated in the finish_meta
+                * section.  If it's not, we need to allocate a fake buffer
+                * so that writeseg can perform the copyin and write the buffer.
                 */
                 */
-               if (ip->i_mtime.ts_sec > blkp->bi_segcreate &&
-                   (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr) ||
-                   daddr != blkp->bi_daddr)) {
-                       vput(vp);
-                       continue;
+               if (blkp->bi_lbn >= 0)  /* Data Block */
+                       bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
+                           blkp->bi_bp);
+               else {
+                       bp = getblk(vp, blkp->bi_lbn, bsize);
+                       if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) &&
+                           (error = copyin(blkp->bi_bp, bp->b_un.b_addr,
+                           bsize)))
+                               goto err2;
+                       if (error = VOP_BWRITE(bp))
+                               goto err2;
                }
                }
-
-               /* Get the block (from core or the cleaner) and write it. */
-               bp = getblk(vp, blkp->bi_lbn, bsize);
+               while (lfs_gatherblock(sp, bp, NULL));
+       }
+       if (sp->vp) {
+               lfs_updatemeta(sp);
+               lfs_writeinode(fs, sp, ip);
                vput(vp);
                vput(vp);
-               if (!(bp->b_flags & B_CACHE) &&
-                   (error = copyin(blkp->bi_bp, bp->b_un.b_addr, bsize))) {
-                       brelse(bp);
-                       free(start, M_SEGMENT);
-                       return (error);
+               if (!sp->fip->fi_nblocks) {
+                       DEC_FINFO(sp);
+                       sp->sum_bytes_left += sizeof(FINFO) - sizeof(daddr_t);
                }
                }
-               VOP_BWRITE(bp);
        }
        }
+       (void) lfs_writeseg(fs, sp);
+       lfs_segunlock(fs);
        free(start, M_SEGMENT);
        free(start, M_SEGMENT);
+       free(sp->bpp, M_SEGMENT);
+       free(sp, M_SEGMENT);
+       return (error);
+/*
+ * XXX If we reach err2, we might have indirect blocks that were
+ * updated and now have bad block pointers.  I don't know what to do
+ * about this.
+ */
 
 
-       cnt = uap->inocnt;
-       start = malloc(cnt * sizeof(INODE_INFO), M_SEGMENT, M_WAITOK);
-       if (error = copyin(uap->inoiov, start, cnt * sizeof(INODE_INFO))) {
-               free(start, M_SEGMENT);
-               return (error);
-       }
-
-       for (inop = start; cnt--; ++inop) {
-               LFS_IENTRY(ifp, fs, inop->ii_inode, bp);
-               daddr = ifp->if_daddr;
-               brelse(bp);
-               if (daddr != inop->ii_daddr)
-                       continue;
-               /*
-                * XXX
-                * This is grossly inefficient since the cleaner just handed
-                * us a copy of the inode and we're going to have to seek
-                * to get our own.  The fix requires creating a version of
-                * lfs_vget that takes the copy and uses it instead of reading
-                * from disk, if it's not already in the cache.
-                */
-               if (!LFS_VGET(mntp, inop->ii_inode, &vp)) {
-                       VTOI(vp)->i_flag |= IMOD;
-                       vput(vp);
-               }       
-       }
+err2:  vput(vp);
+       /* Free up fakebuffers */
+       for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
+               if ((*bpp)->b_flags & B_CALL) {
+                       brelvp(*bpp);
+                       free(*bpp, M_SEGMENT);
+               } else
+                       brelse(*bpp);
+       lfs_segunlock(fs);
+err1:
+       free(sp->bpp, M_SEGMENT);
+       free(sp, M_SEGMENT);
        free(start, M_SEGMENT);
        free(start, M_SEGMENT);
-       return (lfs_segwrite(mntp, 1));
+       return(error);
 }
 
 /*
 }
 
 /*
@@ -182,8 +241,6 @@ lfs_bmapv(p, uap, retval)
        } *uap;
        int *retval;
 {
        } *uap;
        int *retval;
 {
-       USES_VOP_BMAP;
-       USES_VOP_VGET;
        BLOCK_INFO *blkp;
        struct mount *mntp;
        struct vnode *vp;
        BLOCK_INFO *blkp;
        struct mount *mntp;
        struct vnode *vp;
@@ -191,12 +248,8 @@ lfs_bmapv(p, uap, retval)
        daddr_t daddr;
        int cnt, error, step;
 
        daddr_t daddr;
        int cnt, error, step;
 
-#ifdef VERBOSE
-       printf("lfs_bmapv\n");
-#endif
        if (error = suser(p->p_ucred, &p->p_acflag))
                return (error);
        if (error = suser(p->p_ucred, &p->p_acflag))
                return (error);
-
        if ((mntp = getvfs(&uap->fsid)) == NULL)
                return (EINVAL);
 
        if ((mntp = getvfs(&uap->fsid)) == NULL)
                return (EINVAL);
 
@@ -208,10 +261,12 @@ lfs_bmapv(p, uap, retval)
        }
 
        for (step = cnt; step--; ++blkp) {
        }
 
        for (step = cnt; step--; ++blkp) {
-               if (LFS_VGET(mntp, blkp->bi_inode, &vp))
+               if (blkp->bi_lbn == LFS_UNUSED_LBN)
+                       continue;
+               if (VFS_VGET(mntp, blkp->bi_inode, &vp))
                        daddr = LFS_UNUSED_DADDR;
                else {
                        daddr = LFS_UNUSED_DADDR;
                else {
-                       if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr))
+                       if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
                                daddr = LFS_UNUSED_DADDR;
                        vput(vp);
                }
                                daddr = LFS_UNUSED_DADDR;
                        vput(vp);
                }
@@ -246,27 +301,32 @@ lfs_segclean(p, uap, retval)
        struct lfs *fs;
        int error;
 
        struct lfs *fs;
        int error;
 
-#ifdef VERBOSE
-       printf("lfs_segclean\n");
-#endif
        if (error = suser(p->p_ucred, &p->p_acflag))
                return (error);
        if (error = suser(p->p_ucred, &p->p_acflag))
                return (error);
-
        if ((mntp = getvfs(&uap->fsid)) == NULL)
                return (EINVAL);
 
        fs = VFSTOUFS(mntp)->um_lfs;
 
        if ((mntp = getvfs(&uap->fsid)) == NULL)
                return (EINVAL);
 
        fs = VFSTOUFS(mntp)->um_lfs;
 
+       if (datosn(fs, fs->lfs_curseg) == uap->segment)
+               return (EBUSY);
+
        LFS_SEGENTRY(sup, fs, uap->segment, bp);
        LFS_SEGENTRY(sup, fs, uap->segment, bp);
+       if (sup->su_flags & SEGUSE_ACTIVE) {
+               brelse(bp);
+               return(EBUSY);
+       }
+       fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
+       fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
+           sup->su_ninos * btodb(fs->lfs_bsize);
        sup->su_flags &= ~SEGUSE_DIRTY;
        sup->su_flags &= ~SEGUSE_DIRTY;
-       sup->su_nbytes = 0;
-       LFS_UBWRITE(bp);
+       (void) VOP_BWRITE(bp);
 
        LFS_CLEANERINFO(cip, fs, bp);
        ++cip->clean;
        --cip->dirty;
 
        LFS_CLEANERINFO(cip, fs, bp);
        ++cip->clean;
        --cip->dirty;
-       LFS_UBWRITE(bp);
-
+       (void) VOP_BWRITE(bp);
+       wakeup(&fs->lfs_avail);
        return (0);
 }
 
        return (0);
 }
 
@@ -297,12 +357,9 @@ lfs_segwait(p, uap, retval)
        u_long timeout;
        int error, s;
 
        u_long timeout;
        int error, s;
 
-#ifdef VERBOSE
-       printf("lfs_segwait\n");
-#endif
-       if (error = suser(p->p_ucred, &p->p_acflag))
+       if (error = suser(p->p_ucred, &p->p_acflag)) {
                return (error);
                return (error);
-
+}
 #ifdef WHEN_QUADS_WORK
        if (uap->fsid == (fsid_t)-1)
                addr = &lfs_allclean_wakeup;
 #ifdef WHEN_QUADS_WORK
        if (uap->fsid == (fsid_t)-1)
                addr = &lfs_allclean_wakeup;
@@ -323,8 +380,8 @@ lfs_segwait(p, uap, retval)
                        return (error);
                if (itimerfix(&atv))
                        return (EINVAL);
                        return (error);
                if (itimerfix(&atv))
                        return (EINVAL);
-               s = splhigh();
-               timevaladd(&atv, &time);
+               s = splclock();
+               timevaladd(&atv, (struct timeval *)&time);
                timeout = hzto(&atv);
                splx(s);
        } else
                timeout = hzto(&atv);
                splx(s);
        } else
@@ -333,3 +390,122 @@ lfs_segwait(p, uap, retval)
        error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
        return (error == ERESTART ? EINTR : 0);
 }
        error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
        return (error == ERESTART ? EINTR : 0);
 }
+
+/*
+ * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
+ * daddr from the ifile, so don't look it up again.  If the cleaner is
+ * processing IINFO structures, it may have the ondisk inode already, so
+ * don't go retrieving it again.
+ *
+ *     mp      mount point of the file system
+ *     ino     number of the inode wanted
+ *     daddr   disk address passed to bread() when dinp is NULL
+ *     vpp     out: the vnode on success, NULL on every failure path
+ *     dinp    user-space copy of the dinode, or NULL to read it from disk
+ *
+ * Returns 0 on success, otherwise the error reported by lfs_vcreate,
+ * copyin, bread or ufs_vinit.  On success the inode carries IMOD; for an
+ * inode found on the hash chain lfs_uinodes is incremented only if IMOD
+ * was not already set.
+ */
+int
+lfs_fastvget(mp, ino, daddr, vpp, dinp)
+       struct mount *mp;
+       ino_t ino;
+       daddr_t daddr;
+       struct vnode **vpp;
+       struct dinode *dinp;
+{
+       register struct inode *ip;
+       struct vnode *vp;
+       struct ufsmount *ump;
+       struct buf *bp;
+       dev_t dev;
+       int error;
+
+       ump = VFSTOUFS(mp);
+       dev = ump->um_dev;
+       if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
+               /* Found on the hash chain: flag it, counting it only once. */
+               ip = VTOI(*vpp);
+               if (!(ip->i_flag & IMOD)) {
+                       ++ump->um_lfs->lfs_uinodes;
+                       ip->i_flag |= IMOD;
+               }
+               return (0);
+       }
+
+       /* Allocate new vnode/inode. */
+       if (error = lfs_vcreate(mp, ino, &vp)) {
+               *vpp = NULL;
+               return (error);
+       }
+
+       /*
+        * Put it onto its hash chain and lock it so that other requests for
+        * this inode will block if they arrive while we are sleeping waiting
+        * for old data structures to be purged or for the contents of the
+        * disk portion of this inode to be read.
+        */
+       ip = VTOI(vp);
+       ufs_ihashins(ip);
+
+       /*
+        * XXX
+        * This may not need to be here, logically it should go down with
+        * the i_devvp initialization.
+        * Ask Kirk.
+        */
+       ip->i_lfs = ump->um_lfs;
+
+       /* Read in the disk contents for the inode, copy into the inode. */
+       if (dinp) {
+               /*
+                * The braces matter: without them the `else' below pairs
+                * with the copyin test, so a good copyin would be
+                * overwritten from disk and a NULL dinp would read nothing.
+                */
+               if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode))) {
+                       /* Same cleanup as the failed bread below. */
+                       ufs_ihashrem(ip);
+                       vput(vp);
+                       *vpp = NULL;
+                       return (error);
+               }
+       } else {
+               if (error = bread(ump->um_devvp, daddr,
+                   (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) {
+                       /*
+                        * The inode does not contain anything useful, so it
+                        * would be misleading to leave it on its hash chain.
+                        * Iput() will return it to the free list.
+                        */
+                       ufs_ihashrem(ip);
+
+                       /* Unlock and discard unneeded inode. */
+                       vput(vp);
+                       brelse(bp);
+                       *vpp = NULL;
+                       return (error);
+               }
+               ip->i_din = *lfs_ifind(ump->um_lfs, ino, bp->b_un.b_dino);
+               brelse(bp);
+       }
+
+       /* Inode was just read from user space or disk, make sure it's locked */
+       ip->i_flag |= ILOCKED;
+
+       /*
+        * Initialize the vnode from the inode, check for aliases.
+        * NOTE(review): ufs_vinit is handed &vp and may replace the vnode,
+        * yet ip is not re-derived from vp afterwards -- confirm that the
+        * uses of ip below are still valid in the aliased case.
+        */
+       if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
+               vput(vp);
+               *vpp = NULL;
+               return (error);
+       }
+       /*
+        * Finish inode initialization now that aliasing has been resolved.
+        * The inode is flagged IMOD and counted in lfs_uinodes.
+        */
+       ip->i_devvp = ump->um_devvp;
+       ip->i_flag |= IMOD;
+       ++ump->um_lfs->lfs_uinodes;
+       VREF(ip->i_devvp);
+       *vpp = vp;
+       return (0);
+}
+/*
+ * Obtain a buffer header from lfs_newbuf for logical block lbn of vp and
+ * make it describe size bytes at the caller-supplied address uaddr.  The
+ * address is stashed in b_saveaddr, both b_bufsize and b_bcount are set to
+ * size, and the buffer is flagged B_INVAL before being returned.
+ */
+struct buf *
+lfs_fakebuf(vp, lbn, size, uaddr)
+       struct vnode *vp;
+       int lbn;
+       size_t size;
+       caddr_t uaddr;
+{
+       struct buf *fake;
+
+       fake = lfs_newbuf(vp, lbn, 0);
+
+       /* Flag first, then fill in the length and address fields. */
+       fake->b_flags |= B_INVAL;
+       fake->b_bcount = size;
+       fake->b_bufsize = size;
+       fake->b_saveaddr = uaddr;
+
+       return (fake);
+}