usr/src/sys/ufs/ffs/ffs_inode.c

/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *      @(#)ffs_inode.c 7.54 (Berkeley) %G%
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/kernel.h>
#include <sys/malloc.h>

#include <vm/vm.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

/*
 * File-local helper, defined at the end of this file; ffs_truncate()
 * calls it to release the blocks hanging off one indirect block.
 */
static int ffs_indirtrunc __P((struct inode *, daddr_t, daddr_t, int, long *));

/*
 * Most recently issued inode generation number.  ffs_vget() advances
 * it when it has to assign a generation number to an inode.
 */
extern u_long nextgennumber;

/*
 * FFS initialization entry point.  There is nothing FFS-specific
 * to do here: delegate to ufs_init() and pass its result through.
 */
int
ffs_init()
{
        int status;

        status = ufs_init();
        return (status);
}

/*
 * Look up a UFS dinode number to find its incore vnode.
 * If it is not in core, read it in from the specified device.
 * If it is in core, wait for the lock bit to clear, then
 * return the inode locked. Detection and handling of mount
 * points must be done by the calling routine.
 */
ffs_vget (ap)
        struct vop_vget_args *ap;
{
        register struct fs *fs;
        register struct inode *ip;
        struct ufsmount *ump;
        struct buf *bp;
        struct dinode *dp;
        struct vnode *vp;
        union ihead *ih;
        dev_t dev;
        int i, type, error;

        ump = VFSTOUFS(ap->a_mp);
        dev = ump->um_dev;
        if ((*ap->a_vpp = ufs_ihashget(dev, ap->a_ino)) != NULL)
                return (0);

        /* Allocate a new vnode/inode. */
        if (error = getnewvnode(VT_UFS, ap->a_mp, ffs_vnodeop_p, &vp)) {
                *ap->a_vpp = NULL;
                return (error);
        }
        type = ump->um_devvp->v_tag == VT_MFS ? M_MFSNODE : M_FFSNODE; /* XXX */
        MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);
        vp->v_data = ip;
        ip->i_vnode = vp;
        ip->i_flag = 0;
        ip->i_devvp = 0;
        ip->i_mode = 0;
        ip->i_diroff = 0;
        ip->i_lockf = 0;
        ip->i_fs = fs = ump->um_fs;
        ip->i_dev = dev;
        ip->i_number = ap->a_ino;
#ifdef QUOTA
        for (i = 0; i < MAXQUOTAS; i++)
                ip->i_dquot[i] = NODQUOT;
#endif
        /*
         * Put it onto its hash chain and lock it so that other requests for
         * this inode will block if they arrive while we are sleeping waiting
         * for old data structures to be purged or for the contents of the
         * disk portion of this inode to be read.
         */
        ufs_ihashins(ip);

        /* Read in the disk contents for the inode, copy into the inode. */
        if (error = bread(ump->um_devvp, fsbtodb(fs, itod(fs, ap->a_ino)),
            (int)fs->fs_bsize, NOCRED, &bp)) {
                /*
                 * The inode does not contain anything useful, so it would
                 * be misleading to leave it on its hash chain. It will be
                 * returned to the free list by ufs_iput().
                 */
                remque(ip);
                ip->i_forw = ip;
                ip->i_back = ip;

                /* Unlock and discard unneeded inode. */
                ufs_iput(ip);
                brelse(bp);
                *ap->a_vpp = NULL;
                return (error);
        }
        dp = bp->b_un.b_dino;
        dp += itoo(fs, ap->a_ino);
        ip->i_din = *dp;
        brelse(bp);

        /*
         * Initialize the vnode from the inode, check for aliases.
         * Note that the underlying vnode may have changed.
         */
        if (error = ufs_vinit(ap->a_mp, ffs_specop_p, FFS_FIFOOPS, &vp)) {
                ufs_iput(ip);
                *ap->a_vpp = NULL;
                return (error);
        }
        /*
         * Finish inode initialization now that aliasing has been resolved.
         */
        ip->i_devvp = ump->um_devvp;
        VREF(ip->i_devvp);
        /*
         * Set up a generation number for this inode if it does not
         * already have one. This should only happen on old filesystems.
         */
        if (ip->i_gen == 0) {
                if (++nextgennumber < (u_long)time.tv_sec)
                        nextgennumber = time.tv_sec;
                ip->i_gen = nextgennumber;
                if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
                        ip->i_flag |= IMOD;
        }
        /*
         * Ensure that uid and gid are correct. This is a temporary
         * fix until fsck has been changed to do the update.
         */
        if (fs->fs_inodefmt < FS_44INODEFMT) {          /* XXX */
                ip->i_uid = ip->i_din.di_ouid;          /* XXX */
                ip->i_gid = ip->i_din.di_ogid;          /* XXX */
        }                                               /* XXX */

        *ap->a_vpp = vp;
        return (0);
}

/*
 * Update the access, modified, and inode change times as specified
 * by the IACC, IUPD, and ICHG flags respectively. The IMOD flag
 * is used to specify that the inode needs to be updated but that
 * the times have already been set. The access and modified times
 * are taken from the second and third parameters; the inode change
 * time is always taken from the current time. If waitfor is set,
 * then wait for the disk write of the inode to complete.
 */
int
ffs_update (ap)
        struct vop_update_args *ap;
{
        struct buf *bp;
        struct inode *ip;
        struct dinode *dp;
        register struct fs *fs;

        if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
                return (0);
        ip = VTOI(ap->a_vp);
        if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0)
                return (0);
        if (ip->i_flag&IACC)
                ip->i_atime.tv_sec = ap->a_ta->tv_sec;
        if (ip->i_flag&IUPD) {
                ip->i_mtime.tv_sec = ap->a_tm->tv_sec;
                INCRQUAD(ip->i_modrev);
        }
        if (ip->i_flag&ICHG)
                ip->i_ctime.tv_sec = time.tv_sec;
        ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
        fs = ip->i_fs;
        /*
         * Ensure that uid and gid are correct. This is a temporary
         * fix until fsck has been changed to do the update.
         */
        if (fs->fs_inodefmt < FS_44INODEFMT) {          /* XXX */
                ip->i_din.di_ouid = ip->i_uid;          /* XXX */
                ip->i_din.di_ogid = ip->i_gid;          /* XXX */
        }                                               /* XXX */
        if (error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)),
                (int)fs->fs_bsize, NOCRED, &bp)) {
                brelse(bp);
                return (error);
        }
        dp = bp->b_un.b_dino + itoo(fs, ip->i_number);
        *dp = ip->i_din;
        if (ap->a_waitfor)
                return (bwrite(bp));
        else {
                bdwrite(bp);
                return (0);
        }
}

/*
 * Indirection levels.  They index the inode's indirect block pointer
 * array (i_ib[]) and the lastiblock[] array used by ffs_truncate().
 */
#define SINGLE  0       /* index of single indirect block */
#define DOUBLE  1       /* index of double indirect block */
#define TRIPLE  2       /* index of triple indirect block */
/*
 * Truncate the inode ip to at most length size.  Free affected disk
 * blocks -- the blocks of the file are removed in reverse order.
 *
 * NB: triple indirect blocks are untested.
 */
ffs_truncate (ap)
        struct vop_truncate_args *ap;
{
        USES_VOP_UPDATE;
        register struct vnode *ovp = ap->a_vp;
        register daddr_t lastblock;
        register struct inode *oip;
        daddr_t bn, lbn, lastiblock[NIADDR];
        register struct fs *fs;
        register struct inode *ip;
        struct buf *bp;
        int offset, size, level;
        long count, nblocks, blocksreleased = 0;
        register int i;
        int aflags, error, allerror;
        struct inode tip;
        off_t osize;

        vnode_pager_setsize(ovp, (u_long)ap->a_length);
        oip = VTOI(ovp);
        if (oip->i_size <= ap->a_length) {
                oip->i_flag |= ICHG|IUPD;
                error = VOP_UPDATE(ovp, &time, &time, 1);
                return (error);
        }
        /*
         * Calculate index into inode's block list of
         * last direct and indirect blocks (if any)
         * which we want to keep.  Lastblock is -1 when
         * the file is truncated to 0.
         */
        fs = oip->i_fs;
        lastblock = lblkno(fs, ap->a_length + fs->fs_bsize - 1) - 1;
        lastiblock[SINGLE] = lastblock - NDADDR;
        lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
        lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
        nblocks = btodb(fs->fs_bsize);
        /*
         * Update the size of the file. If the file is not being
         * truncated to a block boundry, the contents of the
         * partial block following the end of the file must be
         * zero'ed in case it ever become accessable again because
         * of subsequent file growth.
         */
        osize = oip->i_size;
        offset = blkoff(fs, ap->a_length);
        if (offset == 0) {
                oip->i_size = ap->a_length;
        } else {
                lbn = lblkno(fs, ap->a_length);
                aflags = B_CLRBUF;
                if (ap->a_flags & IO_SYNC)
                        aflags |= B_SYNC;
#ifdef QUOTA
                if (error = getinoquota(oip))
                        return (error);
#endif
                if (error = ffs_balloc(oip, lbn, offset, ap->a_cred, &bp, aflags))
                        return (error);
                oip->i_size = ap->a_length;
                size = blksize(fs, oip, lbn);
                (void) vnode_pager_uncache(ovp);
                bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset));
                allocbuf(bp, size);
                if (ap->a_flags & IO_SYNC)
                        bwrite(bp);
                else
                        bdwrite(bp);
        }
        /*
         * Update file and block pointers on disk before we start freeing
         * blocks.  If we crash before free'ing blocks below, the blocks
         * will be returned to the free list.  lastiblock values are also
         * normalized to -1 for calls to ffs_indirtrunc below.
         */
        tip = *oip;
        tip.i_size = osize;
        for (level = TRIPLE; level >= SINGLE; level--)
                if (lastiblock[level] < 0) {
                        oip->i_ib[level] = 0;
                        lastiblock[level] = -1;
                }
        for (i = NDADDR - 1; i > lastblock; i--)
                oip->i_db[i] = 0;
        oip->i_flag |= ICHG|IUPD;
        vinvalbuf(ovp, (ap->a_length > 0));
        allerror = VOP_UPDATE(ovp, &time, &time, MNT_WAIT);

        /*
         * Indirect blocks first.
         */
        ip = &tip;
        for (level = TRIPLE; level >= SINGLE; level--) {
                bn = ip->i_ib[level];
                if (bn != 0) {
                        error = ffs_indirtrunc(ip,
                            bn, lastiblock[level], level, &count);
                        if (error)
                                allerror = error;
                        blocksreleased += count;
                        if (lastiblock[level] < 0) {
                                ip->i_ib[level] = 0;
                                ffs_blkfree(ip, bn, fs->fs_bsize);
                                blocksreleased += nblocks;
                        }
                }
                if (lastiblock[level] >= 0)
                        goto done;
        }

        /*
         * All whole direct blocks or frags.
         */
        for (i = NDADDR - 1; i > lastblock; i--) {
                register long bsize;

                bn = ip->i_db[i];
                if (bn == 0)
                        continue;
                ip->i_db[i] = 0;
                bsize = blksize(fs, ip, i);
                ffs_blkfree(ip, bn, bsize);
                blocksreleased += btodb(bsize);
        }
        if (lastblock < 0)
                goto done;

        /*
         * Finally, look for a change in size of the
         * last direct block; release any frags.
         */
        bn = ip->i_db[lastblock];
        if (bn != 0) {
                long oldspace, newspace;

                /*
                 * Calculate amount of space we're giving
                 * back as old block size minus new block size.
                 */
                oldspace = blksize(fs, ip, lastblock);
                ip->i_size = ap->a_length;
                newspace = blksize(fs, ip, lastblock);
                if (newspace == 0)
                        panic("itrunc: newspace");
                if (oldspace - newspace > 0) {
                        /*
                         * Block number of space to be free'd is
                         * the old block # plus the number of frags
                         * required for the storage we're keeping.
                         */
                        bn += numfrags(fs, newspace);
                        ffs_blkfree(ip, bn, oldspace - newspace);
                        blocksreleased += btodb(oldspace - newspace);
                }
        }
done:
/* BEGIN PARANOIA */
        for (level = SINGLE; level <= TRIPLE; level++)
                if (ip->i_ib[level] != oip->i_ib[level])
                        panic("itrunc1");
        for (i = 0; i < NDADDR; i++)
                if (ip->i_db[i] != oip->i_db[i])
                        panic("itrunc2");
/* END PARANOIA */
        oip->i_blocks -= blocksreleased;
        if (oip->i_blocks < 0)                  /* sanity */
                oip->i_blocks = 0;
        oip->i_flag |= ICHG;
#ifdef QUOTA
        if (!getinoquota(oip))
                (void) chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
        return (allerror);
}

/*
 * Release blocks associated with the inode ip and stored in the indirect
 * block bn.  Blocks are free'd in LIFO order up to (but not including)
 * lastbn.  If level is greater than SINGLE, the block is an indirect block
 * and recursive calls to indirtrunc must be used to cleanse other indirect
 * blocks.
 *
 * Arguments:
 *      ip      inode that owns the blocks
 *      bn      filesystem block number of the indirect block to process
 *      lastbn  last block to keep, relative to this indirect block;
 *              -1 means nothing below this block is kept
 *      level   SINGLE, DOUBLE or TRIPLE
 *      countp  set to the number of blocks released, in btodb() units
 *              (0 if the indirect block cannot be read)
 *
 * Returns 0, or the most recent error seen from bread(), bwrite() or a
 * recursive call.  Freeing continues after a write or recursion error.
 *
 * NB: triple indirect blocks are untested.
 */
static int
ffs_indirtrunc(ip, bn, lastbn, level, countp)
        register struct inode *ip;
        daddr_t bn, lastbn;
        int level;
        long *countp;
{
        register int i;
        struct buf *bp;
        register struct fs *fs = ip->i_fs;
        register daddr_t *bap;
        daddr_t *copy, nb, last;
        long blkcount, factor;
        long nblocks, blocksreleased = 0;
        int error, allerror = 0;

        /*
         * Calculate index in current block of last
         * block to be kept.  -1 indicates the entire
         * block so we need not calculate the index.
         */
        factor = 1;
        for (i = SINGLE; i < level; i++)
                factor *= NINDIR(fs);
        last = lastbn;
        if (lastbn > 0)
                last /= factor;
        nblocks = btodb(fs->fs_bsize);
        /*
         * Get buffer of block pointers, zero those
         * entries corresponding to blocks to be free'd,
         * and update on disk copy first.
         */
        error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize,
            NOCRED, &bp);
        if (error) {
                brelse(bp);
                *countp = 0;
                return (error);
        }
        /*
         * Keep a private copy of the original pointers: the buffer is
         * zeroed and written out before any block is freed, and the
         * freeing loops below walk the copy.
         */
        bap = bp->b_un.b_daddr;
        MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
        bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
        bzero((caddr_t)&bap[last + 1],
          (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
        if (last == -1)
                bp->b_flags |= B_INVAL;
        error = bwrite(bp);
        if (error)
                allerror = error;
        bap = copy;

        /*
         * Recursively free totally unused blocks.
         */
        for (i = NINDIR(fs) - 1; i > last; i--) {
                nb = bap[i];
                if (nb == 0)
                        continue;
                if (level > SINGLE) {
                        error = ffs_indirtrunc(ip,
                            nb, (daddr_t)-1, level - 1, &blkcount);
                        if (error)
                                allerror = error;
                        blocksreleased += blkcount;
                }
                ffs_blkfree(ip, nb, fs->fs_bsize);
                blocksreleased += nblocks;
        }

        /*
         * Recursively free last partial block.
         * The loop above leaves i equal to last, the entry being kept.
         */
        if (level > SINGLE && lastbn >= 0) {
                last = lastbn % factor;
                nb = bap[i];
                if (nb != 0) {
                        error = ffs_indirtrunc(ip, nb, last, level - 1,
                            &blkcount);
                        if (error)
                                allerror = error;
                        blocksreleased += blkcount;
                }
        }
        FREE(copy, M_TEMP);
        *countp = blocksreleased;
        return (allerror);
}