LFS version 2; minor cleanups, nothing important
[unix-history] / usr / src / sys / ufs / lfs / lfs_balloc.c
index 8954954..ca75b9b 100644 (file)
 /*
 /*
- * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
+ * Copyright (c) 1989, 1991 Regents of the University of California.
  * All rights reserved.
  *
  * %sccs.include.redist.c%
  *
  * All rights reserved.
  *
  * %sccs.include.redist.c%
  *
- *     @(#)lfs_balloc.c        7.13 (Berkeley) %G%
+ *     @(#)lfs_balloc.c        7.22 (Berkeley) %G%
  */
 
  */
 
-#include "param.h"
-#include "systm.h"
-#include "buf.h"
-#include "proc.h"
-#include "file.h"
-#include "vnode.h"
+#include <sys/param.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/resourcevar.h>
+#include <sys/specdev.h>
+#include <sys/trace.h>
 
 
-#include "quota.h"
-#include "inode.h"
-#include "fs.h"
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+
+#include <ufs/lfs/lfs.h>
+#include <ufs/lfs/lfs_extern.h>
 
 /*
 
 /*
- * Bmap converts a the logical block number of a file
- * to its physical block number on the disk. The conversion
- * is done by using the logical block number to index into
- * the array of block pointers described by the dinode.
+ * Bmap converts the logical block number of a file to its physical block
+ * number on the disk. The conversion is done by using the logical block
+ * number to index into the array of block pointers described by the dinode.
+ *
+ * LFS has a different version of bmap from FFS because of a naming conflict.
+ * In FFS, meta blocks are given real disk addresses at allocation time, and
+ * are linked into the device vnode, using a logical block number which is
+ * the same as the physical block number.  This can't be done by LFS because
+ * blocks aren't given disk addresses until they're written, so there's no
+ * way to distinguish the meta-data blocks for one file from any other file.
+ * This means that meta-data blocks have to be on the vnode for the file so
+ * they can be found, and have to have "names" different from the standard
+ * data blocks.  To do this, we divide the name space into positive and
+ * negative block numbers, and give the meta-data blocks negative logical
+ * numbers.  Indirect blocks are addressed by the negative address of the
+ * first data block to which they point.  Double indirect blocks are addressed
+ * by one less than the address of the first indirect block to which they
+ * point.  Triple indirect blocks are addressed by one less than the address
+ * of the first double indirect block to which they point.
  */
  */
-bmap(ip, bn, bnp)
-       register struct inode *ip;
+int
+lfs_bmap(vp, bn, vpp, bnp)
+       struct vnode *vp;
        register daddr_t bn;
        register daddr_t bn;
-       daddr_t *bnp;
+       struct vnode **vpp;
+       daddr_t *bnp;
 {
 {
-       register struct fs *fs;
+       register struct inode *ip;
+       register struct lfs *fs;
        register daddr_t nb;
        struct buf *bp;
        register daddr_t nb;
        struct buf *bp;
-       daddr_t *bap;
-       int i, j, sh;
-       int error;
+       struct vnode *devvp;
+       daddr_t *bap, daddr, metalbn;
+       long realbn;
+       int error, j, off, sh;
 
 
-       if (bn < 0)
-               return (EFBIG);
-       fs = ip->i_fs;
-
-       /*
-        * The first NDADDR blocks are direct blocks
-        */
-       if (bn < NDADDR) {
-               nb = ip->i_db[bn];
-               if (nb == 0) {
-                       *bnp = (daddr_t)-1;
-                       return (0);
-               }
-               *bnp = fsbtodb(fs, nb);
-               return (0);
-       }
-       /*
-        * Determine the number of levels of indirection.
-        */
-       sh = 1;
-       bn -= NDADDR;
-       for (j = NIADDR; j > 0; j--) {
-               sh *= NINDIR(fs);
-               if (bn < sh)
-                       break;
-               bn -= sh;
-       }
-       if (j == 0)
-               return (EFBIG);
        /*
        /*
-        * Fetch through the indirect blocks.
+        * Hand back the underlying device vnode if one was requested, and
+        * return early when no logical to physical mapping is wanted.
         */
         */
-       nb = ip->i_ib[NIADDR - j];
-       if (nb == 0) {
-               *bnp = (daddr_t)-1;
+       ip = VTOI(vp);
+       if (vpp != NULL)
+               *vpp = ip->i_devvp;
+       if (bnp == NULL)
                return (0);
                return (0);
-       }
-       for (; j <= NIADDR; j++) {
-               if (error = bread(ip->i_devvp, fsbtodb(fs, nb),
-                   (int)fs->fs_bsize, NOCRED, &bp)) {
-                       brelse(bp);
-                       return (error);
-               }
-               bap = bp->b_un.b_daddr;
-               sh /= NINDIR(fs);
-               i = (bn / sh) % NINDIR(fs);
-               nb = bap[i];
-               if (nb == 0) {
-                       *bnp = (daddr_t)-1;
-                       brelse(bp);
-                       return (0);
-               }
-               brelse(bp);
-       }
-       *bnp = fsbtodb(fs, nb);
-       return (0);
-}
 
 
-/*
- * Balloc defines the structure of file system storage
- * by allocating the physical blocks on a device given
- * the inode and the logical block number in a file.
- */
-balloc(ip, bn, size, bpp, flags)
-       register struct inode *ip;
-       register daddr_t bn;
-       int size;
-       struct buf **bpp;
-       int flags;
-{
-       register struct fs *fs;
-       register daddr_t nb;
-       struct buf *bp, *nbp;
-       struct vnode *vp = ITOV(ip);
-       int osize, nsize, i, j, sh, error;
-       daddr_t newb, lbn, *bap, pref, blkpref();
-
-       *bpp = (struct buf *)0;
-       if (bn < 0)
-               return (EFBIG);
-       fs = ip->i_fs;
+#ifdef VERBOSE
+printf("lfs_bmap: block number %d, inode %d\n", bn, ip->i_number);
+#endif
+       realbn = bn;
+       if ((long)bn < 0)
+               bn = -(long)bn;
 
 
-       /*
-        * If the next write will extend the file into a new block,
-        * and the file is currently composed of a fragment
-        * this fragment has to be extended to be a full block.
-        */
-       nb = lblkno(fs, ip->i_size);
-       if (nb < NDADDR && nb < bn) {
-               osize = blksize(fs, ip, nb);
-               if (osize < fs->fs_bsize && osize > 0) {
-                       error = realloccg(ip, nb,
-                               blkpref(ip, nb, (int)nb, &ip->i_db[0]),
-                               osize, (int)fs->fs_bsize, &bp);
-                       if (error)
-                               return (error);
-                       ip->i_size = (nb + 1) * fs->fs_bsize;
-                       vnode_pager_setsize(ITOV(ip), (u_long)ip->i_size);
-                       ip->i_db[nb] = dbtofsb(fs, bp->b_blkno);
-                       ip->i_flag |= IUPD|ICHG;
-                       if (flags & B_SYNC)
-                               bwrite(bp);
-                       else
-                               bawrite(bp);
-               }
-       }
-       /*
-        * The first NDADDR blocks are direct blocks
-        */
+       /* The first NDADDR blocks are direct blocks. */
        if (bn < NDADDR) {
                nb = ip->i_db[bn];
        if (bn < NDADDR) {
                nb = ip->i_db[bn];
-               if (nb != 0 && ip->i_size >= (bn + 1) * fs->fs_bsize) {
-                       error = bread(vp, bn, fs->fs_bsize, NOCRED, &bp);
-                       if (error) {
-                               brelse(bp);
-                               return (error);
-                       }
-                       *bpp = bp;
+               if (nb == 0) {
+                       *bnp = UNASSIGNED;
                        return (0);
                }
                        return (0);
                }
-               if (nb != 0) {
-                       /*
-                        * Consider need to reallocate a fragment.
-                        */
-                       osize = fragroundup(fs, blkoff(fs, ip->i_size));
-                       nsize = fragroundup(fs, size);
-                       if (nsize <= osize) {
-                               error = bread(vp, bn, osize, NOCRED, &bp);
-                               if (error) {
-                                       brelse(bp);
-                                       return (error);
-                               }
-                       } else {
-                               error = realloccg(ip, bn,
-                                       blkpref(ip, bn, (int)bn, &ip->i_db[0]),
-                                       osize, nsize, &bp);
-                               if (error)
-                                       return (error);
-                       }
-               } else {
-                       if (ip->i_size < (bn + 1) * fs->fs_bsize)
-                               nsize = fragroundup(fs, size);
-                       else
-                               nsize = fs->fs_bsize;
-                       error = alloc(ip, bn,
-                               blkpref(ip, bn, (int)bn, &ip->i_db[0]),
-                               nsize, &newb);
-                       if (error)
-                               return (error);
-                       bp = getblk(vp, bn, nsize);
-                       bp->b_blkno = fsbtodb(fs, newb);
-                       if (flags & B_CLRBUF)
-                               clrbuf(bp);
-               }
-               ip->i_db[bn] = dbtofsb(fs, bp->b_blkno);
-               ip->i_flag |= IUPD|ICHG;
-               *bpp = bp;
+               *bnp = nb;
                return (0);
        }
                return (0);
        }
-       /*
-        * Determine the number of levels of indirection.
+
+       /* 
+        * Determine the number of levels of indirection.  After this loop
+        * is done, sh indicates the number of data blocks possible at the
+        * given level of indirection, and NIADDR - j is the number of levels
+        * of indirection needed to locate the requested block.
         */
         */
-       pref = 0;
-       sh = 1;
-       lbn = bn;
        bn -= NDADDR;
        bn -= NDADDR;
+       fs = ip->i_lfs;
+       sh = 1;
        for (j = NIADDR; j > 0; j--) {
                sh *= NINDIR(fs);
                if (bn < sh)
        for (j = NIADDR; j > 0; j--) {
                sh *= NINDIR(fs);
                if (bn < sh)
@@ -210,125 +105,72 @@ balloc(ip, bn, size, bpp, flags)
        }
        if (j == 0)
                return (EFBIG);
        }
        if (j == 0)
                return (EFBIG);
-       /*
-        * Fetch the first indirect block allocating if necessary.
+
+       /* Calculate the address of the first meta-block. */
+       if (realbn >= 0)
+               metalbn = -(realbn - bn + NIADDR - j);
+       else
+               metalbn = -(-realbn - bn + NIADDR - j);
+
+       /* 
+        * Fetch through the indirect blocks.  At each iteration, off is the
+        * offset into the bap array which is an array of disk addresses at
+        * the current level of indirection.
         */
         */
-       nb = ip->i_ib[NIADDR - j];
-       if (nb == 0) {
-               pref = blkpref(ip, lbn, 0, (daddr_t *)0);
-               if (error = alloc(ip, lbn, pref, (int)fs->fs_bsize, &newb))
-                       return (error);
-               nb = newb;
-               bp = getblk(ip->i_devvp, fsbtodb(fs, nb), fs->fs_bsize);
-               clrbuf(bp);
+       bp = NULL;
+       devvp = VFSTOUFS(vp->v_mount)->um_devvp;
+       for (off = NIADDR - j, bap = ip->i_ib; j <= NIADDR; j++) {
                /*
                /*
-                * Write synchronously so that indirect blocks
-                * never point at garbage.
+                * In LFS, it's possible to have a block appended to a file
+                * for which the meta-blocks have not yet been allocated.
+                * This is a win if the file never gets written or if the
+                * file's growing.
                 */
                 */
-               if (error = bwrite(bp)) {
-                       blkfree(ip, nb, fs->fs_bsize);
-                       return (error);
-               }
-               ip->i_ib[NIADDR - j] = nb;
-               ip->i_flag |= IUPD|ICHG;
-       }
-       /*
-        * Fetch through the indirect blocks, allocating as necessary.
-        */
-       for (; ; j++) {
-               error = bread(ip->i_devvp, fsbtodb(fs, nb),
-                   (int)fs->fs_bsize, NOCRED, &bp);
-               if (error) {
-                       brelse(bp);
-                       return (error);
-               }
-               bap = bp->b_un.b_daddr;
-               sh /= NINDIR(fs);
-               i = (bn / sh) % NINDIR(fs);
-               nb = bap[i];
-               if (j == NIADDR)
+               if ((daddr = bap[off]) == 0) {
+                       daddr = UNASSIGNED;
                        break;
                        break;
-               if (nb != 0) {
-                       brelse(bp);
-                       continue;
-               }
-               if (pref == 0)
-                       pref = blkpref(ip, lbn, 0, (daddr_t *)0);
-               if (error = alloc(ip, lbn, pref, (int)fs->fs_bsize, &newb)) {
-                       brelse(bp);
-                       return (error);
                }
                }
-               nb = newb;
-               nbp = getblk(ip->i_devvp, fsbtodb(fs, nb), fs->fs_bsize);
-               clrbuf(nbp);
+
+               /* If searching for a meta-data block, quit when found. */
+               if (metalbn == realbn)
+                       break;
+
                /*
                /*
-                * Write synchronously so that indirect blocks
-                * never point at garbage.
+                * Read in the appropriate indirect block.  LFS can't do a
+                * bread because bread knows that FFS will hand it the device
+                * vnode, not the file vnode, so the b_dev and b_blkno would
+                * be wrong.
+                *
+                * XXX
+                * This REALLY needs to be fixed, at the very least it needs
+                * to be rethought when the buffer cache goes away.
                 */
                 */
-               if (error = bwrite(nbp)) {
-                       blkfree(ip, nb, fs->fs_bsize);
+               if (bp)
                        brelse(bp);
                        brelse(bp);
-                       return (error);
-               }
-               bap[i] = nb;
-               /*
-                * If required, write synchronously, otherwise use
-                * delayed write. If this is the first instance of
-                * the delayed write, reassociate the buffer with the
-                * file so it will be written if the file is sync'ed.
-                */
-               if (flags & B_SYNC) {
-                       bwrite(bp);
-               } else if (bp->b_flags & B_DELWRI) {
-                       bdwrite(bp);
-               } else {
-                       bdwrite(bp);
-                       reassignbuf(bp, vp);
-               }
-       }
-       /*
-        * Get the data block, allocating if necessary.
-        */
-       if (nb == 0) {
-               pref = blkpref(ip, lbn, i, &bap[0]);
-               if (error = alloc(ip, lbn, pref, (int)fs->fs_bsize, &newb)) {
-                       brelse(bp);
-                       return (error);
-               }
-               nb = newb;
-               nbp = getblk(vp, lbn, fs->fs_bsize);
-               nbp->b_blkno = fsbtodb(fs, nb);
-               if (flags & B_CLRBUF)
-                       clrbuf(nbp);
-               bap[i] = nb;
-               /*
-                * If required, write synchronously, otherwise use
-                * delayed write. If this is the first instance of
-                * the delayed write, reassociate the buffer with the
-                * file so it will be written if the file is sync'ed.
-                */
-               if (flags & B_SYNC) {
-                       bwrite(bp);
-               } else if (bp->b_flags & B_DELWRI) {
-                       bdwrite(bp);
+               bp = getblk(vp, metalbn, fs->lfs_bsize);
+               if (bp->b_flags & (B_DONE | B_DELWRI)) {
+                       trace(TR_BREADHIT, pack(vp, size), metalbn);
                } else {
                } else {
-                       bdwrite(bp);
-                       reassignbuf(bp, vp);
-               }
-               *bpp = nbp;
-               return (0);
-       }
-       brelse(bp);
-       if (flags & B_CLRBUF) {
-               error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
-               if (error) {
-                       brelse(nbp);
-                       return (error);
+                       trace(TR_BREADMISS, pack(vp, size), metalbn);
+                       bp->b_blkno = daddr;
+                       bp->b_flags |= B_READ;
+                       bp->b_dev = devvp->v_rdev;
+                       (devvp->v_op->vop_strategy)(bp);
+                       curproc->p_stats->p_ru.ru_inblock++;    /* XXX */
+                       if (error = biowait(bp)) {
+                               brelse(bp);
+                               return (error);
+                       }
                }
                }
-       } else {
-               nbp = getblk(vp, lbn, fs->fs_bsize);
-               nbp->b_blkno = fsbtodb(fs, nb);
+
+               bap = bp->b_un.b_daddr;
+               sh /= NINDIR(fs);
+               off = (bn / sh) % NINDIR(fs);
+               metalbn -= -1 + off * sh;
        }
        }
-       *bpp = nbp;
+       if (bp)
+               brelse(bp);
+
+       *bnp = daddr;
        return (0);
 }
        return (0);
 }