debugging, print out information on processes holding and waiting for lock
[unix-history] / usr / src / sys / ufs / lfs / lfs_vnops.c
index c2dd828..2e07812 100644 (file)
-/*     lfs_vnops.c     4.31    82/07/25        */
-
-#include "../h/param.h"
-#include "../h/systm.h"
-#include "../h/dir.h"
-#include "../h/user.h"
-#include "../h/file.h"
-#include "../h/stat.h"
-#include "../h/inode.h"
-#include "../h/fs.h"
-#include "../h/buf.h"
-#include "../h/proc.h"
-#include "../h/inline.h"
-#ifdef EFS
-#include "../net/in.h"
-#include "../h/efs.h"
-#endif
-#include "../h/quota.h"
-#include "../h/descrip.h"
+/*
+ * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley.  The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ *     @(#)lfs_vnops.c 7.28 (Berkeley) %G%
+ */
 
 
-chdir()
-{
+#include "param.h"
+#include "systm.h"
+#include "user.h"
+#include "kernel.h"
+#include "file.h"
+#include "stat.h"
+#include "buf.h"
+#include "proc.h"
+#include "uio.h"
+#include "socket.h"
+#include "socketvar.h"
+#include "conf.h"
+#include "mount.h"
+#include "vnode.h"
+#include "../ufs/inode.h"
+#include "../ufs/fs.h"
+#include "../ufs/quota.h"
 
 
-       chdirec(&u.u_cdir);
-}
+/*
+ * Global vfs data structures for ufs
+ */
+
+int    ufs_lookup(),
+       ufs_create(),
+       ufs_mknod(),
+       ufs_open(),
+       ufs_close(),
+       ufs_access(),
+       ufs_getattr(),
+       ufs_setattr(),
+       ufs_read(),
+       ufs_write(),
+       ufs_ioctl(),
+       ufs_select(),
+       ufs_mmap(),
+       ufs_fsync(),
+       ufs_seek(),
+       ufs_remove(),
+       ufs_link(),
+       ufs_rename(),
+       ufs_mkdir(),
+       ufs_rmdir(),
+       ufs_symlink(),
+       ufs_readdir(),
+       ufs_readlink(),
+       ufs_abortop(),
+       ufs_inactive(),
+       ufs_reclaim(),
+       ufs_lock(),
+       ufs_unlock(),
+       ufs_bmap(),
+       ufs_strategy(),
+       ufs_print();
+
+struct vnodeops ufs_vnodeops = {
+       ufs_lookup,             /* lookup */
+       ufs_create,             /* create */
+       ufs_mknod,              /* mknod */
+       ufs_open,               /* open */
+       ufs_close,              /* close */
+       ufs_access,             /* access */
+       ufs_getattr,            /* getattr */
+       ufs_setattr,            /* setattr */
+       ufs_read,               /* read */
+       ufs_write,              /* write */
+       ufs_ioctl,              /* ioctl */
+       ufs_select,             /* select */
+       ufs_mmap,               /* mmap */
+       ufs_fsync,              /* fsync */
+       ufs_seek,               /* seek */
+       ufs_remove,             /* remove */
+       ufs_link,               /* link */
+       ufs_rename,             /* rename */
+       ufs_mkdir,              /* mkdir */
+       ufs_rmdir,              /* rmdir */
+       ufs_symlink,            /* symlink */
+       ufs_readdir,            /* readdir */
+       ufs_readlink,           /* readlink */
+       ufs_abortop,            /* abortop */
+       ufs_inactive,           /* inactive */
+       ufs_reclaim,            /* reclaim */
+       ufs_lock,               /* lock */
+       ufs_unlock,             /* unlock */
+       ufs_bmap,               /* bmap */
+       ufs_strategy,           /* strategy */
+       ufs_print,              /* print */
+};
+
+int    spec_lookup(),
+       spec_open(),
+       ufsspec_read(),
+       ufsspec_write(),
+       spec_strategy(),
+       spec_bmap(),
+       spec_ioctl(),
+       spec_select(),
+       ufsspec_close(),
+       spec_badop(),
+       spec_nullop();
+
+struct vnodeops spec_inodeops = {
+       spec_lookup,            /* lookup */
+       spec_badop,             /* create */
+       spec_badop,             /* mknod */
+       spec_open,              /* open */
+       ufsspec_close,          /* close */
+       ufs_access,             /* access */
+       ufs_getattr,            /* getattr */
+       ufs_setattr,            /* setattr */
+       ufsspec_read,           /* read */
+       ufsspec_write,          /* write */
+       spec_ioctl,             /* ioctl */
+       spec_select,            /* select */
+       spec_badop,             /* mmap */
+       spec_nullop,            /* fsync */
+       spec_badop,             /* seek */
+       spec_badop,             /* remove */
+       spec_badop,             /* link */
+       spec_badop,             /* rename */
+       spec_badop,             /* mkdir */
+       spec_badop,             /* rmdir */
+       spec_badop,             /* symlink */
+       spec_badop,             /* readdir */
+       spec_badop,             /* readlink */
+       spec_badop,             /* abortop */
+       ufs_inactive,           /* inactive */
+       ufs_reclaim,            /* reclaim */
+       ufs_lock,               /* lock */
+       ufs_unlock,             /* unlock */
+       spec_bmap,              /* bmap */
+       spec_strategy,          /* strategy */
+       ufs_print,              /* print */
+};
+
+enum vtype iftovt_tab[8] = {
+       VNON, VCHR, VDIR, VBLK, VREG, VLNK, VSOCK, VBAD,
+};
+int    vttoif_tab[8] = {
+       0, IFREG, IFDIR, IFBLK, IFCHR, IFLNK, IFSOCK, IFMT,
+};
 
 
-chroot()
+/*
+ * Create a regular file
+ */
+ufs_create(ndp, vap)
+       struct nameidata *ndp;
+       struct vattr *vap;
 {
 {
+       struct inode *ip;
+       int error;
 
 
-       if (suser())
-               chdirec(&u.u_rdir);
+       if (error = maknode(MAKEIMODE(vap->va_type, vap->va_mode), ndp, &ip))
+               return (error);
+       ndp->ni_vp = ITOV(ip);
+       return (0);
 }
 
 }
 
-chdirec(ipp)
-register struct inode **ipp;
+/*
+ * Mknod vnode call
+ */
+/* ARGSUSED */
+ufs_mknod(ndp, vap, cred)
+       struct nameidata *ndp;
+       struct ucred *cred;
+       struct vattr *vap;
 {
 {
-       register struct inode *ip;
-       struct a {
-               char    *fname;
-       };
+       register struct vnode *vp;
+       struct inode *ip;
+       int error;
 
 
-       ip = namei(uchar, 0, 1);
-       if(ip == NULL)
-               return;
-       if((ip->i_mode&IFMT) != IFDIR) {
-               u.u_error = ENOTDIR;
-               goto bad;
+       if (error = maknode(MAKEIMODE(vap->va_type, vap->va_mode), ndp, &ip))
+               return (error);
+       vp = ITOV(ip);
+       if (vap->va_rdev) {
+               /*
+                * Want to be able to use this to make badblock
+                * inodes, so don't truncate the dev number.
+                */
+               ip->i_rdev = vap->va_rdev;
+               ip->i_flag |= IACC|IUPD|ICHG;
        }
        }
-       if(access(ip, IEXEC))
-               goto bad;
-       iunlock(ip);
-       if (*ipp)
-               irele(*ipp);
-       *ipp = ip;
-       return;
-
-bad:
+       /*
+        * Remove inode so that it will be reloaded by iget and
+        * checked to see if it is an alias of an existing entry
+        * in the inode cache.
+        */
        iput(ip);
        iput(ip);
+       vp->v_type = VNON;
+       vgone(vp);
+       return (0);
 }
 
 /*
 }
 
 /*
- * Open system call.
+ * Open called.
+ *
+ * Nothing to do.
  */
  */
-open()
+/* ARGSUSED */
+ufs_open(vp, mode, cred)
+       struct vnode *vp;
+       int mode;
+       struct ucred *cred;
 {
 {
-       register struct inode *ip;
-       register struct a {
-               char    *fname;
-               int     rwmode;
-       } *uap;
-
-       uap = (struct a *)u.u_ap;
-       ip = namei(uchar, 0, 1);
-       if (ip == NULL)
-               return;
-       open1(ip, ++uap->rwmode, 0);
+
+       return (0);
 }
 
 /*
 }
 
 /*
- * Creat system call.
+ * Close called
+ *
+ * Update the times on the inode.
  */
  */
-ocreat()
+/* ARGSUSED */
+ufs_close(vp, fflag, cred)
+       struct vnode *vp;
+       int fflag;
+       struct ucred *cred;
 {
 {
-       register struct inode *ip;
-       register struct a {
-               char    *fname;
-               int     fmode;
-       } *uap;
-
-       uap = (struct a *)u.u_ap;
-       ip = namei(uchar, 1, 1);
-       if (ip == NULL) {
-               if (u.u_error)
-                       return;
-               ip = maknode(uap->fmode&07777&(~ISVTX));
-               if (ip==NULL)
-                       return;
-               open1(ip, FWRITE, 2);
-       } else
-               open1(ip, FWRITE, 1);
+       register struct inode *ip = VTOI(vp);
+
+       if (vp->v_usecount > 1 && !(ip->i_flag & ILOCKED))
+               ITIMES(ip, &time, &time);
+       return (0);
+}
+
+ufs_access(vp, mode, cred)
+       struct vnode *vp;
+       int mode;
+       struct ucred *cred;
+{
+       /*
+        * Access vnode op: hand the requested mode and the caller's
+        * credentials to iaccess() on the inode behind vp and return
+        * its result unchanged.
+        */
+       return (iaccess(VTOI(vp), mode, cred));
+}
+
+/* ARGSUSED */
+ufs_getattr(vp, vap, cred)
+       struct vnode *vp;
+       register struct vattr *vap;
+       struct ucred *cred;
+{
+       register struct inode *ip = VTOI(vp);
+       /*
+        * Getattr vnode op: fill *vap from the inode behind vp.
+        * cred is not consulted and the routine always returns 0.
+        * NOTE(review): ITIMES presumably brings the inode time
+        * fields up to date before they are copied out -- confirm
+        * against the macro's definition.
+        */
+       ITIMES(ip, &time, &time);
+       /*
+        * Copy from inode table
+        */
+       vap->va_fsid = ip->i_dev;
+       vap->va_fileid = ip->i_number;
+       vap->va_mode = ip->i_mode & ~IFMT;      /* IFMT bits stripped; type goes in va_type below */
+       vap->va_nlink = ip->i_nlink;
+       vap->va_uid = ip->i_uid;
+       vap->va_gid = ip->i_gid;
+       vap->va_rdev = (dev_t)ip->i_rdev;
+       vap->va_size = ip->i_din.di_qsize.val[0];       /* size is held as two words, copied one at a time */
+       vap->va_size1 = ip->i_din.di_qsize.val[1];
+       vap->va_atime.tv_sec = ip->i_atime;
+       vap->va_atime.tv_usec = 0;      /* only seconds are copied; usec is always reported as 0 */
+       vap->va_mtime.tv_sec = ip->i_mtime;
+       vap->va_mtime.tv_usec = 0;
+       vap->va_ctime.tv_sec = ip->i_ctime;
+       vap->va_ctime.tv_usec = 0;
+       vap->va_flags = ip->i_flags;
+       vap->va_gen = ip->i_gen;
+       /*
+        * this doesn't belong here
+        * (va_blocksize is chosen by vnode type: BLKDEV_IOSIZE for
+        * VBLK, MAXBSIZE for VCHR, the filesystem's fs_bsize otherwise)
+        */
+       if (vp->v_type == VBLK)
+               vap->va_blocksize = BLKDEV_IOSIZE;
+       else if (vp->v_type == VCHR)
+               vap->va_blocksize = MAXBSIZE;
+       else
+               vap->va_blocksize = ip->i_fs->fs_bsize;
+       vap->va_bytes = dbtob(ip->i_blocks);
+       vap->va_bytes1 = -1;
+       vap->va_type = vp->v_type;
+       return (0);
 }
 
 /*
 }
 
 /*
- * Common code for open and creat.
- * Check permissions, allocate an open file structure,
- * and call the device open routine if any.
+ * Set attribute vnode op. called from several syscalls
  */
  */
-open1(ip, mode, trf)
-       register struct inode *ip;
-       register mode;
-{
-       register struct file *fp;
-       int i;
-
-       if (trf != 2) {
-               if (mode&FREAD)
-                       (void) access(ip, IREAD);
-               if (mode&FWRITE) {
-                       (void) access(ip, IWRITE);
-                       if ((ip->i_mode&IFMT) == IFDIR)
-                               u.u_error = EISDIR;
+ufs_setattr(vp, vap, cred)
+       register struct vnode *vp;
+       register struct vattr *vap;
+       register struct ucred *cred;
+{
+       register struct inode *ip = VTOI(vp);
+       int error = 0;
+
+       /*
+        * Check for unsettable attributes.
+        */
+       if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
+           (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
+           (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
+           ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
+               return (EINVAL);
+       }
+       /*
+        * Go through the fields and update iff not VNOVAL.
+        */
+       if (vap->va_uid != (u_short)VNOVAL || vap->va_gid != (u_short)VNOVAL)
+               if (error = chown1(vp, vap->va_uid, vap->va_gid, cred))
+                       return (error);
+       if (vap->va_size != VNOVAL) {
+               if (vp->v_type == VDIR)
+                       return (EISDIR);
+               if (error = itrunc(ip, vap->va_size, 0)) /* XXX IO_SYNC? */
+                       return (error);
+       }
+       if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
+               if (cred->cr_uid != ip->i_uid &&
+                   (error = suser(cred, &u.u_acflag)))
+                       return (error);
+               if (vap->va_atime.tv_sec != VNOVAL)
+                       ip->i_flag |= IACC;
+               if (vap->va_mtime.tv_sec != VNOVAL)
+                       ip->i_flag |= IUPD;
+               ip->i_flag |= ICHG;
+               if (error = iupdat(ip, &vap->va_atime, &vap->va_mtime, 1))
+                       return (error);
+       }
+       if (vap->va_mode != (u_short)VNOVAL)
+               error = chmod1(vp, (int)vap->va_mode, cred);
+       if (vap->va_flags != VNOVAL) {
+               if (cred->cr_uid != ip->i_uid &&
+                   (error = suser(cred, &u.u_acflag)))
+                       return (error);
+               if (cred->cr_uid == 0) {
+                       ip->i_flags = vap->va_flags;
+               } else {
+                       ip->i_flags &= 0xffff0000;
+                       ip->i_flags |= (vap->va_flags & 0xffff);
                }
                }
+               ip->i_flag |= ICHG;
        }
        }
-       if (u.u_error) {
-               iput(ip);
-               return;
+       return (error);
+}
+
+/*
+ * Change the mode on a file.
+ * Inode must be locked before calling.
+ */
+chmod1(vp, mode, cred)
+       register struct vnode *vp;
+       register int mode;
+       struct ucred *cred;
+{
+       register struct inode *ip = VTOI(vp);
+       int error;
+
+       if (cred->cr_uid != ip->i_uid &&
+           (error = suser(cred, &u.u_acflag)))
+               return (error);
+       ip->i_mode &= ~07777;
+       if (cred->cr_uid) {
+               if (vp->v_type != VDIR)
+                       mode &= ~ISVTX;
+               if (!groupmember(ip->i_gid, cred))
+                       mode &= ~ISGID;
        }
        }
-       if (trf == 1)
-               itrunc(ip);
-       iunlock(ip);
-       if ((fp = falloc()) == NULL)
-               goto out;
-       fp->f_flag = mode&(FREAD|FWRITE);
-       fp->f_type = DTYPE_FILE;
-       i = u.u_r.r_val1;
-       fp->f_inode = ip;
-#ifdef EFS
-       openi(ip, mode&(FREAD|FWRITE), trf);
+       ip->i_mode |= mode & 07777;
+       ip->i_flag |= ICHG;
+       if ((vp->v_flag & VTEXT) && (ip->i_mode & ISVTX) == 0)
+               xrele(vp);
+       return (0);
+}
+
+/*
+ * Perform chown operation on inode ip;
+ * inode must be locked prior to call.
+ */
+chown1(vp, uid, gid, cred)
+       register struct vnode *vp;
+       uid_t uid;
+       gid_t gid;
+       struct ucred *cred;
+{
+       register struct inode *ip = VTOI(vp);
+#ifdef QUOTA
+       register long change;
+#endif
+       int error;
+
+       if (uid == (u_short)VNOVAL)
+               uid = ip->i_uid;
+       if (gid == (u_short)VNOVAL)
+               gid = ip->i_gid;
+       /*
+        * If we don't own the file, are trying to change the owner
+        * of the file, or are not a member of the target group,
+        * the caller must be superuser or the call fails.
+        */
+       if ((cred->cr_uid != ip->i_uid || uid != ip->i_uid ||
+           !groupmember((gid_t)gid, cred)) &&
+           (error = suser(cred, &u.u_acflag)))
+               return (error);
+#ifdef QUOTA
+       if (ip->i_uid == uid)           /* this just speeds things a little */
+               change = 0;
+       else
+               change = ip->i_blocks;
+       (void) chkdq(ip, -change, 1);
+       (void) chkiq(ip->i_dev, ip, ip->i_uid, 1);
+       dqrele(ip->i_dquot);
+#endif
+       if (ip->i_uid != uid && cred->cr_uid != 0)
+               ip->i_mode &= ~ISUID;
+       if (ip->i_gid != gid && cred->cr_uid != 0)
+               ip->i_mode &= ~ISGID;
+       ip->i_uid = uid;
+       ip->i_gid = gid;
+       ip->i_flag |= ICHG;
+#ifdef QUOTA
+       ip->i_dquot = inoquota(ip);
+       (void) chkdq(ip, change, 1);
+       (void) chkiq(ip->i_dev, (struct inode *)NULL, (uid_t)uid, 1);
+       return (u.u_error);             /* should == 0 ALWAYS !! */
 #else
 #else
-       openi(ip, mode&(FREAD|FWRITE));
+       return (0);
 #endif
 #endif
-       if (u.u_error == 0)
-               return;
-       u.u_ofile[i] = NULL;
-       fp->f_count--;
-out:
-       irele(ip);
 }
 
 /*
 }
 
 /*
- * Mknod system call
+ * Vnode op for reading.
  */
  */
-mknod()
+/* ARGSUSED */
+ufs_read(vp, uio, ioflag, cred)
+       struct vnode *vp;
+       register struct uio *uio;
+       int ioflag;
+       struct ucred *cred;
 {
 {
-       register struct inode *ip;
-       register struct a {
-               char    *fname;
-               int     fmode;
-               int     dev;
-       } *uap;
-
-       uap = (struct a *)u.u_ap;
-       if (suser()) {
-               ip = namei(uchar, 1, 0);
-               if (ip != NULL) {
-                       u.u_error = EEXIST;
-                       goto out;
-               }
-       }
-       if (u.u_error)
-               return;
-       ip = maknode(uap->fmode);
-       if (ip == NULL)
-               return;
-       if (uap->dev) {
-               /*
-                * Want to be able to use this to make badblock
-                * inodes, so don't truncate the dev number.
-                */
-               ip->i_rdev = uap->dev;
-               ip->i_flag |= IACC|IUPD|ICHG;
-       }
+       register struct inode *ip = VTOI(vp);
+       register struct fs *fs;
+       struct buf *bp;
+       daddr_t lbn, bn, rablock;
+       int size, diff, error = 0;
+       long n, on, type;
 
 
-out:
-       iput(ip);
+       if (uio->uio_rw != UIO_READ)
+               panic("ufs_read mode");
+       type = ip->i_mode & IFMT;
+       if (type != IFDIR && type != IFREG && type != IFLNK)
+               panic("ufs_read type");
+       if (uio->uio_resid == 0)
+               return (0);
+       if (uio->uio_offset < 0)
+               return (EINVAL);
+       ip->i_flag |= IACC;
+       fs = ip->i_fs;
+       do {
+               lbn = lblkno(fs, uio->uio_offset);
+               on = blkoff(fs, uio->uio_offset);
+               n = MIN((unsigned)(fs->fs_bsize - on), uio->uio_resid);
+               diff = ip->i_size - uio->uio_offset;
+               if (diff <= 0)
+                       return (0);
+               if (diff < n)
+                       n = diff;
+               size = blksize(fs, ip, lbn);
+               rablock = lbn + 1;
+               if (vp->v_lastr + 1 == lbn &&
+                   lblktosize(fs, rablock) < ip->i_size)
+                       error = breada(ITOV(ip), lbn, size, rablock,
+                               blksize(fs, ip, rablock), NOCRED, &bp);
+               else
+                       error = bread(ITOV(ip), lbn, size, NOCRED, &bp);
+               vp->v_lastr = lbn;
+               n = MIN(n, size - bp->b_resid);
+               if (error) {
+                       brelse(bp);
+                       return (error);
+               }
+               error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
+               if (n + on == fs->fs_bsize || uio->uio_offset == ip->i_size)
+                       bp->b_flags |= B_AGE;
+               brelse(bp);
+       } while (error == 0 && uio->uio_resid > 0 && n != 0);
+       return (error);
 }
 
 /*
 }
 
 /*
- * link system call
+ * Vnode op for writing.
  */
  */
-link()
+ufs_write(vp, uio, ioflag, cred)
+       register struct vnode *vp;
+       struct uio *uio;
+       int ioflag;
+       struct ucred *cred;
 {
 {
-       register struct inode *ip, *xp;
-       register struct a {
-               char    *target;
-               char    *linkname;
-       } *uap;
+       register struct inode *ip = VTOI(vp);
+       register struct fs *fs;
+       struct buf *bp;
+       daddr_t lbn, bn;
+       u_long osize;
+       int i, n, on, flags;
+       int count, size, resid, error = 0;
 
 
-       uap = (struct a *)u.u_ap;
-       ip = namei(uchar, 0, 1);    /* well, this routine is doomed anyhow */
-       if (ip == NULL)
-               return;
-       if ((ip->i_mode&IFMT)==IFDIR && !suser()) {
-               iput(ip);
-               return;
+       if (uio->uio_rw != UIO_WRITE)
+               panic("ufs_write mode");
+       switch (vp->v_type) {
+       case VREG:
+               if (ioflag & IO_APPEND)
+                       uio->uio_offset = ip->i_size;
+               /* fall through */
+       case VLNK:
+               break;
+
+       case VDIR:
+               if ((ioflag & IO_SYNC) == 0)
+                       panic("ufs_write nonsync dir write");
+               break;
+
+       default:
+               panic("ufs_write type");
        }
        }
-       ip->i_nlink++;
-       ip->i_flag |= ICHG;
-       iupdat(ip, &time, &time, 1);
-       iunlock(ip);
-       u.u_dirp = (caddr_t)uap->linkname;
-       xp = namei(uchar, 1, 0);
-       if (xp != NULL) {
-               u.u_error = EEXIST;
-               iput(xp);
-               goto out;
+       if (uio->uio_offset < 0)
+               return (EINVAL);
+       if (uio->uio_resid == 0)
+               return (0);
+       /*
+        * Maybe this should be above the vnode op call, but so long as
+        * file servers have no limits, i don't think it matters
+        */
+       if (vp->v_type == VREG &&
+           uio->uio_offset + uio->uio_resid >
+             u.u_rlimit[RLIMIT_FSIZE].rlim_cur) {
+               psignal(u.u_procp, SIGXFSZ);
+               return (EFBIG);
        }
        }
-       if (u.u_error)
-               goto out;
-       if (u.u_pdir->i_dev != ip->i_dev) {
-               iput(u.u_pdir);
-               u.u_error = EXDEV;
-               goto out;
+       resid = uio->uio_resid;
+       osize = ip->i_size;
+       fs = ip->i_fs;
+       flags = 0;
+       if (ioflag & IO_SYNC)
+               flags = B_SYNC;
+       do {
+               lbn = lblkno(fs, uio->uio_offset);
+               on = blkoff(fs, uio->uio_offset);
+               n = MIN((unsigned)(fs->fs_bsize - on), uio->uio_resid);
+               if (n < fs->fs_bsize)
+                       flags |= B_CLRBUF;
+               else
+                       flags &= ~B_CLRBUF;
+               if (error = balloc(ip, lbn, (int)(on + n), &bp, flags))
+                       break;
+               bn = bp->b_blkno;
+               if (uio->uio_offset + n > ip->i_size)
+                       ip->i_size = uio->uio_offset + n;
+               size = blksize(fs, ip, lbn);
+               count = howmany(size, CLBYTES);
+               for (i = 0; i < count; i++)
+                       munhash(ip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
+               n = MIN(n, size - bp->b_resid);
+               error = uiomove(bp->b_un.b_addr + on, n, uio);
+               if (ioflag & IO_SYNC)
+                       (void) bwrite(bp);
+               else if (n + on == fs->fs_bsize) {
+                       bp->b_flags |= B_AGE;
+                       bawrite(bp);
+               } else
+                       bdwrite(bp);
+               ip->i_flag |= IUPD|ICHG;
+               if (cred->cr_uid != 0)
+                       ip->i_mode &= ~(ISUID|ISGID);
+       } while (error == 0 && uio->uio_resid > 0 && n != 0);
+       if (error && (ioflag & IO_UNIT)) {
+               (void) itrunc(ip, osize, ioflag & IO_SYNC);
+               uio->uio_offset -= resid - uio->uio_resid;
+               uio->uio_resid = resid;
        }
        }
-       direnter(ip);
-out:
-       if (u.u_error) {
+       return (error);
+}
+
+/* ARGSUSED */
+ufs_ioctl(vp, com, data, fflag, cred)
+       struct vnode *vp;
+       int com;
+       caddr_t data;
+       int fflag;
+       struct ucred *cred;
+{
+       /*
+        * Ioctl vnode op: no command is handled here.  None of the
+        * arguments is examined; every request fails with ENOTTY.
+        */
+       return (ENOTTY);
+}
+
+/* ARGSUSED */
+ufs_select(vp, which, cred)
+       struct vnode *vp;
+       int which;
+       struct ucred *cred;
+{
+       /*
+        * Select vnode op: vp, which and cred are ignored and 1 is
+        * returned unconditionally.  The original author flagged the
+        * return value with XXX.
+        */
+       return (1);             /* XXX */
+}
+
+/*
+ * Mmap a file
+ *
+ * NB Currently unsupported.
+ *
+ * None of the arguments is examined; the call always fails
+ * with EINVAL.
+ */
+/* ARGSUSED */
+ufs_mmap(vp, fflags, cred)
+       struct vnode *vp;
+       int fflags;
+       struct ucred *cred;
+{
+
+       return (EINVAL);
+}
+
+/*
+ * Synch an open file.
+ *
+ * If the file was open for writing (FWRITE set in fflags) the inode
+ * is first marked ICHG.  The vnode's buffers are then handed to
+ * vflushbuf(), with B_SYNC when waitfor is MNT_WAIT and 0 otherwise;
+ * whatever vflushbuf() returns is not looked at.  The value returned
+ * to the caller is that of iupdat(), whose last argument is non-zero
+ * only when waitfor is MNT_WAIT.  cred is unused.
+ */
+/* ARGSUSED */
+ufs_fsync(vp, fflags, cred, waitfor)
+       struct vnode *vp;
+       int fflags;
+       struct ucred *cred;
+       int waitfor;
+{
+       struct inode *ip = VTOI(vp);
+
+       if (fflags&FWRITE)
+               ip->i_flag |= ICHG;
+       vflushbuf(vp, waitfor == MNT_WAIT ? B_SYNC : 0);
+       return (iupdat(ip, &time, &time, waitfor == MNT_WAIT));
+}
+
+/*
+ * Seek on a file
+ *
+ * Nothing to do, so just return.
+ *
+ * Neither oldoff nor newoff is validated here; the result is
+ * always 0.
+ */
+/* ARGSUSED */
+ufs_seek(vp, oldoff, newoff, cred)
+       struct vnode *vp;
+       off_t oldoff, newoff;
+       struct ucred *cred;
+{
+
+       return (0);
+}
+
+/*
+ * ufs remove
+ * Hard to avoid races here, especially
+ * in unlinking directories.
+ */
+ufs_remove(ndp)
+       struct nameidata *ndp;
+{
+       register struct inode *ip, *dp;
+       int error;
+
+       ip = VTOI(ndp->ni_vp);
+       dp = VTOI(ndp->ni_dvp);
+       error = dirremove(ndp);
+       if (!error) {
                ip->i_nlink--;
                ip->i_flag |= ICHG;
        }
                ip->i_nlink--;
                ip->i_flag |= ICHG;
        }
-out1:
-       irele(ip);
+       if (dp == ip)
+               vrele(ITOV(ip));
+       else
+               iput(ip);
+       iput(dp);
+       return (error);
 }
 
 /*
 }
 
 /*
- * symlink -- make a symbolic link
+ * link vnode call
  */
  */
-symlink()
+ufs_link(vp, ndp)
+       register struct vnode *vp;
+       register struct nameidata *ndp;
 {
 {
-       register struct a {
-               char    *target;
-               char    *linkname;
-       } *uap;
-       register struct inode *ip;
-       register char *tp;
-       register c, nc;
-
-       uap = (struct a *)u.u_ap;
-       tp = uap->target;
-       nc = 0;
-       while (c = fubyte(tp)) {
-               if (c < 0) {
-                       u.u_error = EFAULT;
-                       return;
-               }
-               tp++;
-               nc++;
+       register struct inode *ip = VTOI(vp);
+       int error;
+       /*
+        * Link vnode op for the inode behind vp.  The inode is
+        * locked here unless vp is also the directory vnode
+        * ndp->ni_dvp.  A link count already at LINK_MAX - 1 is
+        * refused with EMLINK.  Otherwise the count is bumped, the
+        * inode is marked ICHG and passed to iupdat(), and only if
+        * that succeeds is direnter(ip, ndp) called (presumably to
+        * create the new name -- confirm against direnter).
+        * Returns 0 or an error number.
+        */
+       if (ndp->ni_dvp != vp)
+               ILOCK(ip);
+       if (ip->i_nlink == LINK_MAX - 1) {
+               error = EMLINK;
+               goto out;
         }
         }
-       u.u_dirp = uap->linkname;
-       ip = namei(uchar, 1, 0);
-       if (ip) {
-               iput(ip);
-               u.u_error = EEXIST;
-               return;
+       ip->i_nlink++;
+       ip->i_flag |= ICHG;
+       error = iupdat(ip, &time, &time, 1);
+       if (!error)
+               error = direnter(ip, ndp);
+out:
+       if (ndp->ni_dvp != vp)
+               IUNLOCK(ip);
+       if (error) {            /* NOTE(review): the EMLINK path jumps to out
+                                * before i_nlink++ has run, yet still reaches
+                                * this decrement; the undo also happens after
+                                * IUNLOCK -- confirm whether either is intended */
+               ip->i_nlink--;
+               ip->i_flag |= ICHG;
         }
         }
-       if (u.u_error)
-               return;
-       ip = maknode(IFLNK | 0777);
-       if (ip == NULL)
-               return;
-       u.u_base = uap->target;
-       u.u_count = nc;
-       u.u_offset = 0;
-       u.u_segflg = 0;
-       writei(ip);
-       iput(ip);
+       return (error);
 }
 
 /*
 }
 
 /*
- * Unlink system call.
- * Hard to avoid races here, especially
- * in unlinking directories.
+ * Rename system call.
+ *     rename("foo", "bar");
+ * is essentially
+ *     unlink("bar");
+ *     link("foo", "bar");
+ *     unlink("foo");
+ * but ``atomically''.  Can't do full commit without saving state in the
+ * inode on disk which isn't feasible at this time.  Best we can do is
+ * always guarantee the target exists.
+ *
+ * Basic algorithm is:
+ *
+ * 1) Bump link count on source while we're linking it to the
+ *    target.  This also ensures the inode won't be deleted out
+ *    from underneath us while we work (it may be truncated by
+ *    a concurrent `trunc' or `open' for creation).
+ * 2) Link source to destination.  If destination already exists,
+ *    delete it first.
+ * 3) Unlink source reference to inode if still around. If a
+ *    directory was moved and the parent of the destination
+ *    is different from the source, patch the ".." entry in the
+ *    directory.
  */
  */
-unlink()
+ufs_rename(fndp, tndp)
+       register struct nameidata *fndp, *tndp;
 {
 {
-       register struct inode *ip, *pp;
-       struct a {
-               char    *fname;
-       };
-       struct fs *fs;
-       struct buf *bp;
-       int lbn, bn, base;
-       int unlinkingdot = 0;
+       register struct inode *ip, *xp, *dp;
+       struct dirtemplate dirbuf;
+       int doingdirectory = 0, oldparent = 0, newparent = 0;
+       int error = 0;
 
 
-       pp = namei(uchar, 2, 0);
-       if (pp == NULL)
-               return;
-#ifdef EFS
-       /* divert to extended file system if off machine. */
-       if (efsinode(pp)) {
-               dev_t ndev = pp->i_rdev;
-
-               iput(pp);       /* avoid recursive hang on inode */
-               efsunlink(ndev);
-               if (u.u_error != EEXIST)
-                       return;
+       dp = VTOI(fndp->ni_dvp);
+       ip = VTOI(fndp->ni_vp);
+       ILOCK(ip);
+       if ((ip->i_mode&IFMT) == IFDIR) {
+               register struct direct *d = &fndp->ni_dent;
 
                /*
 
                /*
-                * If a null pathname remainder, then do
-                * the unlink locally after restoring state.
+                * Avoid ".", "..", and aliases of "." for obvious reasons.
                 */
                 */
-               u.u_error = 0;
-               u.u_dirp = (caddr_t)u.u_arg[0];
-               pp = namei(uchar, 2, 0);
+               if ((d->d_namlen == 1 && d->d_name[0] == '.') || dp == ip ||
+                   fndp->ni_isdotdot || (ip->i_flag & IRENAME)) {
+                       IUNLOCK(ip);
+                       ufs_abortop(fndp);
+                       ufs_abortop(tndp);
+                       return (EINVAL);
+               }
+               ip->i_flag |= IRENAME;
+               oldparent = dp->i_number;
+               doingdirectory++;
        }
        }
-#endif
+       vrele(fndp->ni_dvp);
 
        /*
 
        /*
-        * Check for unlink(".")
-        * to avoid hanging on the iget
+        * 1) Bump link count while we're moving stuff
+        *    around.  If we crash somewhere before
+        *    completing our work, the link count
+        *    may be wrong, but correctable.
         */
         */
-       if (pp->i_number == u.u_dent.d_ino) {
-               ip = pp;
-               ip->i_count++;
-               unlinkingdot++;
-       } else
-               ip = iget(pp->i_dev, pp->i_fs, u.u_dent.d_ino);
-       if(ip == NULL)
-               goto out1;
-       if((ip->i_mode&IFMT)==IFDIR && !suser())
-               goto out;
+       ip->i_nlink++;
+       ip->i_flag |= ICHG;
+       error = iupdat(ip, &time, &time, 1);
+       IUNLOCK(ip);
+
        /*
        /*
-        * Don't unlink a mounted file.
+        * When the target exists, both the directory
+        * and target vnodes are returned locked.
         */
         */
-       if (ip->i_dev != pp->i_dev) {
-               u.u_error = EBUSY;
-               goto out;
+       dp = VTOI(tndp->ni_dvp);
+       xp = NULL;
+       if (tndp->ni_vp)
+               xp = VTOI(tndp->ni_vp);
+       /*
+        * If ".." must be changed (ie the directory gets a new
+        * parent) then the source directory must not be in the
+        * directory hierarchy above the target, as this would
+        * orphan everything below the source directory. Also
+        * the user must have write permission in the source so
+        * as to be able to change "..". We must repeat the call 
+        * to namei, as the parent directory is unlocked by the
+        * call to checkpath().
+        */
+       if (oldparent != dp->i_number)
+               newparent = dp->i_number;
+       if (doingdirectory && newparent) {
+               if (error = iaccess(ip, IWRITE, tndp->ni_cred))
+                       goto bad;
+               tndp->ni_nameiop = RENAME | LOCKPARENT | LOCKLEAF | NOCACHE;
+               do {
+                       dp = VTOI(tndp->ni_dvp);
+                       if (xp != NULL)
+                               iput(xp);
+                       if (error = checkpath(ip, dp, tndp->ni_cred))
+                               goto out;
+                       if (error = namei(tndp))
+                               goto out;
+                       xp = NULL;
+                       if (tndp->ni_vp)
+                               xp = VTOI(tndp->ni_vp);
+               } while (dp != VTOI(tndp->ni_dvp));
        }
        }
-       if (ip->i_flag&ITEXT)
-               xrele(ip);      /* try once to free text */
-       if (dirremove()) {
-               ip->i_nlink--;
-               ip->i_flag |= ICHG;
+       /*
+        * 2) If target doesn't exist, link the target
+        *    to the source and unlink the source. 
+        *    Otherwise, rewrite the target directory
+        *    entry to reference the source inode and
+        *    expunge the original entry's existence.
+        */
+       if (xp == NULL) {
+               if (dp->i_dev != ip->i_dev)
+                       panic("rename: EXDEV");
+               /*
+                * Account for ".." in new directory.
+                * When source and destination have the same
+                * parent we don't fool with the link count.
+                */
+               if (doingdirectory && newparent) {
+                       dp->i_nlink++;
+                       dp->i_flag |= ICHG;
+                       error = iupdat(dp, &time, &time, 1);
+               }
+               if (error = direnter(ip, tndp))
+                       goto out;
+       } else {
+               if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
+                       panic("rename: EXDEV");
+               /*
+                * Short circuit rename(foo, foo).
+                */
+               if (xp->i_number == ip->i_number)
+                       panic("rename: same file");
+               /*
+                * If the parent directory is "sticky", then the user must
+                * own the parent directory, or the destination of the rename,
+                * otherwise the destination may not be changed (except by
+                * root). This implements append-only directories.
+                */
+               if ((dp->i_mode & ISVTX) && tndp->ni_cred->cr_uid != 0 &&
+                   tndp->ni_cred->cr_uid != dp->i_uid &&
+                   xp->i_uid != tndp->ni_cred->cr_uid) {
+                       error = EPERM;
+                       goto bad;
+               }
+               /*
+                * Target must be empty if a directory
+                * and have no links to it.
+                * Also, ensure source and target are
+                * compatible (both directories, or both
+                * not directories).
+                */
+               if ((xp->i_mode&IFMT) == IFDIR) {
+                       if (!dirempty(xp, dp->i_number, tndp->ni_cred) || 
+                           xp->i_nlink > 2) {
+                               error = ENOTEMPTY;
+                               goto bad;
+                       }
+                       if (!doingdirectory) {
+                               error = ENOTDIR;
+                               goto bad;
+                       }
+                       cache_purge(ITOV(dp));
+               } else if (doingdirectory) {
+                       error = EISDIR;
+                       goto bad;
+               }
+               if (error = dirrewrite(dp, ip, tndp))
+                       goto bad;
+               vput(ITOV(dp));
+               /*
+                * Adjust the link count of the target to
+                * reflect the dirrewrite above.  If this is
+                * a directory it is empty and there are
+                * no links to it, so we can squash the inode and
+                * any space associated with it.  We disallowed
+                * renaming over top of a directory with links to
+                * it above, as the remaining link would point to
+                * a directory without "." or ".." entries.
+                */
+               xp->i_nlink--;
+               if (doingdirectory) {
+                       if (--xp->i_nlink != 0)
+                               panic("rename: linked directory");
+                       error = itrunc(xp, (u_long)0, IO_SYNC);
+               }
+               xp->i_flag |= ICHG;
+               iput(xp);
+               xp = NULL;
+       }
+
+       /*
+        * 3) Unlink the source.
+        */
+       fndp->ni_nameiop = DELETE | LOCKPARENT | LOCKLEAF;
+       (void)namei(fndp);
+       if (fndp->ni_vp != NULL) {
+               xp = VTOI(fndp->ni_vp);
+               dp = VTOI(fndp->ni_dvp);
+       } else {
+               if (fndp->ni_dvp != NULL)
+                       vput(fndp->ni_dvp);
+               xp = NULL;
+               dp = NULL;
        }
        }
+       /*
+        * Ensure that the directory entry still exists and has not
+        * changed while the new name has been entered. If the source is
+        * a file then the entry may have been unlinked or renamed. In
+        * either case there is no further work to be done. If the source
+        * is a directory then it cannot have been rmdir'ed; its link
+        * count of three would cause a rmdir to fail with ENOTEMPTY.
+        * The IRENAME flag ensures that it cannot be moved by another
+        * rename.
+        */
+       if (xp != ip) {
+               if (doingdirectory)
+                       panic("rename: lost dir entry");
+       } else {
+               /*
+                * If the source is a directory with a
+                * new parent, the link count of the old
+                * parent directory must be decremented
+                * and ".." set to point to the new parent.
+                */
+               if (doingdirectory && newparent) {
+                       dp->i_nlink--;
+                       dp->i_flag |= ICHG;
+                       error = vn_rdwr(UIO_READ, ITOV(xp), (caddr_t)&dirbuf,
+                               sizeof (struct dirtemplate), (off_t)0,
+                               UIO_SYSSPACE, IO_NODELOCKED, 
+                               tndp->ni_cred, (int *)0);
+                       if (error == 0) {
+                               if (dirbuf.dotdot_namlen != 2 ||
+                                   dirbuf.dotdot_name[0] != '.' ||
+                                   dirbuf.dotdot_name[1] != '.') {
+                                       dirbad(xp, 12, "rename: mangled dir");
+                               } else {
+                                       dirbuf.dotdot_ino = newparent;
+                                       (void) vn_rdwr(UIO_WRITE, ITOV(xp),
+                                           (caddr_t)&dirbuf,
+                                           sizeof (struct dirtemplate),
+                                           (off_t)0, UIO_SYSSPACE,
+                                           IO_NODELOCKED|IO_SYNC,
+                                           tndp->ni_cred, (int *)0);
+                                       cache_purge(ITOV(dp));
+                               }
+                       }
+               }
+               error = dirremove(fndp);
+               if (!error) {
+                       xp->i_nlink--;
+                       xp->i_flag |= ICHG;
+               }
+               xp->i_flag &= ~IRENAME;
+       }
+       if (dp)
+               vput(ITOV(dp));
+       if (xp)
+               vput(ITOV(xp));
+       vrele(ITOV(ip));
+       return (error);
+
+bad:
+       if (xp)
+               vput(ITOV(xp));
+       vput(ITOV(dp));
 out:
 out:
-       if (unlinkingdot)
-               irele(ip);
-       else
-               iput(ip);
-out1:
-       iput(pp);
+       ip->i_nlink--;
+       ip->i_flag |= ICHG;
+       vrele(ITOV(ip));
+       return (error);
 }
 
 /*
 }
 
 /*
- * Seek system call
+ * A virgin directory (no blushing please).
  */
  */
-seek()
-{
-       register struct file *fp;
-       register struct a {
-               int     fdes;
-               off_t   off;
-               int     sbase;
-       } *uap;
-
-       uap = (struct a *)u.u_ap;
-       fp = getf(uap->fdes);
-       if (fp == NULL)
-               return;
-       if (fp->f_type == DTYPE_SOCKET) {
-               u.u_error = ESPIPE;
-               return;
-       }
-       if (uap->sbase == 1)
-               uap->off += fp->f_offset;
-       else if (uap->sbase == 2) {
-#ifdef EFS
-               struct inode *ip = fp->f_inode;
-               uap->off += efsinode(ip) ? efsfilesize(fp) : ip->i_size;
-#else
-               uap->off += fp->f_inode->i_size;
-#endif
-       }
-       fp->f_offset = uap->off;
-       u.u_r.r_off = uap->off;
-}
+struct dirtemplate mastertemplate = {
+       0, 12, 1, ".",
+       0, DIRBLKSIZ - 12, 2, ".."
+};
 
 /*
 
 /*
- * Access system call
+ * Mkdir system call
  */
  */
-saccess()
+ufs_mkdir(ndp, vap)
+       struct nameidata *ndp;
+       struct vattr *vap;
 {
 {
-       register svuid, svgid;
-       register struct inode *ip;
-       register struct a {
-               char    *fname;
-               int     fmode;
-       } *uap;
-
-       uap = (struct a *)u.u_ap;
-       svuid = u.u_uid;
-       svgid = u.u_gid;
-       u.u_uid = u.u_ruid;
-       u.u_gid = u.u_rgid;
-       ip = namei(uchar, 0, 1);
-#ifdef EFS
-       if (efsinode(ip)) {
-               dev_t ndev = ip->i_rdev;
+       register struct inode *ip, *dp;
+       struct inode *tip;
+       struct vnode *dvp;
+       struct dirtemplate dirtemplate;
+       int error;
+       int dmode;
 
 
-               iput(ip);
-               efssaccess(ndev);
-               if (u.u_error != EEXIST)
-                       return;
-               u.u_error = 0;
-               u.u_dirp = (caddr_t)u.u_arg[0];
-               ip = namei(uchar, 0, 1);
+       dvp = ndp->ni_dvp;
+       dp = VTOI(dvp);
+       dmode = vap->va_mode&0777;
+       dmode |= IFDIR;
+       /*
+        * Must simulate part of maknode here
+        * in order to acquire the inode, but
+        * not have it entered in the parent
+        * directory.  The entry is made later
+        * after writing "." and ".." entries out.
+        */
+       error = ialloc(dp, dirpref(dp->i_fs), dmode, &tip);
+       if (error) {
+               iput(dp);
+               return (error);
        }
        }
+       ip = tip;
+#ifdef QUOTA
+       if (ip->i_dquot != NODQUOT)
+               panic("mkdir: dquot");
 #endif
 #endif
-       if (ip != NULL) {
-               if (uap->fmode&(IREAD>>6))
-                       (void) access(ip, IREAD);
-               if (uap->fmode&(IWRITE>>6))
-                       (void) access(ip, IWRITE);
-               if (uap->fmode&(IEXEC>>6))
-                       (void) access(ip, IEXEC);
-               iput(ip);
-       }
-       u.u_uid = svuid;
-       u.u_gid = svgid;
-}
+       ip->i_flag |= IACC|IUPD|ICHG;
+       ip->i_mode = dmode;
+       ITOV(ip)->v_type = VDIR;        /* Rest init'd in iget() */
+       ip->i_nlink = 2;
+       ip->i_uid = ndp->ni_cred->cr_uid;
+       ip->i_gid = dp->i_gid;
+#ifdef QUOTA
+       ip->i_dquot = inoquota(ip);
+#endif
+       error = iupdat(ip, &time, &time, 1);
 
 
-/*
- * the fstat system call.
- */
-fstat()
-{
-       register struct file *fp;
-       register struct a {
-               int     fdes;
-               struct stat *sb;
-       } *uap;
+       /*
+        * Bump link count in parent directory
+        * to reflect work done below.  Should
+        * be done before reference is created
+        * so reparation is possible if we crash.
+        */
+       dp->i_nlink++;
+       dp->i_flag |= ICHG;
+       error = iupdat(dp, &time, &time, 1);
 
 
-       uap = (struct a *)u.u_ap;
-       fp = getf(uap->fdes);
-       if (fp == NULL)
-               return;
-#ifdef EFS
-       if (efsinode(fp->f_inode)) {
-               efsfstat(fp->f_inode->i_rdev, fp);
-               return;
+       /*
+        * Initialize directory with "."
+        * and ".." from static template.
+        */
+       dirtemplate = mastertemplate;
+       dirtemplate.dot_ino = ip->i_number;
+       dirtemplate.dotdot_ino = dp->i_number;
+       error = vn_rdwr(UIO_WRITE, ITOV(ip), (caddr_t)&dirtemplate,
+               sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE,
+               IO_NODELOCKED|IO_SYNC, ndp->ni_cred, (int *)0);
+       if (error) {
+               dp->i_nlink--;
+               dp->i_flag |= ICHG;
+               goto bad;
        }
        }
-#endif
-       if (fp->f_type == DTYPE_SOCKET)
-               u.u_error = sostat(fp->f_socket, uap->sb);
+       if (DIRBLKSIZ > dp->i_fs->fs_fsize)
+               panic("mkdir: blksize");     /* XXX - should grow w/balloc() */
        else
        else
-               stat1(fp->f_inode, uap->sb);
+               ip->i_size = DIRBLKSIZ;
+       /*
+        * Directory all set up, now
+        * install the entry for it in
+        * the parent directory.
+        */
+       error = direnter(ip, ndp);
+       dp = NULL;
+       if (error) {
+               ndp->ni_nameiop = LOOKUP | NOCACHE;
+               error = namei(ndp);
+               if (!error) {
+                       dp = VTOI(ndp->ni_vp);
+                       dp->i_nlink--;
+                       dp->i_flag |= ICHG;
+               }
+       }
+bad:
+       /*
+        * No need to do an explicit itrunc here,
+        * vrele will do this for us because we set
+        * the link count to 0.
+        */
+       if (error) {
+               ip->i_nlink = 0;
+               ip->i_flag |= ICHG;
+               iput(ip);
+       } else
+               ndp->ni_vp = ITOV(ip);
+       if (dp)
+               iput(dp);
+       return (error);
 }
 
 /*
 }
 
 /*
- * Stat system call.  This version follows links.
+ * Rmdir system call.
  */
  */
-stat()
+ufs_rmdir(ndp)
+       register struct nameidata *ndp;
 {
 {
-       register struct inode *ip;
-       register struct a {
-               char    *fname;
-               struct stat *sb;
-       } *uap;
-
-       uap = (struct a *)u.u_ap;
-       ip = namei(uchar, 0, 1);
-       if (ip == NULL)
-               return;
-#ifdef EFS
-       if (efsinode(ip)) {
-               dev_t ndev = ip->i_rdev;
+       register struct inode *ip, *dp;
+       int error = 0;
 
 
+       ip = VTOI(ndp->ni_vp);
+       dp = VTOI(ndp->ni_dvp);
+       /*
+        * No rmdir "." please.
+        */
+       if (dp == ip) {
+               vrele(ITOV(dp));
                iput(ip);
                iput(ip);
-               efsstat(ndev);
-               if (u.u_error != EEXIST)
-                       return;
-               u.u_error = 0;
-               u.u_dirp = (caddr_t)u.u_arg[0];
-               ip = namei(uchar, 0, 1);
+               return (EINVAL);
        }
        }
-#endif
-       stat1(ip, uap->sb);
+       /*
+        * Verify the directory is empty (and valid).
+        * (Rmdir ".." won't be valid since
+        *  ".." will contain a reference to
+        *  the current directory and thus be
+        *  non-empty.)
+        */
+       if (ip->i_nlink != 2 || !dirempty(ip, dp->i_number, ndp->ni_cred)) {
+               error = ENOTEMPTY;
+               goto out;
+       }
+       /*
+        * Delete reference to directory before purging
+        * inode.  If we crash in between, the directory
+        * will be reattached to lost+found.
+        */
+       if (error = dirremove(ndp))
+               goto out;
+       dp->i_nlink--;
+       dp->i_flag |= ICHG;
+       cache_purge(ITOV(dp));
+       iput(dp);
+       ndp->ni_dvp = NULL;
+       /*
+        * Truncate inode.  The only stuff left
+        * in the directory is "." and "..".  The
+        * "." reference is inconsequential since
+        * we're quashing it.  The ".." reference
+        * has already been adjusted above.  We've
+        * removed the "." reference and the reference
+        * in the parent directory, but there may be
+        * other hard links so decrement by 2 and
+        * worry about them later.
+        */
+       ip->i_nlink -= 2;
+       error = itrunc(ip, (u_long)0, IO_SYNC);
+       cache_purge(ITOV(ip));
+out:
+       if (ndp->ni_dvp)
+               iput(dp);
        iput(ip);
        iput(ip);
+       return (error);
 }
 
 /*
 }
 
 /*
- * Lstat system call.  This version does not follow links.
+ * symlink -- make a symbolic link
  */
  */
-lstat()
+ufs_symlink(ndp, vap, target)
+       struct nameidata *ndp;
+       struct vattr *vap;
+       char *target;
 {
 {
-       register struct inode *ip;
-       register struct a {
-               char    *fname;
-               struct stat *sb;
-       } *uap;
-
-       uap = (struct a *)u.u_ap;
-       ip = namei(uchar, 0, 0);
-       if (ip == NULL)
-               return;
-#ifdef EFS
-       if (efsinode(ip)) {
-               dev_t ndev = ip->i_rdev;
+       struct inode *ip;
+       int error;
 
 
-               iput(ip);
-               efslstat(ndev);
-               if (u.u_error != EEXIST)
-                       return;
-               u.u_error = 0;
-               u.u_dirp = (caddr_t)u.u_arg[0];
-               ip = namei(uchar, 0, 0);
-       }
-#endif
-       stat1(ip, uap->sb);
+       error = maknode(IFLNK | vap->va_mode, ndp, &ip);
+       if (error)
+               return (error);
+       error = vn_rdwr(UIO_WRITE, ITOV(ip), target, strlen(target), (off_t)0,
+               UIO_SYSSPACE, IO_NODELOCKED, ndp->ni_cred, (int *)0);
        iput(ip);
        iput(ip);
+       return (error);
 }
 
 /*
 }
 
 /*
- * The basic routine for fstat and stat:
- * get the inode and pass appropriate parts back.
+ * Vnode op for reading directories
  */
  */
-stat1(ip, ub)
-       register struct inode *ip;
-       struct stat *ub;
+ufs_readdir(vp, uio, cred)
+       struct vnode *vp;
+       register struct uio *uio;
+       struct ucred *cred;
 {
 {
-       struct stat ds;
+       int count, lost, error;
 
 
-       IUPDAT(ip, &time, &time, 0);
-       /*
-        * Copy from inode table
-        */
-       ds.st_dev = ip->i_dev;
-       ds.st_ino = ip->i_number;
-       ds.st_mode = ip->i_mode;
-       ds.st_nlink = ip->i_nlink;
-       ds.st_uid = ip->i_uid;
-       ds.st_gid = ip->i_gid;
-       ds.st_rdev = (dev_t)ip->i_rdev;
-       ds.st_size = ip->i_size;
-       ds.st_atime = ip->i_atime;
-       ds.st_mtime = ip->i_mtime;
-       ds.st_ctime = ip->i_ctime;
-       ds.st_blksize = ip->i_fs->fs_bsize;
-       if (copyout((caddr_t)&ds, (caddr_t)ub, sizeof(ds)) < 0)
-               u.u_error = EFAULT;
+       count = uio->uio_resid;
+       count &= ~(DIRBLKSIZ - 1);
+       lost = uio->uio_resid - count;
+       if (count < DIRBLKSIZ || (uio->uio_offset & (DIRBLKSIZ -1)))
+               return (EINVAL);
+       uio->uio_resid = count;
+       uio->uio_iov->iov_len = count;
+       error = ufs_read(vp, uio, 0, cred);
+       uio->uio_resid += lost;
+       return (error);
 }
 
 /*
  * Return target name of a symbolic link
  */
 }
 
 /*
  * Return target name of a symbolic link
  */
-readlink()
+ufs_readlink(vp, uiop, cred)
+       struct vnode *vp;
+       struct uio *uiop;
+       struct ucred *cred;
+{
+
+       return (ufs_read(vp, uiop, 0, cred));
+}
+
+/*
+ * Ufs abort op, called after namei() when a CREATE/DELETE isn't actually
+ * done. Iff ni_vp/ni_dvp not null and locked, unlock.
+ */
+ufs_abortop(ndp)
+       register struct nameidata *ndp;
 {
        register struct inode *ip;
 {
        register struct inode *ip;
-       register struct a {
-               char    *name;
-               char    *buf;
-               int     count;
-       } *uap;
-
-       ip = namei(uchar, 0, 0);
-       if (ip == NULL)
-               return;
-#ifdef EFS
-       if (efsinode(ip)) {
-               dev_t ndev = ip->i_rdev;
 
 
-               iput(ip);
-               efsreadlink(ndev);
-               if (u.u_error != EEXIST)
-                       return;
-               u.u_error = 0;
-               u.u_dirp = (caddr_t)u.u_arg[0];
-               ip = namei(uchar, 0, 0);
-               return (0);
+       if (ndp->ni_vp) {
+               ip = VTOI(ndp->ni_vp);
+               if (ip->i_flag & ILOCKED)
+                       IUNLOCK(ip);
+               vrele(ndp->ni_vp);
        }
        }
-#endif
-       if ((ip->i_mode&IFMT) != IFLNK) {
-               u.u_error = ENXIO;
-               goto out;
+       if (ndp->ni_dvp) {
+               ip = VTOI(ndp->ni_dvp);
+               if (ip->i_flag & ILOCKED)
+                       IUNLOCK(ip);
+               vrele(ndp->ni_dvp);
        }
        }
-       uap = (struct a *)u.u_ap;
-       u.u_offset = 0;
-       u.u_base = uap->buf;
-       u.u_count = uap->count;
-       u.u_segflg = 0;
-       readi(ip);
-out:
-       iput(ip);
-       u.u_r.r_val1 = uap->count - u.u_count;
+       return;
 }
 
 }
 
-chmod()
+ufs_lock(vp)
+       struct vnode *vp;
 {
 {
-       register struct inode *ip;
-       register struct a {
-               char    *fname;
-               int     fmode;
-       } *uap;
+       register struct inode *ip = VTOI(vp);
 
 
-       uap = (struct a *)u.u_ap;
-       if ((ip = owner(1)) == NULL)
-               return;
-#ifdef EFS
-       if (efsinode(ip)) {
-               dev_t ndev = ip->i_rdev;
+       ILOCK(ip);
+       return (0);
+}
 
 
-               iput(ip);
-               efschmod(ndev);
-               if (u.u_error != EEXIST)
-                       return;
-               u.u_error = 0;
-               u.u_dirp = (caddr_t)u.u_arg[0];
-               ip = owner(1);
+ufs_unlock(vp)
+       struct vnode *vp;
+{
+       register struct inode *ip = VTOI(vp);
+
+       if (!(ip->i_flag & ILOCKED))
+               panic("ufs_unlock NOT LOCKED");
+       IUNLOCK(ip);
+       return (0);
+}
+
+/*
+ * Get access to bmap
+ */
+ufs_bmap(vp, bn, vpp, bnp)
+       struct vnode *vp;
+       daddr_t bn;
+       struct vnode **vpp;
+       daddr_t *bnp;
+{
+       struct inode *ip = VTOI(vp);
+
+       if (vpp != NULL)
+               *vpp = ip->i_devvp;
+       if (bnp == NULL)
+               return (0);
+       return (bmap(ip, bn, bnp, (daddr_t *)0, (int *)0));
+}
+
+/*
+ * Just call the device strategy routine
+ */
+int checkoverlap = 1;
+
+ufs_strategy(bp)
+       register struct buf *bp;
+{
+       register struct inode *ip = VTOI(bp->b_vp);
+       register struct buf *ep;
+       struct vnode *vp;
+       struct buf *ebp;
+       daddr_t start, last;
+       int error;
+
+       if (bp->b_vp->v_type == VBLK || bp->b_vp->v_type == VCHR)
+               panic("ufs_strategy: spec");
+       if (bp->b_blkno == bp->b_lblkno) {
+               if (error = bmap(ip, bp->b_lblkno, &bp->b_blkno))
+                       return (error);
+               if ((long)bp->b_blkno == -1)
+                       clrbuf(bp);
        }
        }
-#endif
-       ip->i_mode &= ~07777;
-       if (u.u_uid) {
-               uap->fmode &= ~ISVTX;
-               if (ip->i_gid >= NGRPS ||
-                   (u.u_grps[ip->i_gid/(sizeof(int)*8)] &
-                    (1 << ip->i_gid%(sizeof(int)*8))) == 0)
-                       uap->fmode &= ~ISGID;
-#if    MUSH
-               if (u.u_quota->q_syflags & QF_UMASK && u.u_uid != 0 &&
-                   (ip->i_mode & IFMT) != IFCHR)
-                       uap->fmode &= ~u.u_cmask;
-#endif
+       if ((long)bp->b_blkno == -1) {
+               biodone(bp);
+               return (0);
        }
        }
-       ip->i_mode |= uap->fmode&07777;
-       ip->i_flag |= ICHG;
-       if (ip->i_flag&ITEXT && (ip->i_mode&ISVTX)==0)
-               xrele(ip);
-#ifdef MELB
-       if ((ip->i_mode & ISUID) && ip->i_uid == 0)
-               printf("%s: ino %d (%s) setuid root\n"
-                   , getfs(ip->i_dev)->s_fsmnt
-                   , ip->i_number
-                   , u.u_dent.d_name
-               );
-#endif
-       iput(ip);
+       if (checkoverlap) {
+               ebp = &buf[nbuf];
+               start = bp->b_blkno;
+               last = start + btodb(bp->b_bcount) - 1;
+               for (ep = buf; ep < ebp; ep++) {
+                       if (ep == bp || (ep->b_flags & B_INVAL) ||
+                           ep->b_vp == (struct vnode *)0)
+                               continue;
+                       if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0))
+                               continue;
+                       if (vp != ip->i_devvp)
+                               continue;
+                       /* look for overlap */
+                       if (ep->b_bcount == 0 || ep->b_blkno > last ||
+                           ep->b_blkno + btodb(ep->b_bcount) <= start)
+                               continue;
+                       vprint("Disk overlap", vp);
+                       printf("\tstart %d, end %d overlap start %d, end %d\n",
+                               start, last, ep->b_blkno,
+                               ep->b_blkno + btodb(ep->b_bcount) - 1);
+               }
+       }
+       vp = ip->i_devvp;
+       bp->b_dev = vp->v_rdev;
+       (*(vp->v_op->vn_strategy))(bp);
+       return (0);
 }
 
 }
 
-chown()
+/*
+ * Print out the contents of an inode.
+ */
+ufs_print(vp)
+       struct vnode *vp;
 {
 {
-       register struct inode *ip;
-       register struct a {
-               char    *fname;
-               int     uid;
-               int     gid;
-       } *uap;
-#if    QUOTA
-       register long change;
-#endif
+       register struct inode *ip = VTOI(vp);
 
 
-       uap = (struct a *)u.u_ap;
-       if (!suser() || (ip = owner(0)) == NULL)
+       printf("tag VT_UFS, ino %d, on dev %d, %d%s\n", ip->i_number,
+               major(ip->i_dev), minor(ip->i_dev),
+               (ip->i_flag & ILOCKED) ? " (LOCKED)" : "");
+       if (ip->i_spare0 == 0)
                return;
                return;
-#ifdef EFS
-       if (efsinode(ip)) {
-               dev_t ndev = ip->i_rdev;
+       printf("\towner pid %d", ip->i_spare0);
+       if (ip->i_spare1)
+               printf(" waiting pid %d", ip->i_spare1);
+       printf("\n");
+}
+
+/*
+ * Read wrapper for special devices.
+ */
+ufsspec_read(vp, uio, ioflag, cred)
+       struct vnode *vp;
+       struct uio *uio;
+       int ioflag;
+       struct ucred *cred;
+{
 
 
-               iput(ip);
-               efschown(ndev);
-               if (u.u_error != EEXIST)
-                       return;
-               u.u_error = 0;
-               u.u_dirp = (caddr_t)u.u_arg[0];
-               ip = owner(0);
-       }
-#endif
-#if    QUOTA
-       /*
-        * This doesn't allow for holes in files (which hopefully don't
-        * happen often in files that we chown), and is not accurate anyway
-        * (eg: it totally ignores 3 level indir blk files - but hopefully
-        * noone who can make a file that big will have a quota)
-        */
-       if (ip->i_uid == uap->uid)
-               change = 0;
-       else {
-               register struct fs *fs = ip->i_fs;
-
-               if (ip->i_size > (change = NDADDR * fs->fs_bsize)) {
-                       register off_t size;
-
-                       size = blkroundup(fs, ip->i_size) - change;
-                       change += size;
-                       change += fs->fs_bsize;
-                       /* This assumes NIADDR <= 2 */
-                       if (size > NINDIR(fs) * fs->fs_bsize)
-                               change += fs->fs_bsize;
-               } else
-                       change = fragroundup(fs, ip->i_size);
-               change /= DEV_BSIZE;
-       }
-       chkdq(ip, -change, 1);
-       chkiq(ip->i_dev, ip, ip->i_uid, 1);
-       dqrele(ip->i_dquot);
-#endif
        /*
        /*
-        * keep uid/gid's in sane range - no err, so chown(file, uid, -1)
-        * will do something useful
+        * Set access flag.
         */
         */
-       if (uap->uid >= 0 && uap->uid <= 32767) /* should have a const  */
-               ip->i_uid = uap->uid;
-       if (uap->gid >= 0 && uap->gid <= 32767) /* same here            */
-               ip->i_gid = uap->gid;
-       ip->i_flag |= ICHG;
-       if (u.u_ruid != 0)
-               ip->i_mode &= ~(ISUID|ISGID);
-#if    QUOTA
-       ip->i_dquot = inoquota(ip);
-       chkdq(ip, change, 1);
-       chkiq(ip->i_dev, NULL, uap->uid, 1);
-#endif
-       iput(ip);
+       VTOI(vp)->i_flag |= IACC;
+       return (spec_read(vp, uio, ioflag, cred));
 }
 
 /*
 }
 
 /*
- * Set IUPD and IACC times on file.
- * Can't set ICHG.
+ * Write wrapper for special devices.
  */
  */
-utime()
+ufsspec_write(vp, uio, ioflag, cred)
+       struct vnode *vp;
+       struct uio *uio;
+       int ioflag;
+       struct ucred *cred;
 {
 {
-       register struct a {
-               char    *fname;
-               time_t  *tptr;
-       } *uap;
-       register struct inode *ip;
-       time_t tv[2];
 
 
-       uap = (struct a *)u.u_ap;
-       if ((ip = owner(1)) == NULL)
-               return;
-       if (copyin((caddr_t)uap->tptr, (caddr_t)tv, sizeof(tv))) {
-               u.u_error = EFAULT;
-       } else {
-#ifdef EFS
-               if (efsinode(ip)) {
-                       dev_t ndev = ip->i_rdev;
-
-                       iput(ip);
-                       efsutime(ndev, uap->fname, tv);
-                       if (u.u_error != EEXIST)
-                               return;
-                       u.u_error = 0;
-                       u.u_dirp = (caddr_t)u.u_arg[0];
-                       ip = owner(1);
-               }
-#endif
-               ip->i_flag |= IACC|IUPD|ICHG;
-               iupdat(ip, &tv[0], &tv[1], 0);
-       }
-       iput(ip);
+       /*
+        * Set update and change flags.
+        */
+       VTOI(vp)->i_flag |= IUPD|ICHG;
+       return (spec_write(vp, uio, ioflag, cred));
 }
 
 }
 
-sync()
+/*
+ * Close wrapper for special devices.
+ *
+ * Update the times on the inode then do device close.
+ */
+ufsspec_close(vp, fflag, cred)
+       struct vnode *vp;
+       int fflag;
+       struct ucred *cred;
 {
 {
+       register struct inode *ip = VTOI(vp);
 
 
-       update(0);
+       if (vp->v_usecount > 1 && !(ip->i_flag & ILOCKED))
+               ITIMES(ip, &time, &time);
+       return (spec_close(vp, fflag, cred));
 }
 
 /*
  * Make a new file.
  */
 }
 
 /*
  * Make a new file.
+ *
+ * mode is the requested file mode (IFREG is assumed when no IFMT bits
+ * are given), ndp supplies the parent directory (ndp->ni_dvp) and the
+ * caller's credentials (ndp->ni_cred), and ipp receives the result.
+ *
+ * *ipp is zeroed on entry.  An inode is obtained with ialloc(), using
+ * dirpref() of the parent's filesystem as the preferred inode when a
+ * directory is being made and the parent's inode number otherwise.
+ * If ialloc() fails the parent directory inode is released with iput()
+ * and the error is returned.
+ *
+ * The new inode gets i_mode = mode exactly as passed in, a link count
+ * of one, the uid from the credentials and the gid of the parent
+ * directory.  ISGID is cleared when groupmember() reports that the
+ * credentials are not in that group and suser() returns non-zero
+ * (presumably meaning "not super-user" -- confirm).
+ * NOTE(review): the umask is no longer applied here; presumably the
+ * callers mask the mode before calling -- confirm.
+ * Under QUOTA the routine panics if the fresh inode already has a
+ * dquot attached, and later attaches one with inoquota().
+ *
+ * The inode is written with iupdat() before direnter() adds the
+ * directory entry.  If either step fails the link count is set back to
+ * zero, ICHG is set, the inode is released with iput() and the error is
+ * returned with *ipp still zero.  On success *ipp is set to the new
+ * inode and 0 is returned.
  */
-struct inode *
-maknode(mode)
+maknode(mode, ndp, ipp)
        int mode;
        int mode;
+       register struct nameidata *ndp;
+       struct inode **ipp;
 {
        register struct inode *ip;
 {
        register struct inode *ip;
+       struct inode *tip;
+       register struct inode *pdir = VTOI(ndp->ni_dvp);
        ino_t ipref;
        ino_t ipref;
+       int error;
 
 
+       *ipp = 0;
        if ((mode & IFMT) == IFDIR)
        if ((mode & IFMT) == IFDIR)
-               ipref = dirpref(u.u_pdir->i_fs);
+               ipref = dirpref(pdir->i_fs);
        else
        else
-               ipref = u.u_pdir->i_number;
-       ip = ialloc(u.u_pdir, ipref, mode);
-       if (ip == NULL) {
-               iput(u.u_pdir);
-               return(NULL);
+               ipref = pdir->i_number;
+       error = ialloc(pdir, ipref, mode, &tip);
+       if (error) {
+               iput(pdir);
+               return (error);
        }
        }
-#ifdef QUOTA
+       ip = tip;
+#ifdef QUOTA
        if (ip->i_dquot != NODQUOT)
                panic("maknode: dquot");
 #endif
        ip->i_flag |= IACC|IUPD|ICHG;
        if ((mode & IFMT) == 0)
                mode |= IFREG;
        if (ip->i_dquot != NODQUOT)
                panic("maknode: dquot");
 #endif
        ip->i_flag |= IACC|IUPD|ICHG;
        if ((mode & IFMT) == 0)
                mode |= IFREG;
-       ip->i_mode = mode & ~u.u_cmask;
+       ip->i_mode = mode;
+       ITOV(ip)->v_type = IFTOVT(mode);        /* Rest init'd in iget() */
        ip->i_nlink = 1;
        ip->i_nlink = 1;
-       ip->i_uid = u.u_uid;
-       ip->i_gid = u.u_pdir->i_gid;
-#ifdef QUOTA
+       ip->i_uid = ndp->ni_cred->cr_uid;
+       ip->i_gid = pdir->i_gid;
+       if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, ndp->ni_cred) &&
+           suser(ndp->ni_cred, NULL))
+               ip->i_mode &= ~ISGID;
+#ifdef QUOTA
        ip->i_dquot = inoquota(ip);
 #endif
 
        /*
         * Make sure inode goes to disk before directory entry.
         */
        ip->i_dquot = inoquota(ip);
 #endif
 
        /*
         * Make sure inode goes to disk before directory entry.
         */
-       iupdat(ip, &time, &time, 1);
-       direnter(ip);
-       if (u.u_error) {
+       if ((error = iupdat(ip, &time, &time, 1)) ||
+           (error = direnter(ip, ndp))) {
                /*
                /*
-                * write error occurred trying to update directory
-                * so must deallocate the inode
+                * Write error occurred trying to update the inode
+                * or the directory so must deallocate the inode.
                 */
                ip->i_nlink = 0;
                ip->i_flag |= ICHG;
                iput(ip);
                 */
                ip->i_nlink = 0;
                ip->i_flag |= ICHG;
                iput(ip);
-               return(NULL);
+               return (error);
        }
        }
-       return(ip);
+       *ipp = ip;
+       return (0);
 }
 }