convert VOP_UNLOCK and vrele into vput's; add proc parameter to union_dircache
[unix-history] / usr / src / sys / kern / vfs_vnops.c
index 39e00a7..736c330 100644 (file)
-/*     vfs_vnops.c     4.34    83/03/31        */
-
-#include "../machine/reg.h"
-
-#include "../h/param.h"
-#include "../h/systm.h"
-#include "../h/dir.h"
-#include "../h/user.h"
-#include "../h/fs.h"
-#include "../h/file.h"
-#include "../h/conf.h"
-#include "../h/inode.h"
-#include "../h/acct.h"
-#include "../h/mount.h"
-#include "../h/socket.h"
-#include "../h/socketvar.h"
-#include "../h/proc.h"
-#include "../h/nami.h"
-
 /*
 /*
- * Openi called to allow handler
- * of special files to initialize and
- * validate before actual IO.
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * %sccs.include.redist.c%
+ *
+ *     @(#)vfs_vnops.c 8.12 (Berkeley) %G%
  */
  */
-openi(ip, mode)
-       register struct inode *ip;
-{
-       dev_t dev = (dev_t)ip->i_rdev;
-       register u_int maj = major(dev);
 
 
-       switch (ip->i_mode&IFMT) {
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
 
 
-       case IFCHR:
-               if (maj >= nchrdev)
-                       return (ENXIO);
-               return ((*cdevsw[maj].d_open)(dev, mode));
+#include <vm/vm.h>
 
 
-       case IFBLK:
-               if (maj >= nblkdev)
-                       return (ENXIO);
-               return ((*bdevsw[maj].d_open)(dev, mode));
-       }
-       return (0);
-}
+struct         fileops vnops =         /* file operations table filled with the vn_* routines of this file */
+       { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile };
 
 /*
 
 /*
- * Check mode permission on inode pointer.
- * Mode is READ, WRITE or EXEC.
- * In the case of WRITE, the
- * read-only status of the file
- * system is checked.
- * Also in WRITE, prototype text
- * segments cannot be written.
- * The mode is shifted to select
- * the owner/group/other fields.
- * The super user is granted all
- * permissions.
+ * Common code for vnode open operations.
+ * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
+ *
+ * ndp   - nameidata describing the path; its cn_nameiop and cn_flags
+ *         are overwritten here before namei() is called.
+ * fmode - open flags (FREAD, FWRITE, O_CREAT, O_EXCL, O_TRUNC ...).
+ * cmode - mode stored in va_mode when a new file is created.
+ *
+ * With O_CREAT the lookup uses CREATE with LOCKPARENT | LOCKLEAF, plus
+ * FOLLOW unless O_EXCL is set.  A missing file is created as VREG via
+ * VOP_CREATE; an existing file fails with EEXIST under O_EXCL and is
+ * otherwise handled as if O_CREAT had not been given.  Without O_CREAT
+ * a LOOKUP with FOLLOW | LOCKLEAF is done.  Sockets are refused with
+ * EOPNOTSUPP and write or truncate opens of directories with EISDIR.
+ *
+ * Returns 0 on success, with the vnode left in ndp->ni_vp and no
+ * unlock performed on that path in this function, or an errno value.
+ * Errors taken through the bad label release the vnode with vput().
  */
  */
-access(ip, mode)
-       register struct inode *ip;
-       int mode;
+vn_open(ndp, fmode, cmode)
+       register struct nameidata *ndp;
+       int fmode, cmode;
 {
 {
-       register m;
-       register int *gp;
-
-       m = mode;
-       if (m == IWRITE) {
-               /*
-                * Disallow write attempts on read-only
-                * file systems; unless the file is a block
-                * or character device resident on the
-                * file system.
-                */
-               if (ip->i_fs->fs_ronly != 0) {
-                       if ((ip->i_mode & IFMT) != IFCHR &&
-                           (ip->i_mode & IFMT) != IFBLK) {
-                               u.u_error = EROFS;
-                               return (1);
+       register struct vnode *vp;
+       register struct proc *p = ndp->ni_cnd.cn_proc;
+       register struct ucred *cred = p->p_ucred;
+       struct vattr vat;
+       struct vattr *vap = &vat;
+       int error;
+
+       if (fmode & O_CREAT) {
+               ndp->ni_cnd.cn_nameiop = CREATE;
+               ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
+               if ((fmode & O_EXCL) == 0)
+                       ndp->ni_cnd.cn_flags |= FOLLOW;
+               if (error = namei(ndp))
+                       return (error);
+               if (ndp->ni_vp == NULL) {       /* no such file: create it */
+                       VATTR_NULL(vap);
+                       vap->va_type = VREG;
+                       vap->va_mode = cmode;
+                       if (fmode & O_EXCL)
+                               vap->va_vaflags |= VA_EXCLUSIVE;
+                       VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE);
+                       if (error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
+                           &ndp->ni_cnd, vap))
+                               return (error);
+                       fmode &= ~O_TRUNC;      /* skips the O_TRUNC step below */
+                       vp = ndp->ni_vp;
+               } else {                        /* file already exists */
+                       VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd);
+                       if (ndp->ni_dvp == ndp->ni_vp)  /* parent and leaf are the same vnode */
+                               vrele(ndp->ni_dvp);
+                       else
+                               vput(ndp->ni_dvp);
+                       ndp->ni_dvp = NULL;
+                       vp = ndp->ni_vp;
+                       if (fmode & O_EXCL) {
+                               error = EEXIST;
+                               goto bad;
                        }
                        }
+                       fmode &= ~O_CREAT;      /* so the access checks below are run */
                }
                }
-               /*
-                * If there's shared text associated with
-                * the inode, try to free it up once.  If
-                * we fail, we can't allow writing.
-                */
-               if (ip->i_flag&ITEXT)
-                       xrele(ip);
-               if (ip->i_flag & ITEXT) {
-                       u.u_error = ETXTBSY;
-                       return (1);
+       } else {
+               ndp->ni_cnd.cn_nameiop = LOOKUP;
+               ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF;
+               if (error = namei(ndp))
+                       return (error);
+               vp = ndp->ni_vp;
+       }
+       if (vp->v_type == VSOCK) {
+               error = EOPNOTSUPP;
+               goto bad;
+       }
+       if ((fmode & O_CREAT) == 0) {           /* not created here: check access */
+               if (fmode & FREAD) {
+                       if (error = VOP_ACCESS(vp, VREAD, cred, p))
+                               goto bad;
+               }
+               if (fmode & (FWRITE | O_TRUNC)) {
+                       if (vp->v_type == VDIR) {
+                               error = EISDIR;
+                               goto bad;
+                       }
+                       if ((error = vn_writechk(vp)) ||
+                           (error = VOP_ACCESS(vp, VWRITE, cred, p)))
+                               goto bad;
                }
        }
                }
        }
+       if (fmode & O_TRUNC) {
+               VOP_UNLOCK(vp, 0, p);                           /* XXX */
+               VOP_LEASE(vp, p, cred, LEASE_WRITE);
+               vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);        /* XXX */
+               VATTR_NULL(vap);
+               vap->va_size = 0;                               /* truncate by setting the size to zero */
+               if (error = VOP_SETATTR(vp, vap, cred, p))
+                       goto bad;
+       }
+       if (error = VOP_OPEN(vp, fmode, cred, p))
+               goto bad;
+       if (fmode & FWRITE)
+               vp->v_writecount++;             /* vn_close decrements this for FWRITE */
+       return (0);
+bad:
+       vput(vp);                               /* common error exit once vp is known */
+       return (error);
+}
+
+/*
+ * Check for write permissions on the specified vnode.
+ * The read-only status of the file system is checked.
+ * Also, prototype text segments cannot be written.
+ *
+ * Returns EROFS for a regular file, directory or symbolic link on a
+ * mount flagged MNT_RDONLY, ETXTBSY when the vnode has VTEXT set and
+ * vnode_pager_uncache() returns zero, and 0 otherwise.
+ */
+vn_writechk(vp)
+       register struct vnode *vp;
+{
+
        /*
        /*
-        * If you're the super-user,
-        * you always get access.
+        * Disallow write attempts on read-only file systems;
+        * unless the file is a socket or a block or character
+        * device resident on the file system.
         */
         */
-       if (u.u_uid == 0)
-               return (0);
+       if (vp->v_mount->mnt_flag & MNT_RDONLY) {
+               switch (vp->v_type) {
+               case VREG: case VDIR: case VLNK:        /* every other vnode type falls out of the switch */
+                       return (EROFS);
+               }
+       }
        /*
        /*
-        * Access check is based on only
-        * one of owner, group, public.
-        * If not owner, then check group.
-        * If not a member of the group, then
-        * check public access.
+        * If there's shared text associated with
+        * the vnode, try to free it up once.  If
+        * we fail, we can't allow writing.
         */
         */
-       if (u.u_uid != ip->i_uid) {
-               m >>= 3;
-               if (u.u_gid == ip->i_gid)
-                       goto found;
-               gp = u.u_groups;
-               for (; gp < &u.u_groups[NGROUPS] && *gp != NOGROUP; gp++)
-                       if (ip->i_gid == *gp)
-                               goto found;
-               m >>= 3;
-found:
-               ;
-       }
-       if ((ip->i_mode&m) != 0)
-               return (0);
-       u.u_error = EACCES;
-       return (1);
+       if ((vp->v_flag & VTEXT) && !vnode_pager_uncache(vp))
+               return (ETXTBSY);
+       return (0);
 }
 
 /*
 }
 
 /*
- * Look up a pathname and test if
- * the resultant inode is owned by the
- * current user.
- * If not, try for super-user.
- * If permission is granted,
- * return inode pointer.
+ * Vnode close call
+ *
+ * When flags has FWRITE set, decrements the v_writecount that vn_open
+ * incremented for write opens.  Then calls VOP_CLOSE and releases a
+ * reference on vp with vrele().  The reference is released whether or
+ * not VOP_CLOSE failed; the VOP_CLOSE result is returned.
  */
  */
-struct inode *
-owner(follow)
-       int follow;
+vn_close(vp, flags, cred, p)
+       register struct vnode *vp;
+       int flags;
+       struct ucred *cred;
+       struct proc *p;
 {
 {
-       register struct inode *ip;
-
-       ip = namei(uchar, LOOKUP, follow);
-       if (ip == NULL)
-               return (NULL);
-       if (u.u_uid == ip->i_uid)
-               return (ip);
-       if (suser())
-               return (ip);
-       iput(ip);
-       return (NULL);
+       int error;
+
+       if (flags & FWRITE)
+               vp->v_writecount--;
+       error = VOP_CLOSE(vp, flags, cred, p);
+       vrele(vp);                      /* done unconditionally, even if VOP_CLOSE failed */
+       return (error);
 }
 
 /*
 }
 
 /*
- * Test if the current user is the
- * super user.
+ * Package up an I/O request on a vnode into a uio and do it.
+ *
+ * A uio with a single iovec is built from base, len and offset and
+ * handed to VOP_READ when rw is UIO_READ, to VOP_WRITE otherwise.
+ * The vnode is locked around the transfer unless ioflg has
+ * IO_NODELOCKED set.  If aresid is non-null the residual count is
+ * stored through it; if it is null, a transfer that leaves a residual
+ * without any other error is reported as EIO.
+ * Returns 0 or an errno value.
  */
  */
-suser()
+vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
+       enum uio_rw rw;
+       struct vnode *vp;
+       caddr_t base;
+       int len;
+       off_t offset;
+       enum uio_seg segflg;
+       int ioflg;
+       struct ucred *cred;
+       int *aresid;
+       struct proc *p;
 {
 {
+       struct uio auio;
+       struct iovec aiov;
+       int error;
 
 
-       if (u.u_uid == 0) {
-               u.u_acflag |= ASU;
-               return (1);
+       if ((ioflg & IO_NODELOCKED) == 0)
+               vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+       auio.uio_iov = &aiov;
+       auio.uio_iovcnt = 1;
+       aiov.iov_base = base;
+       aiov.iov_len = len;
+       auio.uio_resid = len;
+       auio.uio_offset = offset;
+       auio.uio_segflg = segflg;
+       auio.uio_rw = rw;
+       auio.uio_procp = p;
+       if (rw == UIO_READ) {
+               error = VOP_READ(vp, &auio, ioflg, cred);
+       } else {
+               error = VOP_WRITE(vp, &auio, ioflg, cred);
        }
        }
-       u.u_error = EPERM;
+       if (aresid)
+               *aresid = auio.uio_resid;
+       else
+               if (auio.uio_resid && error == 0)       /* short transfer and nowhere to report the residual */
+                       error = EIO;
+       if ((ioflg & IO_NODELOCKED) == 0)
+               VOP_UNLOCK(vp, 0, p);
+       return (error);
+}
+
+/*
+ * File table vnode read routine.
+ *
+ * Reads through VOP_READ at the current file offset (fp->f_offset)
+ * with the vnode locked, passing IO_NDELAY when FNONBLOCK is set in
+ * the file flags.  The file offset is then advanced by the number of
+ * bytes consumed from the uio, whether or not VOP_READ reported an
+ * error.  Returns the VOP_READ result.
+ */
+vn_read(fp, uio, cred)
+       struct file *fp;
+       struct uio *uio;
+       struct ucred *cred;
+{
+       struct vnode *vp = (struct vnode *)fp->f_data;
+       struct proc *p = uio->uio_procp;
+       int count, error;
+
+       VOP_LEASE(vp, p, cred, LEASE_READ);
+       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+       uio->uio_offset = fp->f_offset;
+       count = uio->uio_resid;                 /* remember the request size */
+       error = VOP_READ(vp, uio, (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0,
+               cred);
+       fp->f_offset += count - uio->uio_resid; /* requested minus left over */
+       VOP_UNLOCK(vp, 0, p);
+       return (error);
+}
+
+/*
+ * File table vnode write routine.
+ *
+ * Builds the ioflag word from the file state: IO_UNIT always,
+ * IO_APPEND for O_APPEND on a regular file, IO_NDELAY for FNONBLOCK,
+ * and IO_SYNC for O_FSYNC or a mount flagged MNT_SYNCHRONOUS.  The
+ * write is issued through VOP_WRITE at fp->f_offset with the vnode
+ * locked.  Afterwards the file offset is taken from the uio when
+ * IO_APPEND was used, and otherwise advanced by the number of bytes
+ * consumed from the uio.  Returns the VOP_WRITE result.
+ */
+vn_write(fp, uio, cred)
+       struct file *fp;
+       struct uio *uio;
+       struct ucred *cred;
+{
+       struct vnode *vp = (struct vnode *)fp->f_data;
+       struct proc *p = uio->uio_procp;
+       int count, error, ioflag = IO_UNIT;
+
+       if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
+               ioflag |= IO_APPEND;
+       if (fp->f_flag & FNONBLOCK)
+               ioflag |= IO_NDELAY;
+       if ((fp->f_flag & O_FSYNC) || (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))
+               ioflag |= IO_SYNC;
+       VOP_LEASE(vp, p, cred, LEASE_WRITE);
+       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+       uio->uio_offset = fp->f_offset;
+       count = uio->uio_resid;                 /* remember the request size */
+       error = VOP_WRITE(vp, uio, ioflag, cred);
+       if (ioflag & IO_APPEND)
+               fp->f_offset = uio->uio_offset; /* offset comes from the uio after an append */
+       else
+               fp->f_offset += count - uio->uio_resid;
+       VOP_UNLOCK(vp, 0, p);
+       return (error);
+}
+
+/*
+ * File table vnode stat routine.
+ *
+ * Fetches the attributes of vp with VOP_GETATTR, using the
+ * credentials of p, and copies them into *sb.  The file type bits of
+ * st_mode are derived from vp->v_type.  Returns the VOP_GETATTR error
+ * if that call fails, EBADF for a vnode type not listed in the switch
+ * (st_dev and st_ino have already been stored at that point), and 0
+ * otherwise.
+ */
+vn_stat(vp, sb, p)
+       struct vnode *vp;
+       register struct stat *sb;
+       struct proc *p;
+{
+       struct vattr vattr;
+       register struct vattr *vap;
+       int error;
+       u_short mode;
+
+       vap = &vattr;
+       error = VOP_GETATTR(vp, vap, p->p_ucred, p);
+       if (error)
+               return (error);
+       /*
+        * Copy from vattr table
+        */
+       sb->st_dev = vap->va_fsid;
+       sb->st_ino = vap->va_fileid;
+       mode = vap->va_mode;
+       switch (vp->v_type) {           /* OR in the S_IF* bit matching the vnode type */
+       case VREG:
+               mode |= S_IFREG;
+               break;
+       case VDIR:
+               mode |= S_IFDIR;
+               break;
+       case VBLK:
+               mode |= S_IFBLK;
+               break;
+       case VCHR:
+               mode |= S_IFCHR;
+               break;
+       case VLNK:
+               mode |= S_IFLNK;
+               break;
+       case VSOCK:
+               mode |= S_IFSOCK;
+               break;
+       case VFIFO:
+               mode |= S_IFIFO;
+               break;
+       default:
+               return (EBADF);
+       };
+       sb->st_mode = mode;
+       sb->st_nlink = vap->va_nlink;
+       sb->st_uid = vap->va_uid;
+       sb->st_gid = vap->va_gid;
+       sb->st_rdev = vap->va_rdev;
+       sb->st_size = vap->va_size;
+       sb->st_atimespec = vap->va_atime;
+       sb->st_mtimespec = vap->va_mtime;
+       sb->st_ctimespec = vap->va_ctime;
+       sb->st_blksize = vap->va_blocksize;
+       sb->st_flags = vap->va_flags;
+       sb->st_gen = vap->va_gen;
+       sb->st_blocks = vap->va_bytes / S_BLKSIZE;      /* byte count expressed in S_BLKSIZE units */
        return (0);
 }
        return (0);
 }
+
+/*
+ * File table vnode ioctl routine.
+ *
+ * Regular files and directories: FIONREAD stores va_size minus the
+ * current file offset through data as an int; FIONBIO and FIOASYNC
+ * return 0 without doing anything; every other command returns
+ * ENOTTY, as does any vnode type not listed in the switch.
+ * FIFOs and character or block devices: the request is passed to
+ * VOP_IOCTL.  After a successful TIOCSCTTY the s_ttyvp of the session
+ * of p is replaced by vp: a previous vnode is released with vrele()
+ * and VREF() is applied to the new one.
+ */
+vn_ioctl(fp, com, data, p)
+       struct file *fp;
+       u_long com;
+       caddr_t data;
+       struct proc *p;
+{
+       register struct vnode *vp = ((struct vnode *)fp->f_data);
+       struct vattr vattr;
+       int error;
+
+       switch (vp->v_type) {
+
+       case VREG:
+       case VDIR:
+               if (com == FIONREAD) {
+                       if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
+                               return (error);
+                       *(int *)data = vattr.va_size - fp->f_offset;    /* size minus current offset */
+                       return (0);
+               }
+               if (com == FIONBIO || com == FIOASYNC)  /* XXX */
+                       return (0);                     /* XXX */
+               /* fall into ... */
+
+       default:
+               return (ENOTTY);
+
+       case VFIFO:
+       case VCHR:
+       case VBLK:
+               error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
+               if (error == 0 && com == TIOCSCTTY) {
+                       if (p->p_session->s_ttyvp)
+                               vrele(p->p_session->s_ttyvp);   /* release the vnode recorded before */
+                       p->p_session->s_ttyvp = vp;
+                       VREF(vp);
+               }
+               return (error);
+       }
+}
+
+/*
+ * File table vnode select routine.
+ *
+ * Passes the request straight to VOP_SELECT on the vnode stored in
+ * fp->f_data, together with the file flags and the credentials held
+ * in the file structure, and returns its result.
+ */
+vn_select(fp, which, p)
+       struct file *fp;
+       int which;
+       struct proc *p;
+{
+
+       return (VOP_SELECT(((struct vnode *)fp->f_data), which, fp->f_flag,
+               fp->f_cred, p));
+}
+
+/*
+ * Check that the vnode is still valid, and if so
+ * acquire requested lock.
+ *
+ * The vnode interlock is taken here unless the caller passed
+ * LK_INTERLOCK; that flag is cleared after the first pass, so later
+ * passes always take the interlock themselves.  If VXLOCK is set on
+ * the vnode, VXWANT is set, the interlock is dropped, the caller
+ * sleeps on vp and the attempt counts as ENOENT.  Otherwise VOP_LOCK
+ * is called with LK_INTERLOCK added to flags.  The loop repeats for
+ * as long as LK_RETRY is set in flags and no lock has been obtained.
+ * Returns 0 once VOP_LOCK succeeds, else the error of the last pass.
+ */
+int
+vn_lock(vp, flags, p)
+       struct vnode *vp;
+       int flags;
+       struct proc *p;
+{
+       int error;
+       
+       do {
+               if ((flags & LK_INTERLOCK) == 0)
+                       simple_lock(&vp->v_interlock);
+               if (vp->v_flag & VXLOCK) {
+                       vp->v_flag |= VXWANT;
+                       simple_unlock(&vp->v_interlock);
+                       tsleep((caddr_t)vp, PINOD, "vn_lock", 0);
+                       error = ENOENT;         /* returned only when LK_RETRY is not set */
+               } else {
+                       error = VOP_LOCK(vp, flags | LK_INTERLOCK, p);
+                       if (error == 0)
+                               return (error);
+               }
+               flags &= ~LK_INTERLOCK;         /* next pass takes the interlock itself */
+       } while (flags & LK_RETRY);
+       return (error);
+}
+
+/*
+ * File table vnode close routine.
+ *
+ * Calls vn_close on the vnode stored in fp->f_data, passing the file
+ * flags and the credentials held in the file structure, and returns
+ * its result.
+ */
+vn_closefile(fp, p)
+       struct file *fp;
+       struct proc *p;
+{
+
+       return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
+               fp->f_cred, p));
+}