X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/blobdiff_plain/c6fd5824aaad3a5ad7573ebfbd29123d09eba1f1..e0ed5a0763e4a389986f0387404fa8abb7a9d214:/usr/src/sys/kern/kern_descrip.c

diff --git a/usr/src/sys/kern/kern_descrip.c b/usr/src/sys/kern/kern_descrip.c
index e932326cc4..a3517d7dac 100644
--- a/usr/src/sys/kern/kern_descrip.c
+++ b/usr/src/sys/kern/kern_descrip.c
@@ -1,350 +1,538 @@
 /*
- * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
+ * Copyright (c) 1982, 1986, 1989, 1991 Regents of the University of California.
  * All rights reserved.
  *
- * Redistribution and use in source and binary forms are permitted
- * provided that the above copyright notice and this paragraph are
- * duplicated in all such forms and that any documentation,
- * advertising materials, and other materials related to such
- * distribution and use acknowledge that the software was developed
- * by the University of California, Berkeley.  The name of the
- * University may not be used to endorse or promote products derived
- * from this software without specific prior written permission.
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ * %sccs.include.redist.c%
  *
- *	@(#)kern_descrip.c	7.6 (Berkeley) %G%
+ *	@(#)kern_descrip.c	7.41 (Berkeley) %G%
  */
 
-#include "param.h"
-#include "systm.h"
-#include "syscontext.h"
-#include "kernel.h"
-#include "vnode.h"
-#include "proc.h"
-#include "file.h"
-#include "socket.h"
-#include "socketvar.h"
-#include "mount.h"
-#include "stat.h"
-
-#include "ioctl.h"
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/fcntl.h>
+#include <sys/malloc.h>
+#include <sys/syslog.h>
+#include <sys/resourcevar.h>
 
 /*
  * Descriptor management.
  */
+struct file *filehead;	/* head of list of open files */
+int nfiles;		/* actual number of open files */
 
 /*
  * System calls on descriptors.
  */
-getdtablesize()
-{
-
-	u.u_r.r_val1 = NOFILE;
-}
-
-dup()
+struct getdtablesize_args {
+	int	dummy;
+};
+/* ARGSUSED */
+getdtablesize(p, uap, retval)
+	struct proc *p;
+	struct getdtablesize_args *uap;
+	int *retval;
 {
-	register struct a {
-		int	i;
-	} *uap = (struct a *) u.u_ap;
-	struct file *fp;
-	int j;
-
-	if (uap->i &~ 077) { uap->i &= 077; dup2(); return; }	/* XXX */
 
-	if ((unsigned)uap->i >= NOFILE || (fp = u.u_ofile[uap->i]) == NULL)
-		RETURN (EBADF);
-	if (u.u_error = ufalloc(0, &j))
-		return;
-	u.u_r.r_val1 = j;
-	dupit(j, fp, u.u_pofile[uap->i] &~ UF_EXCLOSE);
+	*retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
+	return (0);
 }
 
-dup2()
+/*
+ * Duplicate a file descriptor.
+ */
+struct dup_args {
+	u_int	fd;
+};
+/* ARGSUSED */
+dup(p, uap, retval)
+	struct proc *p;
+	struct dup_args *uap;
+	int *retval;
 {
-	register struct a {
-		int	i, j;
-	} *uap = (struct a *) u.u_ap;
-	register struct file *fp;
-	int error;
+	register struct filedesc *fdp;
+	u_int old;
+	int new, error;
 
-	if ((unsigned)uap->i >= NOFILE || (fp = u.u_ofile[uap->i]) == NULL)
-		RETURN (EBADF);
-	if (uap->j < 0 || uap->j >= NOFILE)
-		RETURN (EBADF);
-	u.u_r.r_val1 = uap->j;
-	if (uap->i == uap->j)
-		RETURN (0);
-	if (u.u_ofile[uap->j]) {
-		if (u.u_pofile[uap->j] & UF_MAPPED)
-			munmapfd(uap->j);
-		error = closef(u.u_ofile[uap->j]);
-	}
-	dupit(uap->j, fp, u.u_pofile[uap->i] &~ UF_EXCLOSE);
+	old = uap->fd;
 	/*
-	 * dup2() must succeed even though the close had an error.
+	 * XXX Compatibility
 	 */
-	error = 0;		/* XXX */
-	RETURN (error);
+	if (old &~ 077) { uap->fd &= 077; return (dup2(p, uap, retval)); }
+
+	fdp = p->p_fd;
+	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
+		return (EBADF);
+	if (error = fdalloc(p, 0, &new))
+		return (error);
+	return (finishdup(fdp, (int)old, new, retval));
 }
 
-dupit(fd, fp, flags)
-	int fd;
-	register struct file *fp;
-	register int flags;
+/*
+ * Duplicate a file descriptor to a particular value.
+ */
+struct dup2_args {
+	u_int	from;
+	u_int	to;
+};
+/* ARGSUSED */
+dup2(p, uap, retval)
+	struct proc *p;
+	struct dup2_args *uap;
+	int *retval;
 {
-
-	u.u_ofile[fd] = fp;
-	u.u_pofile[fd] = flags;
-	fp->f_count++;
-	if (fd > u.u_lastfile)
-		u.u_lastfile = fd;
+	register struct filedesc *fdp = p->p_fd;
+	register u_int old = uap->from, new = uap->to;
+	int i, error;
+
+	if (old >= fdp->fd_nfiles ||
+	    fdp->fd_ofiles[old] == NULL ||
+	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
+	    new >= maxfiles)
+		return (EBADF);
+	if (old == new) {
+		*retval = new;
+		return (0);
+	}
+	if (new >= fdp->fd_nfiles) {
+		if (error = fdalloc(p, new, &i))
+			return (error);
+		if (new != i)
+			panic("dup2: fdalloc");
+	} else if (fdp->fd_ofiles[new]) {
+		if (fdp->fd_ofileflags[new] & UF_MAPPED)
+			(void) munmapfd(p, new);
+		/*
+		 * dup2() must succeed even if the close has an error.
+		 */
+		(void) closef(fdp->fd_ofiles[new], p);
+	}
+	return (finishdup(fdp, (int)old, (int)new, retval));
 }
 
 /*
  * The file control system call.
  */
-fcntl()
+struct fcntl_args {
+	int	fd;
+	int	cmd;
+	int	arg;
+};
+/* ARGSUSED */
+fcntl(p, uap, retval)
+	struct proc *p;
+	register struct fcntl_args *uap;
+	int *retval;
 {
+	register struct filedesc *fdp = p->p_fd;
 	register struct file *fp;
-	register struct a {
-		int	fdes;
-		int	cmd;
-		int	arg;
-	} *uap = (struct a *)u.u_ap;
 	register char *pop;
-	int i;
+	struct vnode *vp;
+	int i, tmp, error, flg = F_POSIX;
+	struct flock fl;
+	u_int newmin;
+
+	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+		return (EBADF);
+	pop = &fdp->fd_ofileflags[uap->fd];
+	switch (uap->cmd) {
 
-	if ((unsigned)uap->fdes >= NOFILE ||
-	    (fp = u.u_ofile[uap->fdes]) == NULL)
-		RETURN (EBADF);
-	pop = &u.u_pofile[uap->fdes];
-	switch(uap->cmd) {
 	case F_DUPFD:
-		if (uap->arg < 0 || uap->arg >= NOFILE) {
-			u.u_error = EINVAL;
-			return;
-		}
-		if (u.u_error = ufalloc(uap->arg, &i))
-			return;
-		u.u_r.r_val1 = i;
-		dupit(i, fp, *pop &~ UF_EXCLOSE);
-		break;
+		newmin = uap->arg;
+		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
+		    newmin >= maxfiles)
+			return (EINVAL);
+		if (error = fdalloc(p, newmin, &i))
+			return (error);
+		return (finishdup(fdp, uap->fd, i, retval));
 
 	case F_GETFD:
-		u.u_r.r_val1 = *pop & 1;
-		break;
+		*retval = *pop & 1;
+		return (0);
 
 	case F_SETFD:
 		*pop = (*pop &~ 1) | (uap->arg & 1);
-		break;
+		return (0);
 
 	case F_GETFL:
-		u.u_r.r_val1 = fp->f_flag+FOPEN;
-		break;
+		*retval = OFLAGS(fp->f_flag);
+		return (0);
 
 	case F_SETFL:
-		fp->f_flag &= FCNTLCANT;
-		fp->f_flag |= (uap->arg-FOPEN) &~ FCNTLCANT;
-		u.u_error = fset(fp, FNDELAY, fp->f_flag & FNDELAY);
-		if (u.u_error)
-			break;
-		u.u_error = fset(fp, FASYNC, fp->f_flag & FASYNC);
-		if (u.u_error)
-			(void) fset(fp, FNDELAY, 0);
-		break;
+		fp->f_flag &= ~FCNTLFLAGS;
+		fp->f_flag |= FFLAGS(uap->arg) & FCNTLFLAGS;
+		tmp = fp->f_flag & FNONBLOCK;
+		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
+		if (error)
+			return (error);
+		tmp = fp->f_flag & FASYNC;
+		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
+		if (!error)
+			return (0);
+		fp->f_flag &= ~FNONBLOCK;
+		tmp = 0;
+		(void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
+		return (error);
 
 	case F_GETOWN:
-		u.u_error = fgetown(fp, &u.u_r.r_val1);
-		break;
+		if (fp->f_type == DTYPE_SOCKET) {
+			*retval = ((struct socket *)fp->f_data)->so_pgid;
+			return (0);
+		}
+		error = (*fp->f_ops->fo_ioctl)
+			(fp, (int)TIOCGPGRP, (caddr_t)retval, p);
+		*retval = -*retval;
+		return (error);
 
 	case F_SETOWN:
-		u.u_error = fsetown(fp, uap->arg);
-		break;
+		if (fp->f_type == DTYPE_SOCKET) {
+			((struct socket *)fp->f_data)->so_pgid = uap->arg;
+			return (0);
+		}
+		if (uap->arg <= 0) {
+			uap->arg = -uap->arg;
+		} else {
+			struct proc *p1 = pfind(uap->arg);
+			if (p1 == 0)
+				return (ESRCH);
+			uap->arg = p1->p_pgrp->pg_id;
+		}
+		return ((*fp->f_ops->fo_ioctl)
+			(fp, (int)TIOCSPGRP, (caddr_t)&uap->arg, p));
+
+	case F_SETLKW:
+		flg |= F_WAIT;
+		/* Fall into F_SETLK */
+
+	case F_SETLK:
+		if (fp->f_type != DTYPE_VNODE)
+			return (EBADF);
+		vp = (struct vnode *)fp->f_data;
+		/* Copy in the lock structure */
+		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
+		if (error)
+			return (error);
+		if (fl.l_whence == SEEK_CUR)
+			fl.l_start += fp->f_offset;
+		switch (fl.l_type) {
+
+		case F_RDLCK:
+			if ((fp->f_flag & FREAD) == 0)
+				return (EBADF);
+			p->p_flag |= SADVLCK;
+			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
+
+		case F_WRLCK:
+			if ((fp->f_flag & FWRITE) == 0)
+				return (EBADF);
+			p->p_flag |= SADVLCK;
+			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
+
+		case F_UNLCK:
+			return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
+				F_POSIX));
+
+		default:
+			return (EINVAL);
+		}
+
+	case F_GETLK:
+		if (fp->f_type != DTYPE_VNODE)
+			return (EBADF);
+		vp = (struct vnode *)fp->f_data;
+		/* Copy in the lock structure */
+		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
+		if (error)
+			return (error);
+		if (fl.l_whence == SEEK_CUR)
+			fl.l_start += fp->f_offset;
+		if (error = VOP_ADVLOCK(vp, (caddr_t)p, F_GETLK, &fl, F_POSIX))
+			return (error);
+		return (copyout((caddr_t)&fl, (caddr_t)uap->arg, sizeof (fl)));
 
 	default:
-		u.u_error = EINVAL;
+		return (EINVAL);
 	}
+	/* NOTREACHED */
 }
 
-fset(fp, bit, value)
-	struct file *fp;
-	int bit, value;
+/*
+ * Common code for dup, dup2, and fcntl(F_DUPFD).
+ */
+int
+finishdup(fdp, old, new, retval)
+	register struct filedesc *fdp;
+	register int old, new, *retval;
 {
+	register struct file *fp;
 
-	if (value)
-		fp->f_flag |= bit;
-	else
-		fp->f_flag &= ~bit;
-	return (fioctl(fp, (int)(bit == FNDELAY ? FIONBIO : FIOASYNC),
-	    (caddr_t)&value));
+	fp = fdp->fd_ofiles[old];
+	fdp->fd_ofiles[new] = fp;
+	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
+	fp->f_count++;
+	if (new > fdp->fd_lastfile)
+		fdp->fd_lastfile = new;
+	*retval = new;
+	return (0);
 }
 
-fgetown(fp, valuep)
-	struct file *fp;
-	int *valuep;
+/*
+ * Close a file descriptor.
+ */
+struct close_args {
+	int	fd;
+};
+/* ARGSUSED */
+close(p, uap, retval)
+	struct proc *p;
+	struct close_args *uap;
+	int *retval;
 {
-	int error;
-
-	switch (fp->f_type) {
-
-	case DTYPE_SOCKET:
-		*valuep = ((struct socket *)fp->f_data)->so_pgid;
-		return (0);
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	register int fd = uap->fd;
+	register u_char *pf;
 
-	default:
-		error = fioctl(fp, (int)TIOCGPGRP, (caddr_t)valuep);
-		*valuep = -*valuep;
-		return (error);
-	}
+	if ((unsigned)fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[fd]) == NULL)
+		return (EBADF);
+	pf = (u_char *)&fdp->fd_ofileflags[fd];
+	if (*pf & UF_MAPPED)
+		(void) munmapfd(p, fd);
+	fdp->fd_ofiles[fd] = NULL;
+	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
+		fdp->fd_lastfile--;
+	if (fd < fdp->fd_freefile)
+		fdp->fd_freefile = fd;
+	*pf = 0;
+	return (closef(fp, p));
 }
 
-fsetown(fp, value)
-	struct file *fp;
-	int value;
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Return status information about a file descriptor.
+ */
+struct ofstat_args {
+	int	fd;
+	struct	ostat *sb;
+};
+/* ARGSUSED */
+ofstat(p, uap, retval)
+	struct proc *p;
+	register struct ofstat_args *uap;
+	int *retval;
 {
+	register struct filedesc *fdp = p->p_fd;
+	register struct file *fp;
+	struct stat ub;
+	struct ostat oub;
+	int error;
 
-	if (fp->f_type == DTYPE_SOCKET) {
-		((struct socket *)fp->f_data)->so_pgid = value;
-		return (0);
-	}
-	if (value > 0) {
-		struct proc *p = pfind(value);
-		if (p == 0)
-			return (ESRCH);
-		value = p->p_pgrp->pg_id;
-	} else
-		value = -value;
-	return (fioctl(fp, (int)TIOCSPGRP, (caddr_t)&value));
-}
-
-fioctl(fp, cmd, value)
-	struct file *fp;
-	int cmd;
-	caddr_t value;
-{
+	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+		return (EBADF);
+	switch (fp->f_type) {
 
-	return ((*fp->f_ops->fo_ioctl)(fp, cmd, value));
-}
+	case DTYPE_VNODE:
+		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
+		break;
 
-close()
-{
-	struct a {
-		int	fdes;
-	} *uap = (struct a *)u.u_ap;
-	register struct file *fp;
-	register u_char *pf;
+	case DTYPE_SOCKET:
+		error = soo_stat((struct socket *)fp->f_data, &ub);
+		break;
 
-	if ((unsigned)uap->fdes >= NOFILE ||
-	    (fp = u.u_ofile[uap->fdes]) == NULL)
-		RETURN (EBADF);
-	pf = (u_char *)&u.u_pofile[uap->fdes];
-	if (*pf & UF_MAPPED)
-		munmapfd(uap->fdes);
-	u.u_ofile[uap->fdes] = NULL;
-	while (u.u_lastfile >= 0 && u.u_ofile[u.u_lastfile] == NULL)
-		u.u_lastfile--;
-	*pf = 0;
-	RETURN (closef(fp));
+	default:
+		panic("ofstat");
+		/*NOTREACHED*/
+	}
+	cvtstat(&ub, &oub);
+	if (error == 0)
+		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
+	return (error);
 }
+#endif /* COMPAT_43 || COMPAT_SUNOS */
 
-fstat()
+/*
+ * Return status information about a file descriptor.
+ */
+struct fstat_args {
+	int	fd;
+	struct	stat *sb;
+};
+/* ARGSUSED */
+fstat(p, uap, retval)
+	struct proc *p;
+	register struct fstat_args *uap;
+	int *retval;
 {
+	register struct filedesc *fdp = p->p_fd;
 	register struct file *fp;
-	register struct a {
-		int	fdes;
-		struct	stat *sb;
-	} *uap = (struct a *)u.u_ap;
 	struct stat ub;
+	int error;
 
-	if ((unsigned)uap->fdes >= NOFILE ||
-	    (fp = u.u_ofile[uap->fdes]) == NULL)
-		RETURN (EBADF);
+	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+		return (EBADF);
 	switch (fp->f_type) {
 
 	case DTYPE_VNODE:
-		u.u_error = vn_stat((struct vnode *)fp->f_data, &ub);
+		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
 		break;
 
 	case DTYPE_SOCKET:
-		u.u_error = soo_stat((struct socket *)fp->f_data, &ub);
+		error = soo_stat((struct socket *)fp->f_data, &ub);
 		break;
 
 	default:
 		panic("fstat");
 		/*NOTREACHED*/
 	}
-	if (u.u_error == 0)
-		u.u_error = copyout((caddr_t)&ub, (caddr_t)uap->sb,
-		    sizeof (ub));
+	if (error == 0)
+		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
+	return (error);
 }
 
 /*
- * Allocate a user file descriptor.
+ * Allocate a file descriptor for the process.
  */
-ufalloc(want, result)
-	register int want;
+int fdexpand;
+
+fdalloc(p, want, result)
+	struct proc *p;
+	int want;
 	int *result;
 {
+	register struct filedesc *fdp = p->p_fd;
+	register int i;
+	int lim, last, nfiles;
+	struct file **newofile;
+	char *newofileflags;
 
-	for (; want < NOFILE; want++)
-		if (u.u_ofile[want] == NULL) {
-			u.u_pofile[want] = 0;
-			if (want > u.u_lastfile)
-				u.u_lastfile = want;
-			if (result)
-				*result = want;
-			return (0);
+	/*
+	 * Search for a free descriptor starting at the higher
+	 * of want or fd_freefile.  If that fails, consider
+	 * expanding the ofile array.
+	 */
+	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
+	for (;;) {
+		last = min(fdp->fd_nfiles, lim);
+		if ((i = want) < fdp->fd_freefile)
+			i = fdp->fd_freefile;
+		for (; i < last; i++) {
+			if (fdp->fd_ofiles[i] == NULL) {
+				fdp->fd_ofileflags[i] = 0;
+				if (i > fdp->fd_lastfile)
+					fdp->fd_lastfile = i;
+				if (want <= fdp->fd_freefile)
+					fdp->fd_freefile = i;
+				*result = i;
+				return (0);
+			}
 		}
-	return (EMFILE);
+
+		/*
+		 * No space in current array.  Expand?
+		 */
+		if (fdp->fd_nfiles >= lim)
+			return (EMFILE);
+		if (fdp->fd_nfiles < NDEXTENT)
+			nfiles = NDEXTENT;
+		else
+			nfiles = 2 * fdp->fd_nfiles;
+		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
+		    M_FILEDESC, M_WAITOK);
+		newofileflags = (char *) &newofile[nfiles];
+		/*
+		 * Copy the existing ofile and ofileflags arrays
+		 * and zero the new portion of each array.
+		 */
+		bcopy(fdp->fd_ofiles, newofile,
+			(i = sizeof(struct file *) * fdp->fd_nfiles));
+		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
+		bcopy(fdp->fd_ofileflags, newofileflags,
+			(i = sizeof(char) * fdp->fd_nfiles));
+		bzero(newofileflags + i, nfiles * sizeof(char) - i);
+		if (fdp->fd_nfiles > NDFILE)
+			FREE(fdp->fd_ofiles, M_FILEDESC);
+		fdp->fd_ofiles = newofile;
+		fdp->fd_ofileflags = newofileflags;
+		fdp->fd_nfiles = nfiles;
+		fdexpand++;
+	}
 }
 
-ufavail()
+/*
+ * Check to see whether n user file descriptors
+ * are available to the process p.
+ */
+fdavail(p, n)
+	struct proc *p;
+	register int n;
 {
-	register int i, avail = 0;
-
-	for (i = 0; i < NOFILE; i++)
-		if (u.u_ofile[i] == NULL)
-			avail++;
-	return (avail);
+	register struct filedesc *fdp = p->p_fd;
+	register struct file **fpp;
+	register int i, lim;
+
+	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
+	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
+		return (1);
+	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
+	for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++)
+		if (*fpp == NULL && --n <= 0)
+			return (1);
+	return (0);
 }
 
-struct	file *lastf;
 /*
- * Allocate a user file descriptor
- * and a file structure.
- * Initialize the descriptor
- * to point at the file structure.
+ * Create a new open file structure and allocate
+ * a file descriptor for the process that refers to it.
  */
-falloc(resultfp, resultfd)
+falloc(p, resultfp, resultfd)
+	register struct proc *p;
 	struct file **resultfp;
 	int *resultfd;
 {
-	register struct file *fp;
+	register struct file *fp, *fq, **fpp;
 	int error, i;
 
-	if (error = ufalloc(0, &i))
+	if (error = fdalloc(p, 0, &i))
 		return (error);
-	if (lastf == 0)
-		lastf = file;
-	for (fp = lastf; fp < fileNFILE; fp++)
-		if (fp->f_count == 0)
-			goto slot;
-	for (fp = file; fp < lastf; fp++)
-		if (fp->f_count == 0)
-			goto slot;
-	tablefull("file");
-	return (ENFILE);
-slot:
-	u.u_ofile[i] = fp;
+	if (nfiles >= maxfiles) {
+		tablefull("file");
+		return (ENFILE);
+	}
+	/*
+	 * Allocate a new file structure.
+	 * If the process has file descriptor zero open, add to the list
+	 * of open files at that point, otherwise put it at the front of
+	 * the list of open files.
+	 */
+	nfiles++;
+	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
+	if (fq = p->p_fd->fd_ofiles[0])
+		fpp = &fq->f_filef;
+	else
+		fpp = &filehead;
+	p->p_fd->fd_ofiles[i] = fp;
+	if (fq = *fpp)
+		fq->f_fileb = &fp->f_filef;
+	fp->f_filef = fq;
+	fp->f_fileb = fpp;
+	*fpp = fp;
 	fp->f_count = 1;
-	fp->f_data = 0;
+	fp->f_msgcount = 0;
 	fp->f_offset = 0;
-	fp->f_cred = u.u_cred;
+	fp->f_cred = p->p_ucred;
 	crhold(fp->f_cred);
-	lastf = fp + 1;
 	if (resultfp)
 		*resultfp = fp;
 	if (resultfd)
@@ -352,119 +540,304 @@ slot:
 	return (0);
 }
 
+/*
+ * Free a file structure.
+ */
+ffree(fp)
+	register struct file *fp;
+{
+	register struct file *fq;
+
+	if (fq = fp->f_filef)
+		fq->f_fileb = fp->f_fileb;
+	*fp->f_fileb = fq;
+	crfree(fp->f_cred);
+#ifdef DIAGNOSTIC
+	fp->f_filef = NULL;
+	fp->f_fileb = NULL;
+	fp->f_count = 0;
+#endif
+	nfiles--;
+	FREE(fp, M_FILE);
+}
+
+/*
+ * Copy a filedesc structure.
+ */
+struct filedesc *
+fdcopy(p)
+	struct proc *p;
+{
+	register struct filedesc *newfdp, *fdp = p->p_fd;
+	register struct file **fpp;
+	register int i;
+
+	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
+	    M_FILEDESC, M_WAITOK);
+	bcopy(fdp, newfdp, sizeof(struct filedesc));
+	VREF(newfdp->fd_cdir);
+	if (newfdp->fd_rdir)
+		VREF(newfdp->fd_rdir);
+	newfdp->fd_refcnt = 1;
+
+	/*
+	 * If the number of open files fits in the internal arrays
+	 * of the open file structure, use them, otherwise allocate
+	 * additional memory for the number of descriptors currently
+	 * in use.
+	 */
+	if (newfdp->fd_lastfile < NDFILE) {
+		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
+		newfdp->fd_ofileflags =
+		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
+		i = NDFILE;
+	} else {
+		/*
+		 * Compute the smallest multiple of NDEXTENT needed
+		 * for the file descriptors currently in use,
+		 * allowing the table to shrink.
+		 */
+		i = newfdp->fd_nfiles;
+		while (i > 2 * NDEXTENT && i >= newfdp->fd_lastfile * 2)
+			i /= 2;
+		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
+		    M_FILEDESC, M_WAITOK);
+		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
+	}
+	newfdp->fd_nfiles = i;
+	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
+	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
+	fpp = newfdp->fd_ofiles;
+	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
+		if (*fpp != NULL)
+			(*fpp)->f_count++;
+	return (newfdp);
+}
+
+/*
+ * Release a filedesc structure.
+ */
+void
+fdfree(p)
+	struct proc *p;
+{
+	register struct filedesc *fdp = p->p_fd;
+	struct file **fpp;
+	register int i;
+
+	if (--fdp->fd_refcnt > 0)
+		return;
+	fpp = fdp->fd_ofiles;
+	for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
+		if (*fpp)
+			(void) closef(*fpp, p);
+	if (fdp->fd_nfiles > NDFILE)
+		FREE(fdp->fd_ofiles, M_FILEDESC);
+	vrele(fdp->fd_cdir);
+	if (fdp->fd_rdir)
+		vrele(fdp->fd_rdir);
+	FREE(fdp, M_FILEDESC);
+}
+
 /*
  * Internal form of close.
  * Decrement reference count on file structure.
+ * Note: p may be NULL when closing a file
+ * that was being passed in a message.
  */
-closef(fp)
+closef(fp, p)
 	register struct file *fp;
+	register struct proc *p;
 {
+	struct vnode *vp;
+	struct flock lf;
 	int error;
 
 	if (fp == NULL)
 		return (0);
-	if (fp->f_count > 1) {
-		fp->f_count--;
+	/*
+	 * POSIX record locking dictates that any close releases ALL
+	 * locks owned by this process.  This is handled by setting
+	 * a flag in the unlock to free ONLY locks obeying POSIX
+	 * semantics, and not to free BSD-style file locks.
+	 * If the descriptor was in a message, POSIX-style locks
+	 * aren't passed with the descriptor.
+	 */
+	if (p && (p->p_flag & SADVLCK) && fp->f_type == DTYPE_VNODE) {
+		lf.l_whence = SEEK_SET;
+		lf.l_start = 0;
+		lf.l_len = 0;
+		lf.l_type = F_UNLCK;
+		vp = (struct vnode *)fp->f_data;
+		(void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
+	}
+	if (--fp->f_count > 0)
 		return (0);
+	if (fp->f_count < 0)
+		panic("closef: count < 0");
+	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
+		lf.l_whence = SEEK_SET;
+		lf.l_start = 0;
+		lf.l_len = 0;
+		lf.l_type = F_UNLCK;
+		vp = (struct vnode *)fp->f_data;
+		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
 	}
-	if (fp->f_count < 1)
-		panic("closef: count < 1");
-	error = (*fp->f_ops->fo_close)(fp);
-	crfree(fp->f_cred);
-	fp->f_count = 0;
+	error = (*fp->f_ops->fo_close)(fp, p);
+	ffree(fp);
 	return (error);
 }
 
 /*
  * Apply an advisory lock on a file descriptor.
+ *
+ * Just attempt to get a record lock of the requested type on
+ * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
  */
-flock()
+struct flock_args {
+	int	fd;
+	int	how;
+};
+/* ARGSUSED */
+flock(p, uap, retval)
+	struct proc *p;
+	register struct flock_args *uap;
+	int *retval;
 {
-	register struct a {
-		int	fdes;
-		int	how;
-	} *uap = (struct a *)u.u_ap;
+	register struct filedesc *fdp = p->p_fd;
 	register struct file *fp;
+	struct vnode *vp;
+	struct flock lf;
+	int error;
 
-	if ((unsigned)uap->fdes >= NOFILE ||
-	    (fp = u.u_ofile[uap->fdes]) == NULL)
-		RETURN (EBADF);
-	if (fp->f_type != DTYPE_VNODE) {
-		u.u_error = EOPNOTSUPP;
-		return;
-	}
+	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
+	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+		return (EBADF);
+	if (fp->f_type != DTYPE_VNODE)
+		return (EOPNOTSUPP);
+	vp = (struct vnode *)fp->f_data;
+	lf.l_whence = SEEK_SET;
+	lf.l_start = 0;
+	lf.l_len = 0;
 	if (uap->how & LOCK_UN) {
-		vn_unlock(fp, FSHLOCK|FEXLOCK);
-		return;
+		lf.l_type = F_UNLCK;
+		fp->f_flag &= ~FHASLOCK;
+		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
 	}
-	if ((uap->how & (LOCK_SH | LOCK_EX)) == 0)
-		return;					/* error? */
 	if (uap->how & LOCK_EX)
-		uap->how &= ~LOCK_SH;
-	/* avoid work... */
-	if ((fp->f_flag & FEXLOCK) && (uap->how & LOCK_EX) ||
-	    (fp->f_flag & FSHLOCK) && (uap->how & LOCK_SH))
-		return;
-	u.u_error = vn_lock(fp, uap->how);
+		lf.l_type = F_WRLCK;
+	else if (uap->how & LOCK_SH)
+		lf.l_type = F_RDLCK;
+	else
+		return (EBADF);
+	fp->f_flag |= FHASLOCK;
+	if (uap->how & LOCK_NB)
+		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
+	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
 }
 
 /*
  * File Descriptor pseudo-device driver (/dev/fd/).
  *
- * Fred Blonder - U of Maryland	11-Sep-1984
- *
  * Opening minor device N dup()s the file (if any) connected to file
  * descriptor N belonging to the calling process.  Note that this driver
  * consists of only the ``open()'' routine, because all subsequent
  * references to this file will be direct to the other driver.
  */
 /* ARGSUSED */
-fdopen(dev, mode, type)
+fdopen(dev, mode, type, p)
 	dev_t dev;
 	int mode, type;
+	struct proc *p;
 {
-	struct file *fp, *wfp;
-	int indx, dfd, rwmode;
 
 	/*
-	 * Note the horrid kludge here: u.u_r.r_val1 contains the value
-	 * of the new file descriptor, which was set before the call to
-	 * vn_open() by copen() in vfs_syscalls.c
+	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
+	 * file descriptor being sought for duplication. The error
+	 * return ensures that the vnode for this device will be released
+	 * by vn_open. Open will detect this special error and take the
+	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
+	 * will simply report the error.
 	 */
-	indx = u.u_r.r_val1;		/* XXX from copen */
-	if ((unsigned)indx >= NOFILE || (fp = u.u_ofile[indx]) == NULL)
-		return (EBADF);
-	dfd = minor(dev);
-	if ((unsigned)dfd >= NOFILE || (wfp = u.u_ofile[dfd]) == NULL)
-		return (EBADF);
+	p->p_dupfd = minor(dev);
+	return (ENODEV);
+}
+
+/*
+ * Duplicate the specified descriptor to a free descriptor.
+ */
+dupfdopen(fdp, indx, dfd, mode, error)
+	register struct filedesc *fdp;
+	register int indx, dfd;
+	int mode;
+	int error;
+{
+	register struct file *wfp;
+	struct file *fp;
+	
 	/*
-	 * We must explicitly test for this case because ufalloc() may
-	 * have allocated us the same file desriptor we are referring
-	 * to, if the proccess referred to an invalid (closed) descriptor.
-	 * Ordinarily this would be caught by the check for NULL above,
-	 * but by the time we reach this routine u_pofile[minor(dev)]
-	 * could already be set to point to our file struct.
+	 * If the to-be-dup'd fd number is greater than the allowed number
+	 * of file descriptors, or the fd to be dup'd has already been
+	 * closed, reject.  Note, check for new == old is necessary as
+	 * falloc could allocate an already closed to-be-dup'd descriptor
+	 * as the new descriptor.
 	 */
-	if (fp == wfp)
+	fp = fdp->fd_ofiles[indx];
+	if ((u_int)dfd >= fdp->fd_nfiles ||
+	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
 		return (EBADF);
+
 	/*
-	 * Fake a ``dup()'' sys call.
-	 * Check that the mode the file is being opened
-	 * for is consistent with the mode of the existing
-	 * descriptor. This isn't as clean as it should be,
-	 * but this entire driver is a real kludge anyway.
-	 */
-	rwmode = mode & (FREAD|FWRITE);
-	if ((fp->f_flag & rwmode) != rwmode)
-		return (EACCES);
-	/* 
-	 * Dup the file descriptor. 
-	 */
-	dupit(indx, wfp, u.u_pofile[dfd]);
-	/*
-	 * Delete references to this pseudo-device by returning
-	 * a special error (-1) that will cause all resources to
-	 * be freed, then detected and cleared by copen.
+	 * There are two cases of interest here.
+	 *
+	 * For ENODEV simply dup (dfd) to file descriptor
+	 * (indx) and return.
+	 *
+	 * For ENXIO steal away the file structure from (dfd) and
+	 * store it in (indx).  (dfd) is effectively closed by
+	 * this operation.
+	 *
+	 * Any other error code is just returned.
 	 */
-	return (-1);
+	switch (error) {
+	case ENODEV:
+		/*
+		 * Check that the mode the file is being opened for is a
+		 * subset of the mode of the existing descriptor.
+		 */
+		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
+			return (EACCES);
+		fdp->fd_ofiles[indx] = wfp;
+		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+		wfp->f_count++;
+		if (indx > fdp->fd_lastfile)
+			fdp->fd_lastfile = indx;
+		return (0);
+
+	case ENXIO:
+		/*
+		 * Steal away the file pointer from dfd, and stuff it into indx.
+		 */
+		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
+		fdp->fd_ofiles[dfd] = NULL;
+		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+		fdp->fd_ofileflags[dfd] = 0;
+		/*
+		 * Complete the clean up of the filedesc structure by
+		 * recomputing the various hints.
+		 */
+		if (indx > fdp->fd_lastfile)
+			fdp->fd_lastfile = indx;
+		else
+			while (fdp->fd_lastfile > 0 &&
+			       fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
+				fdp->fd_lastfile--;
+			if (dfd < fdp->fd_freefile)
+				fdp->fd_freefile = dfd;
+		return (0);
+
+	default:
+		return (error);
+	}
+	/* NOTREACHED */
 }