From 4f083fd7a06675f7d351f02e7d69eb5c7b9fcc8f Mon Sep 17 00:00:00 2001 From: Sam Leffler Date: Sun, 14 Nov 1982 06:55:08 -0800 Subject: [PATCH] merge of 4.1b and 4.1c SCCS-vsn: sys/kern/init_main.c 4.43 SCCS-vsn: sys/kern/kern_clock.c 4.45 SCCS-vsn: sys/kern/kern_descrip.c 5.18 SCCS-vsn: sys/kern/kern_proc.c 4.48 SCCS-vsn: sys/kern/kern_prot.c 5.11 SCCS-vsn: sys/kern/kern_sig.c 5.12 SCCS-vsn: sys/kern/subr_prof.c 4.2 SCCS-vsn: sys/kern/uipc_domain.c 5.6 SCCS-vsn: sys/kern/uipc_syscalls.c 4.36 SCCS-vsn: sys/kern/tty_subr.c 4.17 SCCS-vsn: sys/ufs/ffs/ffs_alloc.c 2.19 SCCS-vsn: sys/ufs/lfs/lfs_alloc.c 2.19 SCCS-vsn: sys/kern/vfs_bio.c 4.39 SCCS-vsn: sys/kern/vfs_cluster.c 4.39 SCCS-vsn: sys/ufs/ffs/ffs_balloc.c 5.3 SCCS-vsn: sys/ufs/lfs/lfs_balloc.c 5.3 SCCS-vsn: sys/kern/vfs_vnops.c 4.30 SCCS-vsn: sys/ufs/ffs/ffs_inode.c 4.31 SCCS-vsn: sys/ufs/ffs/ufs_inode.c 4.31 SCCS-vsn: sys/ufs/lfs/lfs_inode.c 4.31 SCCS-vsn: sys/ufs/ufs/ufs_inode.c 4.31 SCCS-vsn: sys/kern/vfs_lookup.c 4.30 SCCS-vsn: sys/ufs/ffs/ufs_lookup.c 4.30 SCCS-vsn: sys/ufs/ufs/ufs_lookup.c 4.30 SCCS-vsn: sys/ufs/ffs/ffs_subr.c 4.3 SCCS-vsn: sys/kern/vfs_syscalls.c 4.42 SCCS-vsn: sys/ufs/ffs/ffs_vnops.c 4.42 SCCS-vsn: sys/ufs/ffs/ufs_vnops.c 4.42 SCCS-vsn: sys/ufs/lfs/lfs_vnops.c 4.42 SCCS-vsn: sys/ufs/ufs/ufs_vnops.c 4.42 SCCS-vsn: sys/kern/uipc_mu_msg.c 4.2 SCCS-vsn: sys/kern/uipc_socket.c 4.64 SCCS-vsn: sys/kern/uipc_usrreq.c 1.3 SCCS-vsn: sys/vm/vm_swap.c 4.15 --- usr/src/sys/kern/init_main.c | 18 +- usr/src/sys/kern/kern_clock.c | 4 +- usr/src/sys/kern/kern_descrip.c | 16 +- usr/src/sys/kern/kern_proc.c | 15 +- usr/src/sys/kern/kern_prot.c | 75 +++++- usr/src/sys/kern/kern_sig.c | 7 +- usr/src/sys/kern/subr_prof.c | 8 +- usr/src/sys/kern/tty_subr.c | 5 +- usr/src/sys/kern/uipc_domain.c | 25 +- usr/src/sys/kern/uipc_mu_msg.c | 11 +- usr/src/sys/kern/uipc_socket.c | 12 +- usr/src/sys/kern/uipc_syscalls.c | 10 +- usr/src/sys/kern/uipc_usrreq.c | 229 ++++++++--------- usr/src/sys/kern/vfs_bio.c | 258 
+++++++++++++------ usr/src/sys/kern/vfs_cluster.c | 258 +++++++++++++------ usr/src/sys/kern/vfs_lookup.c | 164 ++++++++++-- usr/src/sys/kern/vfs_syscalls.c | 423 +++++++++++++++++++++++++------ usr/src/sys/kern/vfs_vnops.c | 5 +- usr/src/sys/ufs/ffs/ffs_alloc.c | 251 +++++++----------- usr/src/sys/ufs/ffs/ffs_balloc.c | 21 +- usr/src/sys/ufs/ffs/ffs_inode.c | 261 +++++++++++++------ usr/src/sys/ufs/ffs/ffs_subr.c | 189 +++++++++++++- usr/src/sys/ufs/ffs/ffs_vnops.c | 423 +++++++++++++++++++++++++------ usr/src/sys/ufs/ffs/ufs_inode.c | 261 +++++++++++++------ usr/src/sys/ufs/ffs/ufs_lookup.c | 164 ++++++++++-- usr/src/sys/ufs/ffs/ufs_vnops.c | 423 +++++++++++++++++++++++++------ usr/src/sys/ufs/lfs/lfs_alloc.c | 251 +++++++----------- usr/src/sys/ufs/lfs/lfs_balloc.c | 21 +- usr/src/sys/ufs/lfs/lfs_inode.c | 261 +++++++++++++------ usr/src/sys/ufs/lfs/lfs_vnops.c | 423 +++++++++++++++++++++++++------ usr/src/sys/ufs/ufs/ufs_inode.c | 261 +++++++++++++------ usr/src/sys/ufs/ufs/ufs_lookup.c | 164 ++++++++++-- usr/src/sys/ufs/ufs/ufs_vnops.c | 423 +++++++++++++++++++++++++------ usr/src/sys/vm/vm_swap.c | 5 +- 34 files changed, 3914 insertions(+), 1431 deletions(-) diff --git a/usr/src/sys/kern/init_main.c b/usr/src/sys/kern/init_main.c index 5fa962fb6b..5dc0edb08c 100644 --- a/usr/src/sys/kern/init_main.c +++ b/usr/src/sys/kern/init_main.c @@ -1,4 +1,4 @@ -/* init_main.c 4.42 82/11/02 */ +/* init_main.c 4.43 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -38,7 +38,7 @@ extern struct user u; /* have to declare it somewhere! */ * - process 2 to page out * - process 1 execute bootstrap * - * loop at loc something in user mode -- /etc/init + * loop at loc 13 (0xd) in user mode -- /etc/init * cannot be executed. 
*/ #ifdef vax @@ -53,6 +53,7 @@ main(regs) register int i; register struct proc *p; struct fs *fs; + int s; rqinit(); #include "loop.h" @@ -104,9 +105,17 @@ main(regs) #if NLOOP > 0 loattach(); /* XXX */ #endif + /* + * Block reception of incoming packets + * until protocols have been initialized. + */ + s = splimp(); ifinit(); #endif domaininit(); +#ifdef INET + splx(s); +#endif ihinit(); bhinit(); binit(); @@ -235,12 +244,13 @@ binit() dp->b_forw = dp->b_back = dp->av_forw = dp->av_back = dp; dp->b_flags = B_HEAD; } - dp--; /* dp = &bfreelist[BQUEUES-1]; */ + dp = &bfreelist[BQ_AGE]; for (i = 0; i < nbuf; i++) { bp = &buf[i]; bp->b_dev = NODEV; + bp->b_bcount = 0; bp->b_un.b_addr = buffers + i * MAXBSIZE; - bp->b_bcount = MAXBSIZE; + bp->b_bufsize = 2 * CLBYTES; bp->b_back = dp; bp->b_forw = dp->b_forw; dp->b_forw->b_back = bp; diff --git a/usr/src/sys/kern/kern_clock.c b/usr/src/sys/kern/kern_clock.c index ccb7c2e7a2..d795a92f1b 100644 --- a/usr/src/sys/kern/kern_clock.c +++ b/usr/src/sys/kern/kern_clock.c @@ -1,4 +1,4 @@ -/* kern_clock.c 4.44 82/10/31 */ +/* kern_clock.c 4.45 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -247,7 +247,7 @@ softclock(sirret, regs) calltodo.c_next = p1->c_next; p1->c_next = callfree; callfree = p1; - (void) splx(s); + splx(s); (*func)(arg, a); } } diff --git a/usr/src/sys/kern/kern_descrip.c b/usr/src/sys/kern/kern_descrip.c index 1f1566f0de..068c12f6c2 100644 --- a/usr/src/sys/kern/kern_descrip.c +++ b/usr/src/sys/kern/kern_descrip.c @@ -1,4 +1,4 @@ -/* kern_descrip.c 5.17 82/10/30 */ +/* kern_descrip.c 5.18 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -81,7 +81,7 @@ dup() j = ufalloc(); if (j < 0) return; - dupit(j, fp, u.u_pofile[uap->i] & (RDLOCK|WRLOCK)); + dupit(j, fp, u.u_pofile[uap->i] & (SHLOCK|EXLOCK)); } dup2() @@ -112,7 +112,7 @@ dup2() /* u.u_ofile[uap->j] = 0; */ /* u.u_pofile[uap->j] = 0; */ } - dupit(uap->j, fp, u.u_pofile[uap->i] & (RDLOCK|WRLOCK)); + dupit(uap->j, fp, 
u.u_pofile[uap->i] & (SHLOCK|EXLOCK)); } dupit(fd, fp, lockflags) @@ -124,10 +124,10 @@ dupit(fd, fp, lockflags) u.u_ofile[fd] = fp; u.u_pofile[fd] = lockflags; fp->f_count++; - if (lockflags&RDLOCK) - fp->f_inode->i_rdlockc++; - if (lockflags&WRLOCK) - fp->f_inode->i_wrlockc++; + if (lockflags&SHLOCK) + fp->f_inode->i_shlockc++; + if (lockflags&EXLOCK) + fp->f_inode->i_exlockc++; } close() @@ -493,7 +493,7 @@ closef(fp, nouser, flags) ip = fp->f_inode; dev = (dev_t)ip->i_rdev; mode = ip->i_mode & IFMT; - flags &= RDLOCK|WRLOCK; /* conservative */ + flags &= SHLOCK|EXLOCK; /* conservative */ if (flags) funlocki(ip, flags); ilock(ip); diff --git a/usr/src/sys/kern/kern_proc.c b/usr/src/sys/kern/kern_proc.c index b7b2e1f1b6..e1e2c40a64 100644 --- a/usr/src/sys/kern/kern_proc.c +++ b/usr/src/sys/kern/kern_proc.c @@ -1,4 +1,4 @@ -/* kern_proc.c 4.47 82/11/02 */ +/* kern_proc.c 4.48 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -22,6 +22,7 @@ #include "../h/descrip.h" #include "../h/uio.h" #include "../h/mbuf.h" +#include "../h/nami.h" gethostid() { @@ -103,7 +104,7 @@ execve() char cfarg[SHSIZE]; int resid; - if ((ip = namei(uchar, 0, 1)) == NULL) + if ((ip = namei(uchar, LOOKUP, 1)) == NULL) return; bno = 0; bp = 0; @@ -208,7 +209,7 @@ execve() (unsigned)(u.u_dent.d_namlen + 1)); indir = 1; iput(ip); - ip = namei(schar, 0, 1); + ip = namei(schar, LOOKUP, 1); if (ip == NULL) return; goto again; @@ -1006,10 +1007,10 @@ retry: if (fp == NULL) continue; fp->f_count++; - if (u.u_pofile[n]&RDLOCK) - fp->f_inode->i_rdlockc++; - if (u.u_pofile[n]&WRLOCK) - fp->f_inode->i_wrlockc++; + if (u.u_pofile[n]&SHLOCK) + fp->f_inode->i_shlockc++; + if (u.u_pofile[n]&EXLOCK) + fp->f_inode->i_exlockc++; } u.u_cdir->i_count++; if (u.u_rdir) diff --git a/usr/src/sys/kern/kern_prot.c b/usr/src/sys/kern/kern_prot.c index cf49f9f232..1bfdeeea5f 100644 --- a/usr/src/sys/kern/kern_prot.c +++ b/usr/src/sys/kern/kern_prot.c @@ -1,4 +1,4 @@ -/* kern_prot.c 5.10 82/10/20 */ +/* 
kern_prot.c 5.11 82/11/13 */ /* * System calls related to processes and protection @@ -105,7 +105,43 @@ setpgrp() p->p_pgrp = uap->pgrp; } -setuid() +setreuid() +{ + struct a { + int ruid; + int euid; + } *uap; + register int ruid, euid; + + uap = (struct a *)u.u_ap; + ruid = uap->ruid; + if (ruid == -1) + ruid = u.u_ruid; + if (u.u_ruid != ruid && u.u_uid != ruid && !suser()) + return; + euid = uap->euid; + if (euid == -1) + euid = u.u_uid; + if (u.u_ruid != euid && u.u_uid != euid && !suser()) + return; + /* + * Everything's okay, do it. + */ + if (ruid != u.u_ruid) { +#ifdef QUOTA + if (u.u_quota->q_uid != ruid) { + qclean(); + qstart(getquota(ruid, 0, 0)); + } +#endif + u.u_procp->p_uid = ruid; + u.u_ruid = ruid; + } + u.u_uid = euid; +} + +#ifndef NOCOMPAT +osetuid() { register uid; register struct a { @@ -126,8 +162,41 @@ setuid() u.u_ruid = uid; } } +#endif + +setregid() +{ + register struct a { + int rgid; + int egid; + } *uap; + register int rgid, egid; + + uap = (struct a *)u.u_ap; + rgid = uap->rgid; + if (rgid == -1) + rgid = u.u_rgid; + if (u.u_rgid != rgid && u.u_gid != rgid && !suser()) + return; + egid = uap->egid; + if (egid == -1) + egid = u.u_gid; + if (u.u_rgid != egid && u.u_gid != egid && !suser()) + return; + if (u.u_rgid != rgid) { + leavegroup(u.u_rgid); + (void) entergroup(rgid); /* enter the NEW real gid; the old one was just left */ + u.u_rgid = rgid; + } + if (u.u_gid != egid) { + leavegroup(u.u_gid); + (void) entergroup(egid); + u.u_gid = egid; + } +} -setgid() +#ifndef NOCOMPAT +osetgid() { register gid; register struct a { diff --git a/usr/src/sys/kern/kern_sig.c b/usr/src/sys/kern/kern_sig.c index fa02ea3cc8..2da158a2ed 100644 --- a/usr/src/sys/kern/kern_sig.c +++ b/usr/src/sys/kern/kern_sig.c @@ -1,4 +1,4 @@ -/* kern_sig.c 5.11 82/10/31 */ +/* kern_sig.c 5.12 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -20,6 +20,7 @@ #include "../h/acct.h" #include "../h/uio.h" #include "../h/kernel.h" +#include "../h/nami.h" /* KILL CODE SHOULDNT KNOW ABOUT PROCESS INTERNALS !?! 
*/ @@ -693,7 +694,7 @@ core() return (0); u.u_error = 0; u.u_dirp = "core"; - ip = namei(schar, 1, 1); + ip = namei(schar, CREATE, 1); if (ip == NULL) { if (u.u_error) return (0); @@ -707,7 +708,7 @@ core() u.u_error = EFAULT; goto out; } - itrunc(ip, 0); + itrunc(ip, (u_long)0); u.u_acflag |= ACORE; /* if (u.u_error == 0) */ u.u_error = rdwri(UIO_WRITE, ip, diff --git a/usr/src/sys/kern/subr_prof.c b/usr/src/sys/kern/subr_prof.c index cbfc9a83f7..def3affa33 100644 --- a/usr/src/sys/kern/subr_prof.c +++ b/usr/src/sys/kern/subr_prof.c @@ -1,4 +1,4 @@ -/* subr_prof.c 4.1 82/06/28 */ +/* subr_prof.c 4.2 82/11/13 */ #ifdef GPROF #include "../h/crt0.h" @@ -28,13 +28,13 @@ kmstartup() ssiz = s_textsize + sizeof(struct phdr); printf("Profiling kernel, s_textsize=%d [%x..%x]\n", s_textsize, s_lowpc, s_highpc); - sbuf = (u_short *)wmemall(vmemall, ssiz); + sbuf = (u_short *)wmemall(memall, ssiz); if (sbuf == 0) { printf("No space for monitor buffer(s)\n"); return; } blkclr((caddr_t)sbuf, ssiz); - froms = (u_short *)wmemall(vmemall, s_textsize); + froms = (u_short *)wmemall(memall, s_textsize); if (froms == 0) { printf("No space for monitor buffer(s)\n"); wmemfree(sbuf, ssiz); @@ -42,7 +42,7 @@ kmstartup() return; } blkclr((caddr_t)froms, s_textsize); - tos = (struct tostruct *)wmemall(vmemall, s_textsize); + tos = (struct tostruct *)wmemall(memall, s_textsize); if (tos == 0) { printf("No space for monitor buffer(s)\n"); wmemfree(sbuf, ssiz); diff --git a/usr/src/sys/kern/tty_subr.c b/usr/src/sys/kern/tty_subr.c index a28677f700..5a4b3ae9a5 100644 --- a/usr/src/sys/kern/tty_subr.c +++ b/usr/src/sys/kern/tty_subr.c @@ -1,4 +1,4 @@ -/* tty_subr.c 4.16 82/10/31 */ +/* tty_subr.c 4.17 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -382,6 +382,8 @@ struct clist *from, *to; (void) putc(c, to); } +#include "dmc.h" +#if NDMC > 0 /* * integer (2-byte) get/put * using clists @@ -412,3 +414,4 @@ putw(c, p) splx(s); return(0); } +#endif /* NDMC > 0 */ diff --git 
a/usr/src/sys/kern/uipc_domain.c b/usr/src/sys/kern/uipc_domain.c index ffca28c56b..21e2f1e603 100644 --- a/usr/src/sys/kern/uipc_domain.c +++ b/usr/src/sys/kern/uipc_domain.c @@ -1,9 +1,11 @@ -/* uipc_domain.c 5.5 82/11/02 */ +/* uipc_domain.c 5.6 82/11/13 */ #include "../h/param.h" #include "../h/socket.h" #include "../h/protosw.h" #include "../h/domain.h" +#include +#include "../h/kernel.h" #define ADDDOMAIN(x) { \ extern struct domain x/**/domain; \ @@ -13,7 +15,10 @@ domaininit() { + register struct domain *dp; + register struct protosw *pr; +#ifndef lint ADDDOMAIN(unix); #ifdef INET ADDDOMAIN(inet); @@ -24,22 +29,14 @@ domaininit() #ifdef IMP ADDDOMAIN(imp); #endif - pfinit(); -} - -/* - * Operations applying to the sets of protocols - * defined by the available communications domains. - */ -pfinit() -{ - register struct domain *dp; - register struct protosw *pr; +#endif for (dp = domains; dp; dp = dp->dom_next) for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_init) (*pr->pr_init)(); + pffasttimo(); + pfslowtimo(); } struct protosw * @@ -74,7 +71,7 @@ pffindproto(family, protocol) goto found; return (0); found: - for (pr = dp->dom_protosw; pr <= dp->dom_protoswNPROTOSW; pr++) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_protocol == protocol) return (pr); return (0); @@ -102,6 +99,7 @@ pfslowtimo() for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_slowtimo) (*pr->pr_slowtimo)(); + timeout(pfslowtimo, (caddr_t)0, hz/2); } pffasttimo() @@ -113,4 +111,5 @@ pffasttimo() for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_fasttimo) (*pr->pr_fasttimo)(); + timeout(pffasttimo, (caddr_t)0, hz/5); } diff --git a/usr/src/sys/kern/uipc_mu_msg.c b/usr/src/sys/kern/uipc_mu_msg.c index 429bcb7435..5e95b16730 100644 --- a/usr/src/sys/kern/uipc_mu_msg.c +++ b/usr/src/sys/kern/uipc_mu_msg.c @@ -1,4 +1,4 @@ -/* uipc_mu_msg.c Melb 4.1 82/07/16 */ +/* uipc_mu_msg.c Melb 4.2 
82/11/13 */ #ifdef MUSH #include "../h/param.h" @@ -93,7 +93,7 @@ mu_msg() } else { if (p->p_msgflgs & MSGRPLY) { while (pp = mu_send(&p->p_mb, - p->p_mb.msg_pid, 0)) { + (int)p->p_mb.msg_pid, 0)) { pp->p_msgflgs |= MSGWAIT; sleep((caddr_t)&pp->p_mb, MSGPRI); @@ -119,7 +119,7 @@ mu_msg() p->p_msgflgs |= MSGWRPLY; } mb.msg_uid = u.u_uid; - while ((pp = mu_send(&mb, mb.msg_pid, p->p_pid)) && + while ((pp = mu_send(&mb, (int)mb.msg_pid, p->p_pid)) && uap->wait & MSG_W_POST) { pp->p_msgflgs |= MSGWAIT; sleep((caddr_t)&pp->p_mb, MSGPRI); @@ -146,9 +146,8 @@ mu_msg() struct proc * mu_send(mp, pid, from) -register mmsgbuf *mp; -register int pid; -register int from; + register mmsgbuf *mp; + register int pid, from; { register struct proc *p; diff --git a/usr/src/sys/kern/uipc_socket.c b/usr/src/sys/kern/uipc_socket.c index e888219674..70ee1d576a 100644 --- a/usr/src/sys/kern/uipc_socket.c +++ b/usr/src/sys/kern/uipc_socket.c @@ -1,4 +1,4 @@ -/* uipc_socket.c 4.63 82/11/02 */ +/* uipc_socket.c 4.64 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -34,13 +34,12 @@ socreate(dom, aso, type, proto, opt) register struct protosw *prp; register struct socket *so; struct mbuf *m; - int pf, error; + int error; - pf = dom ? 
PF_UNIX : PF_INET; /* should be u.u_protof */ if (proto) - prp = pffindproto(pf, proto); + prp = pffindproto(dom, proto); else - prp = pffindtype(pf, type); + prp = pffindtype(dom, type); if (prp == 0) return (EPROTONOSUPPORT); if (prp->pr_type != type) @@ -51,6 +50,7 @@ socreate(dom, aso, type, proto, opt) so = mtod(m, struct socket *); so->so_options = 0; so->so_state = 0; + so->so_type = type; if (u.u_uid == 0) so->so_state = SS_PRIV; so->so_proto = prp; @@ -187,7 +187,7 @@ sostat(so, ub) struct stat sb; bzero((caddr_t)&sb, sizeof (sb)); /* XXX */ - copyout((caddr_t)&sb, (caddr_t)ub, sizeof (sb));/* XXX */ + (void) copyout((caddr_t)&sb, (caddr_t)ub, sizeof (sb));/* XXX */ return (0); /* XXX */ } diff --git a/usr/src/sys/kern/uipc_syscalls.c b/usr/src/sys/kern/uipc_syscalls.c index 2b735cc3f7..940e1966a7 100644 --- a/usr/src/sys/kern/uipc_syscalls.c +++ b/usr/src/sys/kern/uipc_syscalls.c @@ -1,4 +1,4 @@ -/* uipc_syscalls.c 4.35 82/10/21 */ +/* uipc_syscalls.c 4.36 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -38,7 +38,7 @@ socket() goto freeopt; fp->f_flag = FREAD|FWRITE; fp->f_type = DTYPE_SOCKET; - u.u_error = socreate(0, &so, uap->type, uap->protocol, &aopt); + u.u_error = socreate(AF_UNSPEC, &so, uap->type, uap->protocol, &aopt); if (u.u_error) goto bad; fp->f_socket = so; @@ -467,10 +467,12 @@ pipe() struct socket *rso, *wso; int r; - u.u_error = socreate(1, &rso, SOCK_STREAM, 0, (struct socketopt *)0); + u.u_error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0, + (struct socketopt *)0); if (u.u_error) return; - u.u_error = socreate(1, &wso, SOCK_STREAM, 0, (struct socketopt *)0); + u.u_error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0, + (struct socketopt *)0); if (u.u_error) goto free; rf = falloc(); diff --git a/usr/src/sys/kern/uipc_usrreq.c b/usr/src/sys/kern/uipc_usrreq.c index cdbf93ec33..4ac241996e 100644 --- a/usr/src/sys/kern/uipc_usrreq.c +++ b/usr/src/sys/kern/uipc_usrreq.c @@ -1,4 +1,4 @@ -/* uipc_usrreq.c 1.2 82/11/03 */ +/* 
uipc_usrreq.c 1.3 82/11/13 */ #include "../h/param.h" #include "../h/dir.h" @@ -10,6 +10,7 @@ #include "../h/unpcb.h" #include "../h/un.h" #include "../h/inode.h" +#include "../h/nami.h" /* * Unix communications domain. @@ -32,7 +33,7 @@ uipc_usrreq(so, req, m, nam, opt) case PRU_ATTACH: if (unp) { - error = EINVAL; + error = EISCONN; break; } error = unp_attach(so); @@ -42,6 +43,15 @@ uipc_usrreq(so, req, m, nam, opt) unp_detach(unp); break; + case PRU_BIND: + error = unp_bind(unp, nam); + break; + + case PRU_LISTEN: + if (unp->unp_inode == 0) + error = EINVAL; + break; + case PRU_CONNECT: error = unp_connect(so, nam); break; @@ -50,23 +60,16 @@ uipc_usrreq(so, req, m, nam, opt) unp_disconnect(unp); break; -/* BEGIN QUESTIONABLE */ - case PRU_ACCEPT: { - struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *); - - if (soun) { - bzero((caddr_t)soun, sizeof (*soun)); - soun->sun_family = AF_UNIX; - /* XXX */ - } - } + case PRU_ACCEPT: + nam->m_len = unp->unp_remaddr->m_len; + bcopy(mtod(unp->unp_remaddr, caddr_t), + mtod(nam, caddr_t), (unsigned)nam->m_len); break; case PRU_SHUTDOWN: socantsendmore(so); unp_usrclosed(unp); break; -/* END QUESTIONABLE */ case PRU_RCVD: switch (so->so_type) { @@ -118,10 +121,13 @@ uipc_usrreq(so, req, m, nam, opt) } } so2 = unp->unp_conn->unp_socket; - if (sbspace(&so2->so_rcv) > 0) /* XXX */ - sbappendaddr(so2, m, nam); /* XXX */ + /* BEGIN XXX */ + if (sbspace(&so2->so_rcv) > 0) + (void) sbappendaddr(&so2->so_rcv, + mtod(nam, struct sockaddr *), m); + /* END XXX */ if (nam) - unp_disconnect(so); + unp_disconnect(unp); break; case SOCK_STREAM: @@ -185,12 +191,11 @@ uipc_usrreq(so, req, m, nam, opt) int unp_sendspace = 1024*2; int unp_recvspace = 1024*2; -unp_attach(so, soun) +unp_attach(so) struct socket *so; - struct sockaddr_un *soun; { + register struct mbuf *m; register struct unpcb *unp; - struct mbuf *m; int error; error = soreserve(so, unp_sendspace, unp_recvspace); @@ -204,63 +209,13 @@ unp_attach(so, soun) unp = mtod(m, 
struct unpcb *); so->so_pcb = (caddr_t)unp; unp->unp_socket = so; - if (soun) { - error = unp_bind(unp, soun); - if (error) { - unp_detach(unp); - goto bad; - } - } return (0); bad: return (error); } -unp_disconnect(unp) - struct unpcb *unp; -{ - register struct unpcb *unp2 = unp->unp_conn; - - if (unp2 == 0) - return; - unp->unp_conn = 0; - soisdisconnected(unp->unp_socket); - switch (unp->unp_socket->so_type) { - - case SOCK_DGRAM: - if (unp2->unp_refs == unp) - unp2->unp_refs = unp->unp_nextref; - else { - unp2 = unp2->unp_refs; - for (;;) { - if (unp2 == 0) - panic("unp_disconnect"); - if (unp2->unp_nextref == unp) - break; - unp2 = unp2->unp_nextref; - } - unp2->unp_nextref = unp->unp_nextref; - } - unp->unp_nextref = 0; - break; - - case SOCK_STREAM: - unp2->unp_conn = 0; - soisdisconnected(unp2->unp_socket); - unp_drop(unp2, ECONNRESET); - break; - } -} - -unp_abort(unp) - struct unpcb *unp; -{ - - unp_detach(unp); -} - unp_detach(unp) - struct unpcb *unp; + register struct unpcb *unp; { if (unp->unp_inode) { @@ -273,44 +228,22 @@ unp_detach(unp) unp_drop(unp->unp_refs, ECONNRESET); soisdisconnected(unp->unp_socket); unp->unp_socket->so_pcb = 0; - m_free(dtom(unp)); + m_freem(unp->unp_remaddr); + (void) m_free(dtom(unp)); } -unp_usrclosed(unp) +unp_bind(unp, nam) struct unpcb *unp; + struct mbuf *nam; { - register struct socket *so = unp->unp_socket; - -#ifdef sometimes /* ??? 
*/ - soisdisconnected(unp->unp_socket); -#endif -} - -unp_drop(unp, errno) - struct unpcb *unp; - int errno; -{ - - unp->unp_socket->so_error = errno; - unp_disconnect(unp); -} - -unp_drain() -{ - -} - -unp_bind(unp, soun) - struct unpcb *unp; - struct sockaddr_un *soun; -{ + struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *); register struct inode *ip; - int error; extern schar(); + int error; u.u_dirp = soun->sun_path; soun->sun_path[sizeof(soun->sun_path)-1] = 0; - ip = namei(schar, 1, 1); + ip = namei(schar, CREATE, 1); if (ip) { iput(ip); return (EEXIST); @@ -327,34 +260,25 @@ unp_bind(unp, soun) return (0); } -unp_connect(so, soun) +unp_connect(so, nam) struct socket *so; - struct sockaddr_un *soun; + struct mbuf *nam; { - struct inode *ip; + register struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *); + struct unpcb *unp = sotounpcb(so); + register struct inode *ip; int error; + struct socket *so2; + struct unpcb *unp2; u.u_dirp = soun->sun_path; soun->sun_path[sizeof(soun->sun_path)-1] = 0; - ip = namei(schar, 0, 1); + ip = namei(schar, LOOKUP, 1); if (ip == 0) { error = u.u_error; u.u_error = 0; return (ENOENT); } - error = unp_connectip(so, ip); - return (error); -} - -unp_connectip(so, ip) - struct socket *so; - struct inode *ip; -{ - struct unpcb *unp = sotounpcb(so); - struct socket *so2, *so3; - int error; - struct unpcb *unp2; - if ((ip->i_mode&IFMT) != IFSOCK) { error = ENOTSOCK; goto bad; @@ -379,17 +303,20 @@ unp_connectip(so, ip) case SOCK_STREAM: if ((so2->so_options&SO_ACCEPTCONN) == 0 || - (so3 = sonewconn(so2)) == 0) { + (so2 = sonewconn(so2)) == 0) { error = ECONNREFUSED; goto bad; } - unp->unp_conn = sotounpcb(so3); + unp2 = sotounpcb(so2); + unp->unp_conn = unp2; + unp2->unp_conn = unp; + unp2->unp_remaddr = m_copy(nam, 0, (int)M_COPYALL); break; default: panic("uipc connip"); } - soisconnected(unp->unp_conn->unp_socket); + soisconnected(so2); soisconnected(so); iput(ip); return (0); @@ -397,3 +324,67 @@ bad: iput(ip); 
return (error); } + +unp_disconnect(unp) + struct unpcb *unp; +{ + register struct unpcb *unp2 = unp->unp_conn; + + if (unp2 == 0) + return; + unp->unp_conn = 0; + soisdisconnected(unp->unp_socket); + switch (unp->unp_socket->so_type) { + + case SOCK_DGRAM: + if (unp2->unp_refs == unp) + unp2->unp_refs = unp->unp_nextref; + else { + unp2 = unp2->unp_refs; + for (;;) { + if (unp2 == 0) + panic("unp_disconnect"); + if (unp2->unp_nextref == unp) + break; + unp2 = unp2->unp_nextref; + } + unp2->unp_nextref = unp->unp_nextref; + } + unp->unp_nextref = 0; + break; + + case SOCK_STREAM: + unp2->unp_conn = 0; + soisdisconnected(unp2->unp_socket); + unp_drop(unp2, ECONNRESET); + break; + } +} + +unp_abort(unp) + struct unpcb *unp; +{ + + unp_detach(unp); +} + +/*ARGSUSED*/ +unp_usrclosed(unp) + struct unpcb *unp; +{ + +} + +unp_drop(unp, errno) + struct unpcb *unp; + int errno; +{ + + unp->unp_socket->so_error = errno; + unp_disconnect(unp); +} + +unp_drain() +{ + +} diff --git a/usr/src/sys/kern/vfs_bio.c b/usr/src/sys/kern/vfs_bio.c index 0e752254e7..b67611b212 100644 --- a/usr/src/sys/kern/vfs_bio.c +++ b/usr/src/sys/kern/vfs_bio.c @@ -1,4 +1,4 @@ -/* vfs_bio.c 4.38 82/10/17 */ +/* vfs_bio.c 4.39 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -12,8 +12,6 @@ #include "../h/vm.h" #include "../h/trace.h" -int bioprintfs = 0; - /* * Read in (if necessary) the block and return a buffer pointer. 
*/ @@ -25,12 +23,16 @@ bread(dev, blkno, size) { register struct buf *bp; + if (size == 0) + panic("bread: size 0"); bp = getblk(dev, blkno, size); if (bp->b_flags&B_DONE) { trace(TR_BREADHIT, dev, blkno); return(bp); } bp->b_flags |= B_READ; + if (bp->b_bcount > bp->b_bufsize) + panic("bread"); (*bdevsw[major(dev)].d_strategy)(bp); trace(TR_BREADMISS, dev, blkno); u.u_ru.ru_inblock++; /* pay for read */ @@ -60,6 +62,8 @@ breada(dev, blkno, size, rablkno, rabsize) bp = getblk(dev, blkno, size); if ((bp->b_flags&B_DONE) == 0) { bp->b_flags |= B_READ; + if (bp->b_bcount > bp->b_bufsize) + panic("breada"); (*bdevsw[major(dev)].d_strategy)(bp); trace(TR_BREADMISS, dev, blkno); u.u_ru.ru_inblock++; /* pay for read */ @@ -78,6 +82,8 @@ breada(dev, blkno, size, rablkno, rabsize) trace(TR_BREADHITRA, dev, blkno); } else { rabp->b_flags |= B_READ|B_ASYNC; + if (rabp->b_bcount > rabp->b_bufsize) + panic("breadrabp"); (*bdevsw[major(dev)].d_strategy)(rabp); trace(TR_BREADMISSRA, dev, rablock); u.u_ru.ru_inblock++; /* pay in advance */ @@ -109,8 +115,8 @@ bwrite(bp) if ((flag&B_DELWRI) == 0) u.u_ru.ru_oublock++; /* noone paid yet */ trace(TR_BWRITE, bp->b_dev, bp->b_blkno); -if (bioprintfs) -printf("write %x blk %d count %d\n", bp->b_dev, bp->b_blkno, bp->b_bcount); + if (bp->b_bcount > bp->b_bufsize) + panic("bwrite"); (*bdevsw[major(bp->b_dev)].d_strategy)(bp); /* @@ -193,9 +199,13 @@ brelse(bp) * Stick the buffer back on a free list. */ s = spl6(); - if (bp->b_flags & (B_ERROR|B_INVAL)) { + if (bp->b_bufsize <= 0) { + /* block has no buffer ... put at front of unused buffer list */ + flist = &bfreelist[BQ_EMPTY]; + binsheadfree(bp, flist); + } else if (bp->b_flags & (B_ERROR|B_INVAL)) { /* block has no info ... 
put at front of most free list */ - flist = &bfreelist[BQUEUES-1]; + flist = &bfreelist[BQ_AGE]; binsheadfree(bp, flist); } else { if (bp->b_flags & B_LOCKED) @@ -256,7 +266,7 @@ getblk(dev, blkno, size) daddr_t blkno; int size; { - register struct buf *bp, *dp, *ep; + register struct buf *bp, *dp; int s; if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT)) @@ -288,35 +298,13 @@ loop: } if (major(dev) >= nblkdev) panic("blkdev"); - /* - * Not found in the cache, select something from - * a free list. Preference is to LRU list, then AGE list. - */ - s = spl6(); - for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--) - if (ep->av_forw != ep) - break; - if (ep == bfreelist) { /* no free blocks at all */ - ep->b_flags |= B_WANTED; - sleep((caddr_t)ep, PRIBIO+1); - splx(s); - goto loop; - } - splx(s); - bp = ep->av_forw; - notavail(bp); - if (bp->b_flags & B_DELWRI) { - bp->b_flags |= B_ASYNC; - bwrite(bp); - goto loop; - } - trace(TR_BRELSE, bp->b_dev, bp->b_blkno); - bp->b_flags = B_BUSY; + bp = getnewbuf(); bfree(bp); bremhash(bp); binshash(bp, dp); bp->b_dev = dev; bp->b_blkno = blkno; + bp->b_error = 0; if (brealloc(bp, size) == 0) goto loop; return(bp); @@ -330,33 +318,17 @@ struct buf * geteblk(size) int size; { - register struct buf *bp, *dp; - int s; + register struct buf *bp, *flist; loop: - s = spl6(); - for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--) - if (dp->av_forw != dp) - break; - if (dp == bfreelist) { /* no free blocks */ - dp->b_flags |= B_WANTED; - sleep((caddr_t)dp, PRIBIO+1); - goto loop; - } - splx(s); - bp = dp->av_forw; - notavail(bp); - if (bp->b_flags & B_DELWRI) { - bp->b_flags |= B_ASYNC; - bwrite(bp); - goto loop; - } - trace(TR_BRELSE, bp->b_dev, bp->b_blkno); - bp->b_flags = B_BUSY|B_INVAL; + bp = getnewbuf(); + bp->b_flags |= B_INVAL; bfree(bp); bremhash(bp); - binshash(bp, dp); + flist = &bfreelist[BQ_AGE]; + binshash(bp, flist); bp->b_dev = (dev_t)NODEV; + bp->b_error = 0; if (brealloc(bp, size) == 0) goto loop; return(bp); 
@@ -387,11 +359,14 @@ brealloc(bp, size) } if (bp->b_flags & B_LOCKED) panic("brealloc"); - goto allocit; + allocbuf(bp, size); + return (1); } bp->b_flags &= ~B_DONE; - if (bp->b_dev == NODEV) - goto allocit; + if (bp->b_dev == NODEV) { + allocbuf(bp, size); + return (1); + } /* * Search cache for any buffers that overlap the one that we @@ -412,39 +387,90 @@ loop: if (ep->b_bcount == 0 || ep->b_blkno > last || ep->b_blkno + (ep->b_bcount / DEV_BSIZE) <= start) continue; -if (bioprintfs) -if (ep->b_flags&B_BUSY) -printf("sleeping on:dev 0x%x, blks %d-%d, flg 0%o allocing dev 0x%x, blks %d-%d, flg 0%o\n", -ep->b_dev, ep->b_blkno, ep->b_blkno + (ep->b_bcount / DEV_BSIZE) - 1, -ep->b_flags, bp->b_dev, start, last, bp->b_flags); s = spl6(); if (ep->b_flags&B_BUSY) { ep->b_flags |= B_WANTED; sleep((caddr_t)ep, PRIBIO+1); - (void) splx(s); + splx(s); goto loop; } - (void) splx(s); + splx(s); notavail(ep); if (ep->b_flags & B_DELWRI) { -if (bioprintfs) -printf("DELWRI:dev 0x%x, blks %d-%d, flg 0%o allocing dev 0x%x, blks %d-%d, flg 0%o\n", -ep->b_dev, ep->b_blkno, ep->b_blkno + (ep->b_bcount / DEV_BSIZE) - 1, -ep->b_flags, bp->b_dev, start, last, bp->b_flags); bwrite(ep); goto loop; } ep->b_flags |= B_INVAL; brelse(ep); } -allocit: + allocbuf(bp, size); + return (1); +} + +/* + * Expand or contract the actual memory allocated to a buffer. + */ +allocbuf(tp, size) + register struct buf *tp; + int size; +{ + register struct buf *bp, *ep; + int sizealloc, take; + + sizealloc = roundup(size, CLBYTES); + /* + * Buffer size does not change + */ + if (sizealloc == tp->b_bufsize) + goto out; + /* + * Buffer size is shrinking. + * Place excess space in a buffer header taken from the + * BQ_EMPTY buffer list and placed on the "most free" list. + * If no extra buffer headers are available, leave the + * extra space in the present buffer. 
+ */ + if (sizealloc < tp->b_bufsize) { + ep = bfreelist[BQ_EMPTY].av_forw; + if (ep == &bfreelist[BQ_EMPTY]) + goto out; + notavail(ep); + pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr, + (int)tp->b_bufsize - sizealloc); + ep->b_bufsize = tp->b_bufsize - sizealloc; + tp->b_bufsize = sizealloc; + ep->b_flags |= B_INVAL; + ep->b_bcount = 0; + brelse(ep); + goto out; + } /* - * Here the buffer is already available, so all we - * need to do is set the size. Someday a better memory - * management scheme will be implemented. + * More buffer space is needed. Get it out of buffers on + * the "most free" list, placing the empty headers on the + * BQ_EMPTY buffer header list. */ - bp->b_bcount = size; - return (1); + while (tp->b_bufsize < sizealloc) { + take = sizealloc - tp->b_bufsize; + bp = getnewbuf(); + if (take >= bp->b_bufsize) + take = bp->b_bufsize; + pagemove(&bp->b_un.b_addr[bp->b_bufsize - take], + &tp->b_un.b_addr[tp->b_bufsize], take); + tp->b_bufsize += take; + bp->b_bufsize = bp->b_bufsize - take; + if (bp->b_bcount > bp->b_bufsize) + bp->b_bcount = bp->b_bufsize; + if (bp->b_bufsize <= 0) { + bremhash(bp); + binshash(bp, &bfreelist[BQ_EMPTY]); + bp->b_dev = (dev_t)NODEV; + bp->b_error = 0; + bp->b_flags |= B_INVAL; + } + brelse(bp); + } +out: + tp->b_bcount = size; } /* @@ -454,13 +480,48 @@ bfree(bp) struct buf *bp; { /* - * Here the buffer does not change, so all we - * need to do is set the size. Someday a better memory - * management scheme will be implemented. + * This stub is provided to allow the system to reclaim + * memory from the buffer pool. Currently we do not migrate + * memory between the buffer memory pool and the user memory + * pool. */ bp->b_bcount = 0; } +/* + * Find a buffer which is available for use. + * Select something from a free list. + * Preference is to AGE list, then LRU list. 
+ */ +struct buf * +getnewbuf() +{ + register struct buf *bp, *dp; + int s; + +loop: + s = spl6(); + for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--) + if (dp->av_forw != dp) + break; + if (dp == bfreelist) { /* no free blocks */ + dp->b_flags |= B_WANTED; + sleep((caddr_t)dp, PRIBIO+1); + goto loop; + } + splx(s); + bp = dp->av_forw; + notavail(bp); + if (bp->b_flags & B_DELWRI) { + bp->b_flags |= B_ASYNC; + bwrite(bp); + goto loop; + } + trace(TR_BRELSE, bp->b_dev, bp->b_blkno); + bp->b_flags = B_BUSY; + return (bp); +} + /* * Wait for I/O completion on the buffer; return errors * to the user. @@ -516,6 +577,47 @@ biodone(bp) } } +/* + * Insure that no part of a specified block is in an incore buffer. + */ +blkflush(dev, blkno, size) + dev_t dev; + daddr_t blkno; + long size; +{ + register struct buf *ep; + struct buf *dp; + daddr_t start, last; + int s; + + start = blkno; + last = start + (size / DEV_BSIZE) - 1; + dp = BUFHASH(dev, blkno); +loop: + for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) { + if (ep->b_dev != dev || (ep->b_flags&B_INVAL)) + continue; + /* look for overlap */ + if (ep->b_bcount == 0 || ep->b_blkno > last || + ep->b_blkno + (ep->b_bcount / DEV_BSIZE) <= start) + continue; + s = spl6(); + if (ep->b_flags&B_BUSY) { + ep->b_flags |= B_WANTED; + sleep((caddr_t)ep, PRIBIO+1); + splx(s); + goto loop; + } + if (ep->b_flags & B_DELWRI) { + splx(s); + notavail(ep); + bwrite(ep); + goto loop; + } + splx(s); + } +} + /* * make sure all write-behind blocks * on dev (or NODEV for all) @@ -532,7 +634,7 @@ bflush(dev) loop: s = spl6(); - for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++) + for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++) for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) { if ((bp->b_flags & B_DELWRI) == 0) continue; diff --git a/usr/src/sys/kern/vfs_cluster.c b/usr/src/sys/kern/vfs_cluster.c index 52033ba7e5..8b3282db19 100644 --- a/usr/src/sys/kern/vfs_cluster.c +++ b/usr/src/sys/kern/vfs_cluster.c @@ 
-1,4 +1,4 @@ -/* vfs_cluster.c 4.38 82/10/17 */ +/* vfs_cluster.c 4.39 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -12,8 +12,6 @@ #include "../h/vm.h" #include "../h/trace.h" -int bioprintfs = 0; - /* * Read in (if necessary) the block and return a buffer pointer. */ @@ -25,12 +23,16 @@ bread(dev, blkno, size) { register struct buf *bp; + if (size == 0) + panic("bread: size 0"); bp = getblk(dev, blkno, size); if (bp->b_flags&B_DONE) { trace(TR_BREADHIT, dev, blkno); return(bp); } bp->b_flags |= B_READ; + if (bp->b_bcount > bp->b_bufsize) + panic("bread"); (*bdevsw[major(dev)].d_strategy)(bp); trace(TR_BREADMISS, dev, blkno); u.u_ru.ru_inblock++; /* pay for read */ @@ -60,6 +62,8 @@ breada(dev, blkno, size, rablkno, rabsize) bp = getblk(dev, blkno, size); if ((bp->b_flags&B_DONE) == 0) { bp->b_flags |= B_READ; + if (bp->b_bcount > bp->b_bufsize) + panic("breada"); (*bdevsw[major(dev)].d_strategy)(bp); trace(TR_BREADMISS, dev, blkno); u.u_ru.ru_inblock++; /* pay for read */ @@ -78,6 +82,8 @@ breada(dev, blkno, size, rablkno, rabsize) trace(TR_BREADHITRA, dev, blkno); } else { rabp->b_flags |= B_READ|B_ASYNC; + if (rabp->b_bcount > rabp->b_bufsize) + panic("breadrabp"); (*bdevsw[major(dev)].d_strategy)(rabp); trace(TR_BREADMISSRA, dev, rablock); u.u_ru.ru_inblock++; /* pay in advance */ @@ -109,8 +115,8 @@ bwrite(bp) if ((flag&B_DELWRI) == 0) u.u_ru.ru_oublock++; /* noone paid yet */ trace(TR_BWRITE, bp->b_dev, bp->b_blkno); -if (bioprintfs) -printf("write %x blk %d count %d\n", bp->b_dev, bp->b_blkno, bp->b_bcount); + if (bp->b_bcount > bp->b_bufsize) + panic("bwrite"); (*bdevsw[major(bp->b_dev)].d_strategy)(bp); /* @@ -193,9 +199,13 @@ brelse(bp) * Stick the buffer back on a free list. */ s = spl6(); - if (bp->b_flags & (B_ERROR|B_INVAL)) { + if (bp->b_bufsize <= 0) { + /* block has no buffer ... 
put at front of unused buffer list */ + flist = &bfreelist[BQ_EMPTY]; + binsheadfree(bp, flist); + } else if (bp->b_flags & (B_ERROR|B_INVAL)) { /* block has no info ... put at front of most free list */ - flist = &bfreelist[BQUEUES-1]; + flist = &bfreelist[BQ_AGE]; binsheadfree(bp, flist); } else { if (bp->b_flags & B_LOCKED) @@ -256,7 +266,7 @@ getblk(dev, blkno, size) daddr_t blkno; int size; { - register struct buf *bp, *dp, *ep; + register struct buf *bp, *dp; int s; if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT)) @@ -288,35 +298,13 @@ loop: } if (major(dev) >= nblkdev) panic("blkdev"); - /* - * Not found in the cache, select something from - * a free list. Preference is to LRU list, then AGE list. - */ - s = spl6(); - for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--) - if (ep->av_forw != ep) - break; - if (ep == bfreelist) { /* no free blocks at all */ - ep->b_flags |= B_WANTED; - sleep((caddr_t)ep, PRIBIO+1); - splx(s); - goto loop; - } - splx(s); - bp = ep->av_forw; - notavail(bp); - if (bp->b_flags & B_DELWRI) { - bp->b_flags |= B_ASYNC; - bwrite(bp); - goto loop; - } - trace(TR_BRELSE, bp->b_dev, bp->b_blkno); - bp->b_flags = B_BUSY; + bp = getnewbuf(); bfree(bp); bremhash(bp); binshash(bp, dp); bp->b_dev = dev; bp->b_blkno = blkno; + bp->b_error = 0; if (brealloc(bp, size) == 0) goto loop; return(bp); @@ -330,33 +318,17 @@ struct buf * geteblk(size) int size; { - register struct buf *bp, *dp; - int s; + register struct buf *bp, *flist; loop: - s = spl6(); - for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--) - if (dp->av_forw != dp) - break; - if (dp == bfreelist) { /* no free blocks */ - dp->b_flags |= B_WANTED; - sleep((caddr_t)dp, PRIBIO+1); - goto loop; - } - splx(s); - bp = dp->av_forw; - notavail(bp); - if (bp->b_flags & B_DELWRI) { - bp->b_flags |= B_ASYNC; - bwrite(bp); - goto loop; - } - trace(TR_BRELSE, bp->b_dev, bp->b_blkno); - bp->b_flags = B_BUSY|B_INVAL; + bp = getnewbuf(); + bp->b_flags |= B_INVAL; bfree(bp); bremhash(bp); 
- binshash(bp, dp); + flist = &bfreelist[BQ_AGE]; + binshash(bp, flist); bp->b_dev = (dev_t)NODEV; + bp->b_error = 0; if (brealloc(bp, size) == 0) goto loop; return(bp); @@ -387,11 +359,14 @@ brealloc(bp, size) } if (bp->b_flags & B_LOCKED) panic("brealloc"); - goto allocit; + allocbuf(bp, size); + return (1); } bp->b_flags &= ~B_DONE; - if (bp->b_dev == NODEV) - goto allocit; + if (bp->b_dev == NODEV) { + allocbuf(bp, size); + return (1); + } /* * Search cache for any buffers that overlap the one that we @@ -412,39 +387,90 @@ loop: if (ep->b_bcount == 0 || ep->b_blkno > last || ep->b_blkno + (ep->b_bcount / DEV_BSIZE) <= start) continue; -if (bioprintfs) -if (ep->b_flags&B_BUSY) -printf("sleeping on:dev 0x%x, blks %d-%d, flg 0%o allocing dev 0x%x, blks %d-%d, flg 0%o\n", -ep->b_dev, ep->b_blkno, ep->b_blkno + (ep->b_bcount / DEV_BSIZE) - 1, -ep->b_flags, bp->b_dev, start, last, bp->b_flags); s = spl6(); if (ep->b_flags&B_BUSY) { ep->b_flags |= B_WANTED; sleep((caddr_t)ep, PRIBIO+1); - (void) splx(s); + splx(s); goto loop; } - (void) splx(s); + splx(s); notavail(ep); if (ep->b_flags & B_DELWRI) { -if (bioprintfs) -printf("DELWRI:dev 0x%x, blks %d-%d, flg 0%o allocing dev 0x%x, blks %d-%d, flg 0%o\n", -ep->b_dev, ep->b_blkno, ep->b_blkno + (ep->b_bcount / DEV_BSIZE) - 1, -ep->b_flags, bp->b_dev, start, last, bp->b_flags); bwrite(ep); goto loop; } ep->b_flags |= B_INVAL; brelse(ep); } -allocit: + allocbuf(bp, size); + return (1); +} + +/* + * Expand or contract the actual memory allocated to a buffer. + */ +allocbuf(tp, size) + register struct buf *tp; + int size; +{ + register struct buf *bp, *ep; + int sizealloc, take; + + sizealloc = roundup(size, CLBYTES); + /* + * Buffer size does not change + */ + if (sizealloc == tp->b_bufsize) + goto out; + /* + * Buffer size is shrinking. + * Place excess space in a buffer header taken from the + * BQ_EMPTY buffer list and placed on the "most free" list. 
+ * If no extra buffer headers are available, leave the + * extra space in the present buffer. + */ + if (sizealloc < tp->b_bufsize) { + ep = bfreelist[BQ_EMPTY].av_forw; + if (ep == &bfreelist[BQ_EMPTY]) + goto out; + notavail(ep); + pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr, + (int)tp->b_bufsize - sizealloc); + ep->b_bufsize = tp->b_bufsize - sizealloc; + tp->b_bufsize = sizealloc; + ep->b_flags |= B_INVAL; + ep->b_bcount = 0; + brelse(ep); + goto out; + } /* - * Here the buffer is already available, so all we - * need to do is set the size. Someday a better memory - * management scheme will be implemented. + * More buffer space is needed. Get it out of buffers on + * the "most free" list, placing the empty headers on the + * BQ_EMPTY buffer header list. */ - bp->b_bcount = size; - return (1); + while (tp->b_bufsize < sizealloc) { + take = sizealloc - tp->b_bufsize; + bp = getnewbuf(); + if (take >= bp->b_bufsize) + take = bp->b_bufsize; + pagemove(&bp->b_un.b_addr[bp->b_bufsize - take], + &tp->b_un.b_addr[tp->b_bufsize], take); + tp->b_bufsize += take; + bp->b_bufsize = bp->b_bufsize - take; + if (bp->b_bcount > bp->b_bufsize) + bp->b_bcount = bp->b_bufsize; + if (bp->b_bufsize <= 0) { + bremhash(bp); + binshash(bp, &bfreelist[BQ_EMPTY]); + bp->b_dev = (dev_t)NODEV; + bp->b_error = 0; + bp->b_flags |= B_INVAL; + } + brelse(bp); + } +out: + tp->b_bcount = size; } /* @@ -454,13 +480,48 @@ bfree(bp) struct buf *bp; { /* - * Here the buffer does not change, so all we - * need to do is set the size. Someday a better memory - * management scheme will be implemented. + * This stub is provided to allow the system to reclaim + * memory from the buffer pool. Currently we do not migrate + * memory between the buffer memory pool and the user memory + * pool. */ bp->b_bcount = 0; } +/* + * Find a buffer which is available for use. + * Select something from a free list. + * Preference is to AGE list, then LRU list. 
+ */ +struct buf * +getnewbuf() +{ + register struct buf *bp, *dp; + int s; + +loop: + s = spl6(); + for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--) + if (dp->av_forw != dp) + break; + if (dp == bfreelist) { /* no free blocks */ + dp->b_flags |= B_WANTED; + sleep((caddr_t)dp, PRIBIO+1); + goto loop; + } + splx(s); + bp = dp->av_forw; + notavail(bp); + if (bp->b_flags & B_DELWRI) { + bp->b_flags |= B_ASYNC; + bwrite(bp); + goto loop; + } + trace(TR_BRELSE, bp->b_dev, bp->b_blkno); + bp->b_flags = B_BUSY; + return (bp); +} + /* * Wait for I/O completion on the buffer; return errors * to the user. @@ -516,6 +577,47 @@ biodone(bp) } } +/* + * Insure that no part of a specified block is in an incore buffer. + */ +blkflush(dev, blkno, size) + dev_t dev; + daddr_t blkno; + long size; +{ + register struct buf *ep; + struct buf *dp; + daddr_t start, last; + int s; + + start = blkno; + last = start + (size / DEV_BSIZE) - 1; + dp = BUFHASH(dev, blkno); +loop: + for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) { + if (ep->b_dev != dev || (ep->b_flags&B_INVAL)) + continue; + /* look for overlap */ + if (ep->b_bcount == 0 || ep->b_blkno > last || + ep->b_blkno + (ep->b_bcount / DEV_BSIZE) <= start) + continue; + s = spl6(); + if (ep->b_flags&B_BUSY) { + ep->b_flags |= B_WANTED; + sleep((caddr_t)ep, PRIBIO+1); + splx(s); + goto loop; + } + if (ep->b_flags & B_DELWRI) { + splx(s); + notavail(ep); + bwrite(ep); + goto loop; + } + splx(s); + } +} + /* * make sure all write-behind blocks * on dev (or NODEV for all) @@ -532,7 +634,7 @@ bflush(dev) loop: s = spl6(); - for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++) + for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++) for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) { if ((bp->b_flags & B_DELWRI) == 0) continue; diff --git a/usr/src/sys/kern/vfs_lookup.c b/usr/src/sys/kern/vfs_lookup.c index 113260bbe3..9cb45757d2 100644 --- a/usr/src/sys/kern/vfs_lookup.c +++ b/usr/src/sys/kern/vfs_lookup.c @@ -1,4 
+1,4 @@ -/* vfs_lookup.c 4.29 82/10/31 */ +/* vfs_lookup.c 4.30 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -10,20 +10,30 @@ #include "../h/buf.h" #include "../h/conf.h" #include "../h/uio.h" +#include "../h/nami.h" struct buf *blkatoff(); -int dirchk = 1; +int dirchk = 0; /* * Convert a pathname into a pointer to a locked inode, * with side effects usable in creating and removing files. * This is a very central and rather complicated routine. * * The func argument gives the routine which returns successive - * characters of the name to be translated. The flag - * argument is (0, 1, 2) depending on whether the name is to be - * (looked up, created, deleted). The follow argument is 1 when - * symbolic links are to be followed when they occur at the end of - * the name translation process. + * characters of the name to be translated. + * + * The flag argument is (LOOKUP, CREATE, DELETE) depending on whether + * the name is to be (looked up, created, deleted). If flag has + * LOCKPARENT or'ed into it and the target of the pathname exists, + * namei returns both the target and its parent directory locked. + * If the file system is not maintained in a strict tree hierarchy, + * this can result in a deadlock situation. When creating and + * LOCKPARENT is specified, the target may not be ".". When deleting + * and LOCKPARENT is specified, the target may be ".", but the caller + * must check to insure it does an irele and iput instead of two iputs. + * + * The follow argument is 1 when symbolic links are to be followed + * when they occur at the end of the name translation process. * * Overall outline: * @@ -36,14 +46,19 @@ int dirchk = 1; * handle degenerate case where name is null string * search for name in directory, to found or notfound * notfound: - * if creating, return locked inode, leaving information on avail. slots + * if creating, return locked directory, leaving info on avail. 
slots * else return error * found: * if at end of path and deleting, return information to allow delete + * if at end of path and rewriting (create and LOCKPARENT), lock targe + * inode and return info to allow rewrite * if .. and on mounted filesys, look in mount table for parent * if symbolic link, massage name in buffer and continue at dirloop * if more components of name, do next level at dirloop * return the answer as locked inode + * + * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode, + * but unlocked. */ struct inode * namei(func, flag, follow) @@ -69,7 +84,10 @@ namei(func, flag, follow) int nlink = 0; /* number of symbolic links taken */ struct inode *pdp; /* saved dp during symlink work */ int i; + int lockparent; + lockparent = flag & LOCKPARENT; + flag &= ~LOCKPARENT; /* * Get a buffer for the name to be translated, and copy the * name into the buffer. @@ -156,7 +174,7 @@ dirloop2: * case it doesn't already exist. */ slotstatus = FOUND; - if (flag == 1 && *cp == 0) { + if (flag == CREATE && *cp == 0) { slotstatus = NONE; slotfreespace = 0; slotneeded = DIRSIZ(&u.u_dent); @@ -258,10 +276,10 @@ dirloop2: /* notfound: */ /* * If creating, and at end of pathname and current - * directory has not been removed, then can consider allowing - * file to be created. + * directory has not been removed, then can consider + * allowing file to be created. */ - if (flag == 1 && *cp == 0 && dp->i_nlink != 0) { + if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) { /* * Access for write is interpreted as allowing * creation of files in the directory. @@ -321,15 +339,17 @@ found: /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. - * Note that in this case we return the directory - * inode, not the inode of the file being deleted. + * If the lockparent flag isn't set, we return only + * the directory (in u.u_pdir), otherwise we go + * on and lock the inode, being careful with ".". 
*/ - if (flag == 2 && *cp == 0) { + if (flag == DELETE && *cp == 0) { /* * Write access to directory required to delete files. */ if (access(dp, IWRITE)) goto bad; + u.u_pdir = dp; /* for dirremove() */ /* * Return pointer to current entry in u.u_offset, * and distance past previous entry (if there @@ -340,8 +360,18 @@ found: u.u_count = 0; else u.u_count = u.u_offset - prevoff; + if (lockparent) { + if (dp->i_number == u.u_dent.d_ino) + dp->i_count++; + else { + dp = iget(dp->i_dev, fs, u.u_dent.d_ino); + if (dp == NULL) { + iput(u.u_pdir); + goto bad; + } + } + } brelse(nbp); - u.u_pdir = dp; /* for dirremove() */ return (dp); } @@ -370,6 +400,33 @@ found: } } + /* + * If rewriting (rename), return the inode and the + * information required to rewrite the present directory + * Must get inode of directory entry to verify it's a + * regular file, or empty directory. + */ + if ((flag == CREATE && lockparent) && *cp == 0) { + if (access(dp, IWRITE)) + goto bad; + u.u_pdir = dp; /* for dirrewrite() */ + /* + * Careful about locking second inode. + * This can only occur if the target is ".". + */ + if (dp->i_number == u.u_dent.d_ino) { + u.u_error = EISDIR; /* XXX */ + goto bad; + } + dp = iget(dp->i_dev, fs, u.u_dent.d_ino); + if (dp == NULL) { + iput(u.u_pdir); + goto bad; + } + brelse(nbp); + return (dp); + } + /* * Check for symbolic link, which may require us * to massage the name before we continue translation. @@ -398,7 +455,7 @@ found: } ovbcopy(cp, nbp->b_un.b_addr + dp->i_size, pathlen); u.u_error = - rdwri(UIO_READ, dp, nbp->b_un.b_addr, dp->i_size, + rdwri(UIO_READ, dp, nbp->b_un.b_addr, (int)dp->i_size, 0, 1, (int *)0); if (u.u_error) goto bad2; @@ -419,7 +476,6 @@ found: fs = dp->i_fs; goto dirloop; } - irele(pdp); /* * Not a symbolic link. 
If more pathname, @@ -428,9 +484,14 @@ found: if (*cp == '/') { while (*cp == '/') cp++; + irele(pdp); goto dirloop; } brelse(nbp); + if (lockparent) + u.u_pdir = pdp; + else + irele(pdp); return (dp); bad2: irele(pdp); @@ -514,7 +575,7 @@ direnter(ip) * This should never push the size past a new multiple of * DIRBLKSIZE. */ - if (u.u_offset+u.u_count > u.u_pdir->i_size) + if (u.u_offset + u.u_count > u.u_pdir->i_size) u.u_pdir->i_size = u.u_offset + u.u_count; /* @@ -522,8 +583,10 @@ direnter(ip) * entry. */ bp = blkatoff(u.u_pdir, u.u_offset, (char **)&dirbuf); - if (bp == 0) + if (bp == 0) { + iput(u.u_pdir); return; + } /* * Find space for the new entry. In the simple case, the @@ -570,20 +633,31 @@ direnter(ip) iput(u.u_pdir); } +/* + * Remove a directory entry after a call to namei, using the + * parameters which it left in the u. area. The u. entry + * u_offset contains the offset into the directory of the + * entry to be eliminated. The u_count field contains the + * size of the previous record in the directory. If this + * is 0, the first entry is being deleted, so we need only + * zero the inode number to mark the entry as free. If the + * entry isn't the first in the directory, we must reclaim + * the space of the now empty record by adding the record size + * to the size of the previous entry. + */ dirremove() { register struct inode *dp = u.u_pdir; register struct buf *bp; struct direct *ep; - if (u.u_count == 0) { + if (u.u_count == 0) /* * First entry in block: set d_ino to zero. */ - u.u_dent.d_ino = 0; (void) rdwri(UIO_WRITE, dp, (caddr_t)&u.u_dent, (int)DIRSIZ(&u.u_dent), u.u_offset, 1, (int *)0); - } else { + else { /* * Collapse new free space into previous entry. */ @@ -597,6 +671,21 @@ dirremove() return (1); } +/* + * Rewrite an existing directory entry to point at the inode + * supplied. The parameters describing the directory entry are + * set up by a call to namei. 
+ */ +dirrewrite(dp, ip) + struct inode *dp, *ip; +{ + + u.u_dent.d_ino = ip->i_number; + u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&u.u_dent, + (int)DIRSIZ(&u.u_dent), u.u_offset, 1, (int *)0); + iput(dp); +} + /* * Return buffer with contents of block "offset" * from the beginning of directory "ip". If "res" @@ -627,3 +716,32 @@ blkatoff(ip, offset, res) *res = bp->b_un.b_addr + base; return (bp); } + +/* + * Check if a directory is empty or not. + * Inode supplied must be locked. + */ +dirempty(ip) + struct inode *ip; +{ + register off_t off; + struct direct dbuf; + register struct direct *dp = &dbuf; + int error; + + for (off = 0; off < ip->i_size; off += dp->d_reclen) { + error = rdwri(UIO_READ, ip, (caddr_t)dp, + sizeof (struct direct), off, 1, (int *)0); + if (error) + return (0); + if (dp->d_ino == 0) + continue; + if (dp->d_name[0] != '.') + return (0); + if (dp->d_namlen == 1 || + (dp->d_namlen == 2 && dp->d_name[1] == '.')) + continue; + return (0); + } + return (1); +} diff --git a/usr/src/sys/kern/vfs_syscalls.c b/usr/src/sys/kern/vfs_syscalls.c index 4d9d278cbb..ccf183c281 100644 --- a/usr/src/sys/kern/vfs_syscalls.c +++ b/usr/src/sys/kern/vfs_syscalls.c @@ -1,4 +1,4 @@ -/* vfs_syscalls.c 4.41 82/10/19 */ +/* vfs_syscalls.c 4.42 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -16,13 +16,20 @@ #include "../h/uio.h" #include "../h/socket.h" #include "../h/socketvar.h" +#include "../h/nami.h" +/* + * Change current working directory (``.''). + */ chdir() { chdirec(&u.u_cdir); } +/* + * Change notion of root (``/'') directory. + */ chroot() { @@ -30,6 +37,9 @@ chroot() chdirec(&u.u_rdir); } +/* + * Common routine for chroot and chdir. 
+ */ chdirec(ipp) register struct inode **ipp; { @@ -38,14 +48,14 @@ chdirec(ipp) char *fname; }; - ip = namei(uchar, 0, 1); - if(ip == NULL) + ip = namei(uchar, LOOKUP, 1); + if (ip == NULL) return; - if((ip->i_mode&IFMT) != IFDIR) { + if ((ip->i_mode&IFMT) != IFDIR) { u.u_error = ENOTDIR; goto bad; } - if(access(ip, IEXEC)) + if (access(ip, IEXEC)) goto bad; iunlock(ip); if (*ipp) @@ -68,23 +78,28 @@ open() int flags; int mode; } *uap; - int checkpermissions = 1; + int checkpermissions = 1, flags; uap = (struct a *)u.u_ap; - if (uap->flags&FCREATE) { - ip = namei(uchar, 1, 1); + flags = uap->flags + 1; + if ((flags&FTRUNCATE) && (flags&FWRITE) == 0) { + u.u_error = EINVAL; + return; + } + if (flags&FCREATE) { + ip = namei(uchar, CREATE, 1); if (ip == NULL) { if (u.u_error) return; ip = maknode(uap->mode&07777&(~ISVTX)); checkpermissions = 0; - uap->flags &= ~FTRUNCATE; + flags &= ~FTRUNCATE; } } else - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip == NULL) return; - open1(ip, ++uap->flags, checkpermissions); + open1(ip, flags, checkpermissions); } #ifndef NOCOMPAT @@ -100,7 +115,7 @@ ocreat() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 1, 1); + ip = namei(uchar, CREATE, 1); if (ip == NULL) { if (u.u_error) return; @@ -109,7 +124,7 @@ ocreat() return; open1(ip, FWRITE, 0); } else - open1(ip, FWRITE|FTRUNCATE, 0); + open1(ip, FWRITE|FTRUNCATE, 1); } #endif @@ -145,7 +160,7 @@ open1(ip, mode, checkpermissions) * while doing so in case we block inside flocki. 
*/ flags = 0; - if (mode&(FRDLOCK|FWRLOCK)) { + if (mode&(FSHLOCK|FEXLOCK)) { iunlock(ip); flags = flocki(ip, 0, mode); ilock(ip); @@ -153,7 +168,7 @@ open1(ip, mode, checkpermissions) goto bad; } if (mode&FTRUNCATE) - itrunc(ip, 0); + itrunc(ip, (u_long)0); iunlock(ip); if ((fp = falloc()) == NULL) goto out; @@ -189,7 +204,7 @@ mknod() uap = (struct a *)u.u_ap; if (suser()) { - ip = namei(uchar, 1, 0); + ip = namei(uchar, CREATE, 0); if (ip != NULL) { u.u_error = EEXIST; goto out; @@ -225,10 +240,10 @@ link() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 0, 1); /* well, this routine is doomed anyhow */ + ip = namei(uchar, LOOKUP, 1); /* well, this routine is doomed anyhow */ if (ip == NULL) return; - if ((ip->i_mode&IFMT)==IFDIR && !suser()) { + if ((ip->i_mode&IFMT) == IFDIR && !suser()) { iput(ip); return; } @@ -237,7 +252,7 @@ link() iupdat(ip, &time, &time, 1); iunlock(ip); u.u_dirp = (caddr_t)uap->linkname; - xp = namei(uchar, 1, 0); + xp = namei(uchar, CREATE, 0); if (xp != NULL) { u.u_error = EEXIST; iput(xp); @@ -284,7 +299,7 @@ symlink() nc++; } u.u_dirp = uap->linkname; - ip = namei(uchar, 1, 0); + ip = namei(uchar, CREATE, 0); if (ip) { iput(ip); u.u_error = EEXIST; @@ -296,6 +311,7 @@ symlink() if (ip == NULL) return; u.u_error = rdwri(UIO_WRITE, ip, uap->target, nc, 0, 0, (int *)0); + /* handle u.u_error != 0 */ iput(ip); } @@ -306,34 +322,21 @@ symlink() */ unlink() { - register struct inode *ip, *pp; struct a { char *fname; }; - int unlinkingdot = 0; + register struct inode *ip, *dp; - pp = namei(uchar, 2, 0); - if (pp == NULL) + ip = namei(uchar, DELETE | LOCKPARENT, 0); + if (ip == NULL) return; - - /* - * Check for unlink(".") - * to avoid hanging on the iget - */ - if (pp->i_number == u.u_dent.d_ino) { - ip = pp; - ip->i_count++; - unlinkingdot++; - } else - ip = iget(pp->i_dev, pp->i_fs, u.u_dent.d_ino); - if(ip == NULL) - goto out1; - if((ip->i_mode&IFMT)==IFDIR && !suser()) + dp = u.u_pdir; + if ((ip->i_mode&IFMT) == IFDIR && 
!suser()) goto out; /* * Don't unlink a mounted file. */ - if (ip->i_dev != pp->i_dev) { + if (ip->i_dev != dp->i_dev) { u.u_error = EBUSY; goto out; } @@ -344,12 +347,11 @@ unlink() ip->i_flag |= ICHG; } out: - if (unlinkingdot) + if (dp == ip) irele(ip); else iput(ip); -out1: - iput(pp); + iput(dp); } /* @@ -397,13 +399,13 @@ saccess() svgid = u.u_gid; u.u_uid = u.u_ruid; u.u_gid = u.u_rgid; - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip != NULL) { - if (uap->fmode&FACCESS_READ && access(ip, IREAD)) + if ((uap->fmode&FACCESS_READ) && access(ip, IREAD)) goto done; - if (uap->fmode&FACCESS_WRITE && access(ip, IWRITE)) + if ((uap->fmode&FACCESS_WRITE) && access(ip, IWRITE)) goto done; - if (uap->fmode&FACCESS_EXECUTE && access(ip, IEXEC)) + if ((uap->fmode&FACCESS_EXECUTE) && access(ip, IEXEC)) goto done; done: iput(ip); @@ -445,7 +447,7 @@ stat() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip == NULL) return; stat1(ip, uap->sb); @@ -464,7 +466,7 @@ lstat() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 0, 0); + ip = namei(uchar, LOOKUP, 0); if (ip == NULL) return; stat1(ip, uap->sb); @@ -520,7 +522,7 @@ readlink() } *uap = (struct a *)u.u_ap; int resid; - ip = namei(uchar, 0, 0); + ip = namei(uchar, LOOKUP, 0); if (ip == NULL) return; if ((ip->i_mode&IFMT) != IFLNK) { @@ -533,6 +535,9 @@ out: u.u_r.r_val1 = uap->count - resid; } +/* + * Change mode of a file given path name. + */ chmod() { struct inode *ip; @@ -545,8 +550,12 @@ chmod() if ((ip = owner(1)) == NULL) return; chmod1(ip, uap->fmode); + iput(ip); } +/* + * Change mode of a file given a file descriptor. + */ fchmod() { struct a { @@ -565,14 +574,17 @@ fchmod() return; } ip = fp->f_inode; - ilock(ip); - if (u.u_uid != ip->i_uid && !suser()) { - iunlock(ip); + if (u.u_uid != ip->i_uid && !suser()) return; - } + ilock(ip); chmod1(ip, uap->fmode); + iunlock(ip); } +/* + * Change the mode on a file. 
+ * Inode must be locked before calling. + */ chmod1(ip, mode) register struct inode *ip; register int mode; @@ -598,9 +610,11 @@ ok: ip->i_flag |= ICHG; if (ip->i_flag&ITEXT && (ip->i_mode&ISVTX)==0) xrele(ip); - iput(ip); } +/* + * Set ownership given a path name. + */ chown() { struct inode *ip; @@ -614,8 +628,12 @@ chown() if (!suser() || (ip = owner(0)) == NULL) return; chown1(ip, uap->uid, uap->gid); + iput(ip); } +/* + * Set ownership given a file descriptor. + */ fchown() { struct a { @@ -635,12 +653,11 @@ fchown() return; } ip = fp->f_inode; - ilock(ip); - if (!suser()) { - iunlock(ip); + if (!suser()) return; - } + ilock(ip); chown1(ip, uap->uid, uap->gid); + iunlock(ip); } /* @@ -678,8 +695,8 @@ chown1(ip, uid, gid) change = fragroundup(fs, ip->i_size); change /= DEV_BSIZE; } - chkdq(ip, -change, 1); - chkiq(ip->i_dev, ip, ip->i_uid, 1); + (void)chkdq(ip, -change, 1); + (void)chkiq(ip->i_dev, ip, ip->i_uid, 1); dqrele(ip->i_dquot); #endif /* @@ -695,10 +712,9 @@ chown1(ip, uid, gid) ip->i_mode &= ~(ISUID|ISGID); #ifdef QUOTA ip->i_dquot = inoquota(ip); - chkdq(ip, change, 1); - chkiq(ip->i_dev, NULL, uid, 1); + (void)chkdq(ip, change, 1); + (void)chkiq(ip->i_dev, (struct inode *)NULL, uid, 1); #endif - iput(ip); } /* @@ -729,12 +745,18 @@ outime() iput(ip); } +/* + * Flush any pending I/O. + */ sync() { update(); } +/* + * Apply an advisory lock on a file descriptor. + */ flock() { struct a { @@ -753,35 +775,38 @@ flock() return; } cmd = uap->how; - flags = u.u_pofile[uap->fd] & (RDLOCK|WRLOCK); + flags = u.u_pofile[uap->fd] & (SHLOCK|EXLOCK); if (cmd&FUNLOCK) { if (flags == 0) { u.u_error = EINVAL; return; } funlocki(fp->f_inode, flags); - u.u_pofile[uap->fd] &= ~(RDLOCK|WRLOCK); + u.u_pofile[uap->fd] &= ~(SHLOCK|EXLOCK); return; } /* * No reason to write lock a file we've already * write locked, similarly with a read lock. 
*/ - if ((flags&WRLOCK) && (cmd&FWRLOCK) || - (flags&RDLOCK) && (cmd&FRDLOCK)) + if ((flags&EXLOCK) && (cmd&FEXLOCK) || + (flags&SHLOCK) && (cmd&FSHLOCK)) return; u.u_pofile[uap->fd] = flocki(fp->f_inode, u.u_pofile[uap->fd], cmd); } +/* + * Truncate a file given its path name. + */ truncate() { struct a { char *fname; - int length; + u_long length; } *uap = (struct a *)u.u_ap; struct inode *ip; - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip == NULL) return; if (access(ip, IWRITE)) @@ -791,16 +816,18 @@ truncate() goto bad; } itrunc(ip, uap->length); - return; bad: iput(ip); } +/* + * Truncate a file given a file descriptor. + */ ftruncate() { struct a { int fd; - int length; + u_long length; } *uap = (struct a *)u.u_ap; struct inode *ip; struct file *fp; @@ -819,17 +846,267 @@ ftruncate() ip = fp->f_inode; ilock(ip); itrunc(ip, uap->length); + iunlock(ip); +} + +/* + * Synch an open file. + */ +fsync() +{ + struct a { + int fd; + } *uap = (struct a *)u.u_ap; + struct inode *ip; + struct file *fp; + + fp = getf(uap->fd); + if (fp == NULL) + return; + if (fp->f_type == DTYPE_SOCKET) { + u.u_error = EINVAL; + return; + } + ip = fp->f_inode; + ilock(ip); + syncip(ip); + iunlock(ip); } +/* + * Rename system call. + * rename("foo", "bar"); + * is essentially + * unlink("bar"); + * link("foo", "bar"); + * unlink("foo"); + * but ``atomically''. Can't do full commit without saving state in the + * inode on disk which isn't feasible at this time. Best we can do is + * always guarantee the target exists. + * + * Basic algorithm is: + * + * 1) Bump link count on source while we're linking it to the + * target. This also insure the inode won't be deleted out + * from underneath us while we work. + * 2) Link source to destination. If destination already exists, + * delete it first. + * 3) Unlink source reference to inode if still around. + * 4) If a directory was moved and the parent of the destination + * is different from the source, patch the ".." 
entry in the + * directory. + * + * Source and destination must either both be directories, or both + * not be directories. If target is a directory, it must be empty. + */ rename() { -#ifdef notdef struct a { char *from; char *to; } *uap; -#endif + register struct inode *ip, *xp, *dp; + int oldparent, parentdifferent, doingdirectory; + + uap = (struct a *)u.u_ap; + ip = namei(uchar, LOOKUP | LOCKPARENT, 0); + if (ip == NULL) + return; + dp = u.u_pdir; + oldparent = 0, doingdirectory = 0; + if ((ip->i_mode&IFMT) == IFDIR) { + register struct direct *d; + + d = &u.u_dent; + /* + * Avoid "." and ".." for obvious reasons. + */ + if (d->d_name[0] == '.') { + if (d->d_namlen == 1 || + (d->d_namlen == 2 && d->d_name[1] == '.')) { + u.u_error = EINVAL; + iput(ip); + return; + } + } + oldparent = dp->i_number; + doingdirectory++; + } + irele(dp); + + /* + * 1) Bump link count while we're moving stuff + * around. If we crash somewhere before + * completing our work, the link count + * may be wrong, but correctable. + */ + ip->i_nlink++; + ip->i_flag |= ICHG; + iupdat(ip, &time, &time, 1); + iunlock(ip); + + /* + * When the target exists, both the directory + * and target inodes are returned locked. + */ + u.u_dirp = (caddr_t)uap->to; + xp = namei(uchar, CREATE | LOCKPARENT, 0); + if (u.u_error) + goto out; + dp = u.u_pdir; + /* + * 2) If target doesn't exist, link the target + * to the source and unlink the source. + * Otherwise, rewrite the target directory + * entry to reference the source inode and + * expunge the original entry's existence. + */ + parentdifferent = oldparent != dp->i_number; + if (xp == NULL) { + if (dp->i_dev != ip->i_dev) { + u.u_error = EXDEV; + goto bad; + } + /* + * Account for ".." in directory. + * When source and destination have the + * same parent we don't fool with the + * link count -- this isn't required + * because we do a similar check below. 
+ */ + if (doingdirectory && parentdifferent) { + dp->i_nlink++; + dp->i_flag |= ICHG; + iupdat(dp, &time, &time, 1); + } + direnter(ip); + if (u.u_error) + goto out; + } else { + if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) { + u.u_error = EXDEV; + goto bad; + } + /* + * Target must be empty if a directory. + * Also, insure source and target are + * compatible (both directories, or both + * not directories). + */ + if ((xp->i_mode&IFMT) == IFDIR) { + if (!dirempty(xp)) { + u.u_error = EEXIST; /* XXX */ + goto bad; + } + if (!doingdirectory) { + u.u_error = ENOTDIR; + goto bad; + } + } else if (doingdirectory) { + u.u_error = EISDIR; + goto bad; + } + dirrewrite(dp, ip); + if (u.u_error) + goto bad1; + /* + * If this is a directory we know it is + * empty and we can squash the inode and + * any space associated with it. Otherwise, + * we've got a plain file and the link count + * simply needs to be adjusted. + */ + if (doingdirectory) { + xp->i_nlink = 0; + itrunc(xp, (u_long)0); + } else + xp->i_nlink--; + xp->i_flag |= ICHG; + iput(xp); + } + + /* + * 3) Unlink the source. + */ + u.u_dirp = uap->from; + dp = namei(uchar, DELETE, 0); + /* + * Insure directory entry still exists and + * has not changed since the start of all + * this. If either has occured, forget about + * about deleting the original entry and just + * adjust the link count in the inode. + */ + if (dp == NULL || u.u_dent.d_ino != ip->i_number) { + ip->i_nlink--; + ip->i_flag |= ICHG; + } else { + /* + * If source is a directory, must adjust + * link count of parent directory also. + * If target didn't exist and source and + * target have the same parent, then we + * needn't touch the link count, it all + * balances out in the end. Otherwise, we + * must do so to reflect deletion of ".." + * done above. 
+ */ + if (doingdirectory && (xp != NULL || parentdifferent)) { + dp->i_nlink--; + dp->i_flag |= ICHG; + } + if (dirremove()) { + ip->i_nlink--; + ip->i_flag |= ICHG; + } + } + irele(ip); + if (dp) + iput(dp); + + /* + * 4) Renaming a directory with the parent + * different requires ".." to be rewritten. + * The window is still there for ".." to + * be inconsistent, but this is unavoidable, + * and a lot shorter than when it was done + * in a user process. + */ + if (doingdirectory && parentdifferent && u.u_error == 0) { + struct dirtemplate dirbuf; + u.u_dirp = uap->to; + ip = namei(uchar, LOOKUP | LOCKPARENT, 0); + if (ip == NULL) { + printf("rename: .. went away\n"); + return; + } + dp = u.u_pdir; + if ((ip->i_mode&IFMT) != IFDIR) { + printf("rename: .. not a directory\n"); + goto stuck; + } + u.u_error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, 1, (int *)0); + if (u.u_error == 0) { + dirbuf.dotdot_ino = dp->i_number; + (void) rdwri(UIO_WRITE, ip, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, 1, (int *)0); + } +stuck: + irele(dp); + iput(ip); + } + return; +bad: + iput(u.u_pdir); +bad1: + if (xp) + irele(xp); +out: + ip->i_nlink--; + ip->i_flag |= ICHG; + irele(ip); } /* diff --git a/usr/src/sys/kern/vfs_vnops.c b/usr/src/sys/kern/vfs_vnops.c index af6509cd7e..ec06aa8906 100644 --- a/usr/src/sys/kern/vfs_vnops.c +++ b/usr/src/sys/kern/vfs_vnops.c @@ -1,4 +1,4 @@ -/* vfs_vnops.c 4.29 82/10/31 */ +/* vfs_vnops.c 4.30 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -14,6 +14,7 @@ #include "../h/socket.h" #include "../h/socketvar.h" #include "../h/proc.h" +#include "../h/nami.h" /* * Openi called to allow handler @@ -108,7 +109,7 @@ owner(follow) { register struct inode *ip; - ip = namei(uchar, 0, follow); + ip = namei(uchar, LOOKUP, follow); if (ip == NULL) return (NULL); if (u.u_uid == ip->i_uid) diff --git a/usr/src/sys/ufs/ffs/ffs_alloc.c b/usr/src/sys/ufs/ffs/ffs_alloc.c index 
921eee1cb5..3bbefd19da 100644 --- a/usr/src/sys/ufs/ffs/ffs_alloc.c +++ b/usr/src/sys/ufs/ffs/ffs_alloc.c @@ -1,4 +1,4 @@ -/* ffs_alloc.c 2.18 82/10/21 */ +/* ffs_alloc.c 2.19 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -13,8 +13,8 @@ #include "../h/kernel.h" extern u_long hashalloc(); -extern u_long ialloccg(); -extern u_long alloccg(); +extern ino_t ialloccg(); +extern daddr_t alloccg(); extern daddr_t alloccgblk(); extern daddr_t fragextend(); extern daddr_t blkpref(); @@ -74,7 +74,8 @@ alloc(ip, bpref, size) cg = itog(fs, ip->i_number); else cg = dtog(fs, bpref); - bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size, alloccg); + bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size, + (u_long (*)())alloccg); if (bno <= 0) goto nospace; bp = getblk(ip->i_dev, fsbtodb(fs, bno), size); @@ -137,12 +138,13 @@ realloccg(ip, bprev, bpref, osize, nsize) } } while (brealloc(bp, nsize) == 0); bp->b_flags |= B_DONE; - bzero(bp->b_un.b_addr + osize, nsize - osize); + bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize); return (bp); } if (bpref >= fs->fs_size) bpref = 0; - bno = (daddr_t)hashalloc(ip, cg, (long)bpref, nsize, alloccg); + bno = (daddr_t)hashalloc(ip, cg, (long)bpref, nsize, + (u_long (*)())alloccg); if (bno > 0) { obp = bread(ip->i_dev, fsbtodb(fs, bprev), osize); if (obp->b_flags & B_ERROR) { @@ -151,9 +153,9 @@ realloccg(ip, bprev, bpref, osize, nsize) } bp = getblk(ip->i_dev, fsbtodb(fs, bno), nsize); bcopy(obp->b_un.b_addr, bp->b_un.b_addr, (u_int)osize); - bzero(bp->b_un.b_addr + osize, nsize - osize); + bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize); brelse(obp); - fre(ip, bprev, (off_t)osize); + free(ip, bprev, (off_t)osize); return (bp); } nospace: @@ -196,7 +198,7 @@ ialloc(pip, ipref, mode) if (fs->fs_cstotal.cs_nifree == 0) goto noinodes; #ifdef QUOTA - if (chkiq(pip->i_dev, NULL, u.u_uid, 0)) + if (chkiq(pip->i_dev, (struct inode *)NULL, u.u_uid, 0)) return(NULL); #endif if (ipref >= fs->fs_ncg * fs->fs_ipg) @@ -230,6 
+232,7 @@ noinodes: * among those cylinder groups with above the average number of * free inodes, the one with the smallest number of directories. */ +ino_t dirpref(fs) register struct fs *fs; { @@ -244,35 +247,85 @@ dirpref(fs) mincg = cg; minndir = fs->fs_cs(fs, cg).cs_ndir; } - return (fs->fs_ipg * mincg); + return ((ino_t)(fs->fs_ipg * mincg)); } /* - * Select a cylinder to place a large block of data. - * - * The policy implemented by this algorithm is to maintain a - * rotor that sweeps the cylinder groups. When a block is - * needed, the rotor is advanced until a cylinder group with - * greater than the average number of free blocks is found. + * Select the desired position for the next block in a file. The file is + * logically divided into sections. The first section is composed of the + * direct blocks. Each additional section contains fs_maxbpg blocks. + * + * If no blocks have been allocated in the first section, the policy is to + * request a block in the same cylinder group as the inode that describes + * the file. If no blocks have been allocated in any other section, the + * policy is to place the section in a cylinder group with a greater than + * average number of free blocks. An appropriate cylinder group is found + * by maintaining a rotor that sweeps the cylinder groups. When a new + * group of blocks is needed, the rotor is advanced until a cylinder group + * with greater than the average number of free blocks is found. + * + * If a section is already partially allocated, the policy is to + * contiguously allocate fs_maxcontig blocks. The end of one of these + * contiguous blocks and the beginning of the next is physically separated + * so that the disk head will be in transit between them for at least + * fs_rotdelay milliseconds. This is to allow time for the processor to + * schedule another I/O transfer. 
*/ daddr_t -blkpref(fs) - register struct fs *fs; +blkpref(ip, lbn, indx, bap) + struct inode *ip; + daddr_t lbn; + int indx; + daddr_t *bap; { + register struct fs *fs; int cg, avgbfree; + daddr_t nextblk; - avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; - for (cg = fs->fs_cgrotor + 1; cg < fs->fs_ncg; cg++) - if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - fs->fs_cgrotor = cg; - return (fs->fs_fpg * cg + fs->fs_frag); - } - for (cg = 0; cg <= fs->fs_cgrotor; cg++) - if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - fs->fs_cgrotor = cg; + fs = ip->i_fs; + if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { + if (lbn < NDADDR) { + cg = itog(fs, ip->i_number); return (fs->fs_fpg * cg + fs->fs_frag); } - return (NULL); + /* + * Find a cylinder with greater than average number of + * unused data blocks. + */ + avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; + for (cg = fs->fs_cgrotor + 1; cg < fs->fs_ncg; cg++) + if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + return (fs->fs_fpg * cg + fs->fs_frag); + } + for (cg = 0; cg <= fs->fs_cgrotor; cg++) + if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + return (fs->fs_fpg * cg + fs->fs_frag); + } + return (NULL); + } + /* + * One or more previous blocks have been laid out. If less + * than fs_maxcontig previous blocks are contiguous, the + * next block is requested contiguously, otherwise it is + * requested rotationally delayed by fs_rotdelay milliseconds. + */ + nextblk = bap[indx - 1] + fs->fs_frag; + if (indx > fs->fs_maxcontig && + bap[indx - fs->fs_maxcontig] + fs->fs_frag * fs->fs_maxcontig + != nextblk) + return (nextblk); + if (fs->fs_rotdelay != 0) + /* + * Here we convert ms of delay to frags as: + * (frags) = (ms) * (rev/sec) * (sect/rev) / + * ((sect/frag) * (ms/sec)) + * then round up to the next block. 
+ */ + nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / + (NSPF(fs) * 1000), fs->fs_frag); + return (nextblk); } /* @@ -400,7 +453,7 @@ fragextend(ip, cg, bprev, osize, nsize) * Check to see if a block of the apprpriate size is available, * and if it is, allocate it. */ -u_long +daddr_t alloccg(ip, cg, bpref, size) struct inode *ip; int cg; @@ -505,14 +558,10 @@ alloccgblk(fs, cgp, bpref) /* * if the requested block is available, use it */ -/* - * disallow sequential layout. - * if (isblock(fs, cgp->cg_free, bpref/fs->fs_frag)) { bno = bpref; goto gotit; } - */ /* * check for a block available on the same cylinder */ @@ -527,29 +576,13 @@ alloccgblk(fs, cgp, bpref) bpref = howmany(fs->fs_spc * cylno, NSPF(fs)); goto norot; } - /* - * find a block that is rotationally optimal - */ - cylbp = cgp->cg_b[cylno]; - if (fs->fs_rotdelay == 0) { - pos = cbtorpos(fs, bpref); - } else { - /* - * here we convert ms of delay to frags as: - * (frags) = (ms) * (rev/sec) * (sect/rev) / - * ((sect/frag) * (ms/sec)) - * then round up to the next rotational position - */ - bpref += fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / - (NSPF(fs) * 1000); - pos = cbtorpos(fs, bpref); - pos = (pos + 1) % NRPOS; - } /* * check the summary information to see if a block is * available in the requested cylinder starting at the - * optimal rotational position and proceeding around. + * requested rotational position and proceeding around. */ + cylbp = cgp->cg_b[cylno]; + pos = cbtorpos(fs, bpref); for (i = pos; i < NRPOS; i++) if (cylbp[i] > 0) break; @@ -612,7 +645,7 @@ gotit: * 2) allocate the next available inode after the requested * inode in the specified cylinder group. */ -u_long +ino_t ialloccg(ip, cg, ipref, mode) struct inode *ip; int cg; @@ -673,7 +706,7 @@ gotit: * free map. If a fragment is deallocated, a possible * block reassembly is checked. 
*/ -fre(ip, bno, size) +free(ip, bno, size) register struct inode *ip; daddr_t bno; off_t size; @@ -873,37 +906,6 @@ mapsearch(fs, cgp, bpref, allocsiz) return (-1); } -/* - * Getfs maps a device number into a pointer to the incore super block. - * - * The algorithm is a linear search through the mount table. A - * consistency check of the super block magic number is performed. - * - * panic: no fs -- the device is not mounted. - * this "cannot happen" - */ -struct fs * -getfs(dev) - dev_t dev; -{ - register struct mount *mp; - register struct fs *fs; - - for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++) { - if (mp->m_bufp == NULL || mp->m_dev != dev) - continue; - fs = mp->m_bufp->b_un.b_fs; - if (fs->fs_magic != FS_MAGIC) { - printf("dev = 0x%x, fs = %s\n", dev, fs->fs_fsmnt); - panic("getfs: bad magic"); - } - return (fs); - } - printf("dev = 0x%x\n", dev); - panic("getfs: no fs"); - return (NULL); -} - /* * Fserr prints the name of a file system with an error diagnostic. * @@ -917,82 +919,3 @@ fserr(fs, cp) printf("%s: %s\n", fs->fs_fsmnt, cp); } - -/* - * Getfsx returns the index in the file system - * table of the specified device. The swap device - * is also assigned a pseudo-index. The index may - * be used as a compressed indication of the location - * of a block, recording - * - * rather than - * - * provided the information need remain valid only - * as long as the file system is mounted. - */ -getfsx(dev) - dev_t dev; -{ - register struct mount *mp; - - if (dev == swapdev) - return (MSWAPX); - for(mp = &mount[0]; mp < &mount[NMOUNT]; mp++) - if (mp->m_dev == dev) - return (mp - &mount[0]); - return (-1); -} - -/* - * Update is the internal name of 'sync'. It goes through the disk - * queues to initiate sandbagged IO; goes through the inodes to write - * modified nodes; and it goes through the mount table to initiate - * the writing of the modified super blocks. 
- */ -update() -{ - register struct inode *ip; - register struct mount *mp; - struct fs *fs; - - if (updlock) - return; - updlock++; - /* - * Write back modified superblocks. - * Consistency check that the superblock - * of each file system is still in the buffer cache. - */ - for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++) { - if (mp->m_bufp == NULL) - continue; - fs = mp->m_bufp->b_un.b_fs; - if (fs->fs_fmod == 0) - continue; - if (fs->fs_ronly != 0) { /* XXX */ - printf("fs = %s\n", fs->fs_fsmnt); - panic("update: rofs mod"); - } - fs->fs_fmod = 0; - fs->fs_time = time.tv_sec; - sbupdate(mp); - } - /* - * Write back each (modified) inode. - */ - for (ip = inode; ip < inodeNINODE; ip++) { - if ((ip->i_flag & ILOCKED) != 0 || ip->i_count == 0) - continue; - ip->i_flag |= ILOCKED; - ip->i_count++; - iupdat(ip, &time, &time, 0); - iput(ip); - } - updlock = 0; - /* - * Force stale buffer cache information to be flushed, - * for all devices. - */ - bflush(NODEV); -} - diff --git a/usr/src/sys/ufs/ffs/ffs_balloc.c b/usr/src/sys/ufs/ffs/ffs_balloc.c index d98232864c..50276921fb 100644 --- a/usr/src/sys/ufs/ffs/ffs_balloc.c +++ b/usr/src/sys/ufs/ffs/ffs_balloc.c @@ -1,4 +1,4 @@ -/* ffs_balloc.c 5.2 82/09/25 */ +/* ffs_balloc.c 5.3 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -51,8 +51,8 @@ bmap(ip, bn, rwflg, size) osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { bp = realloccg(ip, ip->i_db[nb], - blkpref(ip, nb, nb, &ip->i_db[0]), - osize, fs->fs_bsize); + blkpref(ip, nb, (int)nb, &ip->i_db[0]), + osize, (int)fs->fs_bsize); ip->i_size = (nb + 1) * fs->fs_bsize; ip->i_db[nb] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IUPD|ICHG; @@ -77,7 +77,7 @@ bmap(ip, bn, rwflg, size) if (nsize <= osize) goto gotit; bp = realloccg(ip, nb, - blkpref(ip, bn, bn, &ip->i_db[0]), + blkpref(ip, bn, (int)bn, &ip->i_db[0]), osize, nsize); } else { if (ip->i_size < (bn + 1) * fs->fs_bsize) @@ -85,7 +85,7 @@ bmap(ip, bn, rwflg, size) else nsize = 
fs->fs_bsize; bp = alloc(ip, - blkpref(ip, bn, bn, &ip->i_db[0]), + blkpref(ip, bn, (int)bn, &ip->i_db[0]), nsize); } if (bp == NULL) @@ -136,8 +136,8 @@ gotit: if (nb == 0) { if (rwflg == B_READ) return ((daddr_t)-1); - pref = blkpref(ip, lbn, 0, 0); - bp = alloc(ip, pref, fs->fs_bsize); + pref = blkpref(ip, lbn, 0, (daddr_t *)0); + bp = alloc(ip, pref, (int)fs->fs_bsize); if (bp == NULL) return ((daddr_t)-1); nb = dbtofsb(fs, bp->b_blkno); @@ -154,7 +154,7 @@ gotit: * fetch through the indirect blocks */ for (; j <= NIADDR; j++) { - bp = bread(ip->i_dev, fsbtodb(fs, nb), fs->fs_bsize); + bp = bread(ip->i_dev, fsbtodb(fs, nb), (int)fs->fs_bsize); if (bp->b_flags & B_ERROR) { brelse(bp); return ((daddr_t)0); @@ -170,10 +170,11 @@ gotit: } if (pref == 0) if (j < NIADDR) - pref = blkpref(ip, lbn, 0, 0); + pref = blkpref(ip, lbn, 0, + (daddr_t *)0); else pref = blkpref(ip, lbn, i, &bap[0]); - nbp = alloc(ip, pref, fs->fs_bsize); + nbp = alloc(ip, pref, (int)fs->fs_bsize); if (nbp == NULL) { brelse(bp); return ((daddr_t)-1); diff --git a/usr/src/sys/ufs/ffs/ffs_inode.c b/usr/src/sys/ufs/ffs/ffs_inode.c index 9a290f9b7e..23df4545fe 100644 --- a/usr/src/sys/ufs/ffs/ffs_inode.c +++ b/usr/src/sys/ufs/ffs/ffs_inode.c @@ -1,4 +1,4 @@ -/* ffs_inode.c 4.30 82/10/23 */ +/* ffs_inode.c 4.31 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -236,14 +236,14 @@ irele(ip) if (ip->i_count == 1) { ip->i_flag |= ILOCKED; if (ip->i_nlink <= 0) { - itrunc(ip, 0); + itrunc(ip, (u_long)0); mode = ip->i_mode; ip->i_mode = 0; ip->i_rdev = 0; ip->i_flag |= IUPD|ICHG; ifree(ip, ip->i_number, mode); #ifdef QUOTA - chkiq(ip->i_dev, ip, ip->i_uid, 0); + (void)chkiq(ip->i_dev, ip, ip->i_uid, 0); dqrele(ip->i_dquot); ip->i_dquot = NODQUOT; #endif @@ -325,146 +325,253 @@ iupdat(ip, ta, tm, waitfor) */ itrunc(ip, length) register struct inode *ip; - register int length; + u_long length; { register i; - daddr_t bn; - struct inode itmp; + register daddr_t lastblock; + daddr_t bn, 
lastdiblock, lastsiblock; register struct fs *fs; + int j; #ifdef QUOTA - register long cnt = 0; - long tloop(); + long blocksreleased = 0, nblocks; + long indirtrunc(); #endif - /* - * Only plain files, directories and symbolic - * links contain blocks. - */ - i = ip->i_mode & IFMT; - if (i != IFREG && i != IFDIR && i != IFLNK) - return; + if (ip->i_size <= length) return; - +#ifdef notdef + /* this is superfluous given size check above */ + i = ip->i_mode & IFMT; + if (i != IFREG && i != IFDIR && i != IFLNK) { + printf("itrunc: i# %d, size %d\n", ip->i_number, ip->i_size); + return; + } +#endif /* - * Clean inode on disk before freeing blocks - * to insure no duplicates if system crashes. + * Update size of file on disk before + * we start freeing blocks. If we crash + * while free'ing blocks below, the file + * size will be believed and the blocks + * returned to the free list. + * After updating the copy on disk we + * put the old size back so macros like + * blksize will work. */ - itmp = *ip; - itmp.i_size = length; - for (i = 0; i < NDADDR; i++) - itmp.i_db[i] = 0; - for (i = 0; i < NIADDR; i++) - itmp.i_ib[i] = 0; - itmp.i_flag |= ICHG|IUPD; - iupdat(&itmp, &time, &time, 1); - ip->i_flag &= ~(IUPD|IACC|ICHG); + j = ip->i_size; + ip->i_size = length; + ip->i_flag |= ICHG|IUPD; + iupdat(ip, &time, &time, 1); + ip->i_size = j; /* - * Now return blocks to free list... if machine - * crashes, they will be harmless MISSING blocks. + * Calculate last direct, single indirect and + * double indirect block (if any) which we want + * to keep. Lastblock is -1 when the file is + * truncated to 0. 
*/ fs = ip->i_fs; + lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; + lastsiblock = lastblock - NDADDR; + lastdiblock = lastsiblock - NINDIR(fs); +#ifdef QUOTA + nblocks = fs->fs_bsize / DEV_BSIZE; +#endif /* - * release double indirect block first + * Double indirect block first */ - bn = ip->i_ib[NIADDR-1]; - if (bn != (daddr_t)0) { - ip->i_ib[NIADDR - 1] = (daddr_t)0; + bn = ip->i_ib[NIADDR - 1]; + if (bn != 0) { + /* + * If lastdiblock is negative, it's value + * is meaningless; in this case we set it to + * -NINDIR(fs) so calculations performed in + * indirtrunc come out right. + */ + if (lastdiblock < 0) + lastdiblock -= lastsiblock; #ifdef QUOTA - cnt += + blocksreleased += #endif - tloop(ip, bn, 1); + indirtrunc(ip, bn, lastdiblock, 1); + if (lastdiblock < 0) { + ip->i_ib[NIADDR - 1] = 0; + free(ip, bn, (off_t)fs->fs_bsize); +#ifdef QUOTA + blocksreleased += nblocks; +#endif + } } + if (lastdiblock >= 0) + goto done; /* - * release single indirect blocks second + * Single indirect blocks second. + * First, those which can be totally + * zapped, then possibly one which + * needs to be partially cleared. */ - for (i = NIADDR - 2; i >= 0; i--) { + j = lastsiblock < 0 ? -1 : lastsiblock / NINDIR(fs); + for (i = NIADDR - 2; i > j; i--) { bn = ip->i_ib[i]; - if (bn != (daddr_t)0) { - ip->i_ib[i] = (daddr_t)0; + if (bn != 0) { #ifdef QUOTA - cnt += + blocksreleased += nblocks + #endif - tloop(ip, bn, 0); + indirtrunc(ip, bn, (daddr_t)-1, 0); + ip->i_ib[i] = 0; + free(ip, bn, (off_t)fs->fs_bsize); } } + if (lastsiblock >= 0) { + bn = ip->i_ib[j]; + if (bn != 0) +#ifdef QUOTA + blocksreleased += +#endif + indirtrunc(ip, bn, lastsiblock, 0); + goto done; + } /* - * finally release direct blocks + * All whole direct blocks. 
*/ - for (i = NDADDR - 1; i>=0; i--) { + for (i = NDADDR - 1; i > lastblock; i--) { + register int size; + bn = ip->i_db[i]; - if (bn == (daddr_t)0) + if (bn == 0) continue; - ip->i_db[i] = (daddr_t)0; -#ifndef QUOTA - fre(ip, bn, (off_t)blksize(fs, ip, i)); -#else - { int size; - fre(ip, bn, size = (off_t)blksize(fs, ip, i)); - cnt += size / DEV_BSIZE; - } + ip->i_db[i] = 0; + size = (off_t)blksize(fs, ip, i); + free(ip, bn, size); +#ifdef QUOTA + blocksreleased += size / DEV_BSIZE; +#endif + } + /* + * Finally, look for a change in size of the + * last direct block; release any frags. + */ + if (lastblock >= 0 && ip->i_db[lastblock] != 0) { + /* + * Calculate amount of space we're giving + * back as old block size minus new block size. + */ + i = blksize(fs, ip, lastblock); + ip->i_size = length; + i = i - blksize(fs, ip, lastblock); + if (i > 0) { + /* + * Block number of space to be free'd is + * the old block # plus the number of frags + * required for the storage we're keeping. + */ + bn = ip->i_db[lastblock] + + numfrags(fs, fs->fs_bsize - i); + free(ip, bn, i); +#ifdef QUOTA + blocksreleased += i / DEV_BSIZE; #endif + } } - ip->i_size = 0; +done: /* - * Inode was written and flags updated above. - * No need to modify flags here. + * Finished free'ing blocks, complete + * inode update to reflect new length. */ #ifdef QUOTA - (void) chkdq(ip, -cnt, 0); + (void) chkdq(ip, -blocksreleased, 0); #endif + ip->i_size = length; + ip->i_flag |= ICHG|IUPD; + iupdat(ip, &time, &time, 1); } +/* + * Release blocks associated with the inode ip and + * stored in the indirect block bn. Blocks are free'd + * in LIFO order up to (but not including) lastbn. If + * doubleindirect is indicated, this block is a double + * indirect block and recursive calls to indirtrunc must + * be used to cleanse single indirect blocks instead of + * a simple free. 
+ */ #ifdef QUOTA long #endif -tloop(ip, bn, indflg) +indirtrunc(ip, bn, lastbn, doubleindirect) register struct inode *ip; - daddr_t bn; - int indflg; + daddr_t bn, lastbn; + int doubleindirect; { - register i; - register struct buf *bp; + register int i; + struct buf *bp; register daddr_t *bap; register struct fs *fs; - daddr_t nb; + daddr_t nb, last; #ifdef QUOTA - register long cnt = 0; + int blocksreleased = 0, nblocks; #endif bp = NULL; fs = ip->i_fs; - for (i = NINDIR(fs) - 1; i >= 0; i--) { + last = lastbn; + if (doubleindirect) + last /= NINDIR(fs); +#ifdef QUOTA + nblocks = fs->fs_bsize / DEV_BSIZE; +#endif + for (i = NINDIR(fs) - 1; i > last; i--) { if (bp == NULL) { + struct buf *copy; + + copy = geteblk((int)fs->fs_bsize); bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize); - if (bp->b_flags & B_ERROR) { + if (bp->b_flags&B_ERROR) { + brelse(copy); brelse(bp); - return; + return (NULL); } bap = bp->b_un.b_daddr; + /* + * Update pointers before freeing blocks. + * If we crash before freeing the blocks + * they'll be recovered as missing. 
+ */ + bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, + (u_int)fs->fs_bsize); + bzero((caddr_t)&bap[last + 1], + (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t)); + bwrite(bp); + bp = copy, bap = bp->b_un.b_daddr; } nb = bap[i]; - if (nb == (daddr_t)0) + if (nb == 0) continue; - if (indflg) { + if (doubleindirect) #ifdef QUOTA - cnt += + blocksreleased += #endif - tloop(ip, nb, 0); - } else { - fre(ip, nb, (int)fs->fs_bsize); + indirtrunc(ip, nb, (daddr_t)-1, 0); + free(ip, nb, (int)fs->fs_bsize); #ifdef QUOTA - cnt += fs->fs_bsize / DEV_BSIZE; + blocksreleased += nblocks; #endif - } + } + if (doubleindirect && lastbn >= 0) { + last = lastbn % NINDIR(fs); + if (bp == NULL) + panic("indirtrunc"); + nb = bap[i]; + if (nb != 0) +#ifdef QUOTA + blocksreleased += +#endif + indirtrunc(ip, nb, last, 0); } if (bp != NULL) brelse(bp); - fre(ip, bn, (int)fs->fs_bsize); #ifdef QUOTA - cnt += fs->fs_bsize / DEV_BSIZE; - return(cnt); + return (blocksreleased); #endif } diff --git a/usr/src/sys/ufs/ffs/ffs_subr.c b/usr/src/sys/ufs/ffs/ffs_subr.c index 9e299f2e43..0284376f1e 100644 --- a/usr/src/sys/ufs/ffs/ffs_subr.c +++ b/usr/src/sys/ufs/ffs/ffs_subr.c @@ -1,11 +1,110 @@ -/* ffs_subr.c 4.2 82/10/21 */ +/* ffs_subr.c 4.3 82/11/13 */ #ifdef KERNEL #include "../h/param.h" +#include "../h/systm.h" +#include "../h/mount.h" #include "../h/fs.h" +#include "../h/conf.h" +#include "../h/buf.h" +#include "../h/inode.h" +#include "../h/dir.h" +#include "../h/user.h" +#include "../h/quota.h" +#include "../h/kernel.h" #else #include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#endif + +#ifdef KERNEL +int syncprt = 0; + +/* + * Update is the internal name of 'sync'. It goes through the disk + * queues to initiate sandbagged IO; goes through the inodes to write + * modified nodes; and it goes through the mount table to initiate + * the writing of the modified super blocks. 
+ */ +update() +{ + register struct inode *ip; + register struct mount *mp; + struct fs *fs; + + if (syncprt) + bufstats(); + if (updlock) + return; + updlock++; + /* + * Write back modified superblocks. + * Consistency check that the superblock + * of each file system is still in the buffer cache. + */ + for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++) { + if (mp->m_bufp == NULL) + continue; + fs = mp->m_bufp->b_un.b_fs; + if (fs->fs_fmod == 0) + continue; + if (fs->fs_ronly != 0) { /* XXX */ + printf("fs = %s\n", fs->fs_fsmnt); + panic("update: rofs mod"); + } + fs->fs_fmod = 0; + fs->fs_time = time.tv_sec; + sbupdate(mp); + } + /* + * Write back each (modified) inode. + */ + for (ip = inode; ip < inodeNINODE; ip++) { + if ((ip->i_flag & ILOCKED) != 0 || ip->i_count == 0) + continue; + ip->i_flag |= ILOCKED; + ip->i_count++; + iupdat(ip, &time, &time, 0); + iput(ip); + } + updlock = 0; + /* + * Force stale buffer cache information to be flushed, + * for all devices. + */ + bflush(NODEV); +} + +/* + * Flush all the blocks associated with an inode. + * Note that we make a more stringent check of + * writing out any block in the buffer pool that may + * overlap the inode. This brings the inode up to + * date with recent mods to the cooked device. + */ +syncip(ip) + register struct inode *ip; +{ + register struct fs *fs; + long lbn, lastlbn; + daddr_t blkno; + + fs = ip->i_fs; + lastlbn = howmany(ip->i_size, fs->fs_bsize); + for (lbn = 0; lbn < lastlbn; lbn++) { + blkno = fsbtodb(fs, bmap(ip, lbn, B_READ)); + blkflush(ip->i_dev, blkno, blksize(fs, ip, lbn)); + } +} #endif extern int around[9]; @@ -148,3 +247,91 @@ setblock(fs, cp, h) panic("setblock"); } } + +#ifdef KERNEL +/* + * Getfs maps a device number into a pointer to the incore super block. + * + * The algorithm is a linear search through the mount table. A + * consistency check of the super block magic number is performed. + * + * panic: no fs -- the device is not mounted. 
+ * this "cannot happen" + */ +struct fs * +getfs(dev) + dev_t dev; +{ + register struct mount *mp; + register struct fs *fs; + + for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++) { + if (mp->m_bufp == NULL || mp->m_dev != dev) + continue; + fs = mp->m_bufp->b_un.b_fs; + if (fs->fs_magic != FS_MAGIC) { + printf("dev = 0x%x, fs = %s\n", dev, fs->fs_fsmnt); + panic("getfs: bad magic"); + } + return (fs); + } + printf("dev = 0x%x\n", dev); + panic("getfs: no fs"); + return (NULL); +} + +/* + * Getfsx returns the index in the file system + * table of the specified device. The swap device + * is also assigned a pseudo-index. The index may + * be used as a compressed indication of the location + * of a block, recording + * + * rather than + * + * provided the information need remain valid only + * as long as the file system is mounted. + */ +getfsx(dev) + dev_t dev; +{ + register struct mount *mp; + + if (dev == swapdev) + return (MSWAPX); + for(mp = &mount[0]; mp < &mount[NMOUNT]; mp++) + if (mp->m_dev == dev) + return (mp - &mount[0]); + return (-1); +} + +/* + * Print out statistics on the current allocation of the buffer pool. + * Can be enabled to print out on every ``sync'' by setting "syncprt" + * above. 
+ */ +bufstats() +{ + int s, i, j, count; + register struct buf *bp, *dp; + int counts[MAXBSIZE/CLBYTES+1]; + static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" }; + + for (bp = bfreelist, i = 0; bp < &bfreelist[BQUEUES]; bp++, i++) { + count = 0; + for (j = 0; j <= MAXBSIZE/CLBYTES; j++) + counts[j] = 0; + s = spl6(); + for (dp = bp->av_forw; dp != bp; dp = dp->av_forw) { + counts[dp->b_bufsize/CLBYTES]++; + count++; + } + splx(s); + printf("%s: total-%d", bname[i], count); + for (j = 0; j <= MAXBSIZE/CLBYTES; j++) + if (counts[j] != 0) + printf(", %d-%d", j * CLBYTES, counts[j]); + printf("\n"); + } +} +#endif diff --git a/usr/src/sys/ufs/ffs/ffs_vnops.c b/usr/src/sys/ufs/ffs/ffs_vnops.c index e185d2abf7..e0ba4995e4 100644 --- a/usr/src/sys/ufs/ffs/ffs_vnops.c +++ b/usr/src/sys/ufs/ffs/ffs_vnops.c @@ -1,4 +1,4 @@ -/* ffs_vnops.c 4.41 82/10/19 */ +/* ffs_vnops.c 4.42 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -16,13 +16,20 @@ #include "../h/uio.h" #include "../h/socket.h" #include "../h/socketvar.h" +#include "../h/nami.h" +/* + * Change current working directory (``.''). + */ chdir() { chdirec(&u.u_cdir); } +/* + * Change notion of root (``/'') directory. + */ chroot() { @@ -30,6 +37,9 @@ chroot() chdirec(&u.u_rdir); } +/* + * Common routine for chroot and chdir. 
+ */ chdirec(ipp) register struct inode **ipp; { @@ -38,14 +48,14 @@ chdirec(ipp) char *fname; }; - ip = namei(uchar, 0, 1); - if(ip == NULL) + ip = namei(uchar, LOOKUP, 1); + if (ip == NULL) return; - if((ip->i_mode&IFMT) != IFDIR) { + if ((ip->i_mode&IFMT) != IFDIR) { u.u_error = ENOTDIR; goto bad; } - if(access(ip, IEXEC)) + if (access(ip, IEXEC)) goto bad; iunlock(ip); if (*ipp) @@ -68,23 +78,28 @@ open() int flags; int mode; } *uap; - int checkpermissions = 1; + int checkpermissions = 1, flags; uap = (struct a *)u.u_ap; - if (uap->flags&FCREATE) { - ip = namei(uchar, 1, 1); + flags = uap->flags + 1; + if ((flags&FTRUNCATE) && (flags&FWRITE) == 0) { + u.u_error = EINVAL; + return; + } + if (flags&FCREATE) { + ip = namei(uchar, CREATE, 1); if (ip == NULL) { if (u.u_error) return; ip = maknode(uap->mode&07777&(~ISVTX)); checkpermissions = 0; - uap->flags &= ~FTRUNCATE; + flags &= ~FTRUNCATE; } } else - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip == NULL) return; - open1(ip, ++uap->flags, checkpermissions); + open1(ip, flags, checkpermissions); } #ifndef NOCOMPAT @@ -100,7 +115,7 @@ ocreat() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 1, 1); + ip = namei(uchar, CREATE, 1); if (ip == NULL) { if (u.u_error) return; @@ -109,7 +124,7 @@ ocreat() return; open1(ip, FWRITE, 0); } else - open1(ip, FWRITE|FTRUNCATE, 0); + open1(ip, FWRITE|FTRUNCATE, 1); } #endif @@ -145,7 +160,7 @@ open1(ip, mode, checkpermissions) * while doing so in case we block inside flocki. 
*/ flags = 0; - if (mode&(FRDLOCK|FWRLOCK)) { + if (mode&(FSHLOCK|FEXLOCK)) { iunlock(ip); flags = flocki(ip, 0, mode); ilock(ip); @@ -153,7 +168,7 @@ open1(ip, mode, checkpermissions) goto bad; } if (mode&FTRUNCATE) - itrunc(ip, 0); + itrunc(ip, (u_long)0); iunlock(ip); if ((fp = falloc()) == NULL) goto out; @@ -189,7 +204,7 @@ mknod() uap = (struct a *)u.u_ap; if (suser()) { - ip = namei(uchar, 1, 0); + ip = namei(uchar, CREATE, 0); if (ip != NULL) { u.u_error = EEXIST; goto out; @@ -225,10 +240,10 @@ link() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 0, 1); /* well, this routine is doomed anyhow */ + ip = namei(uchar, LOOKUP, 1); /* well, this routine is doomed anyhow */ if (ip == NULL) return; - if ((ip->i_mode&IFMT)==IFDIR && !suser()) { + if ((ip->i_mode&IFMT) == IFDIR && !suser()) { iput(ip); return; } @@ -237,7 +252,7 @@ link() iupdat(ip, &time, &time, 1); iunlock(ip); u.u_dirp = (caddr_t)uap->linkname; - xp = namei(uchar, 1, 0); + xp = namei(uchar, CREATE, 0); if (xp != NULL) { u.u_error = EEXIST; iput(xp); @@ -284,7 +299,7 @@ symlink() nc++; } u.u_dirp = uap->linkname; - ip = namei(uchar, 1, 0); + ip = namei(uchar, CREATE, 0); if (ip) { iput(ip); u.u_error = EEXIST; @@ -296,6 +311,7 @@ symlink() if (ip == NULL) return; u.u_error = rdwri(UIO_WRITE, ip, uap->target, nc, 0, 0, (int *)0); + /* handle u.u_error != 0 */ iput(ip); } @@ -306,34 +322,21 @@ symlink() */ unlink() { - register struct inode *ip, *pp; struct a { char *fname; }; - int unlinkingdot = 0; + register struct inode *ip, *dp; - pp = namei(uchar, 2, 0); - if (pp == NULL) + ip = namei(uchar, DELETE | LOCKPARENT, 0); + if (ip == NULL) return; - - /* - * Check for unlink(".") - * to avoid hanging on the iget - */ - if (pp->i_number == u.u_dent.d_ino) { - ip = pp; - ip->i_count++; - unlinkingdot++; - } else - ip = iget(pp->i_dev, pp->i_fs, u.u_dent.d_ino); - if(ip == NULL) - goto out1; - if((ip->i_mode&IFMT)==IFDIR && !suser()) + dp = u.u_pdir; + if ((ip->i_mode&IFMT) == IFDIR && 
!suser()) goto out; /* * Don't unlink a mounted file. */ - if (ip->i_dev != pp->i_dev) { + if (ip->i_dev != dp->i_dev) { u.u_error = EBUSY; goto out; } @@ -344,12 +347,11 @@ unlink() ip->i_flag |= ICHG; } out: - if (unlinkingdot) + if (dp == ip) irele(ip); else iput(ip); -out1: - iput(pp); + iput(dp); } /* @@ -397,13 +399,13 @@ saccess() svgid = u.u_gid; u.u_uid = u.u_ruid; u.u_gid = u.u_rgid; - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip != NULL) { - if (uap->fmode&FACCESS_READ && access(ip, IREAD)) + if ((uap->fmode&FACCESS_READ) && access(ip, IREAD)) goto done; - if (uap->fmode&FACCESS_WRITE && access(ip, IWRITE)) + if ((uap->fmode&FACCESS_WRITE) && access(ip, IWRITE)) goto done; - if (uap->fmode&FACCESS_EXECUTE && access(ip, IEXEC)) + if ((uap->fmode&FACCESS_EXECUTE) && access(ip, IEXEC)) goto done; done: iput(ip); @@ -445,7 +447,7 @@ stat() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip == NULL) return; stat1(ip, uap->sb); @@ -464,7 +466,7 @@ lstat() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 0, 0); + ip = namei(uchar, LOOKUP, 0); if (ip == NULL) return; stat1(ip, uap->sb); @@ -520,7 +522,7 @@ readlink() } *uap = (struct a *)u.u_ap; int resid; - ip = namei(uchar, 0, 0); + ip = namei(uchar, LOOKUP, 0); if (ip == NULL) return; if ((ip->i_mode&IFMT) != IFLNK) { @@ -533,6 +535,9 @@ out: u.u_r.r_val1 = uap->count - resid; } +/* + * Change mode of a file given path name. + */ chmod() { struct inode *ip; @@ -545,8 +550,12 @@ chmod() if ((ip = owner(1)) == NULL) return; chmod1(ip, uap->fmode); + iput(ip); } +/* + * Change mode of a file given a file descriptor. + */ fchmod() { struct a { @@ -565,14 +574,17 @@ fchmod() return; } ip = fp->f_inode; - ilock(ip); - if (u.u_uid != ip->i_uid && !suser()) { - iunlock(ip); + if (u.u_uid != ip->i_uid && !suser()) return; - } + ilock(ip); chmod1(ip, uap->fmode); + iunlock(ip); } +/* + * Change the mode on a file. 
+ * Inode must be locked before calling. + */ chmod1(ip, mode) register struct inode *ip; register int mode; @@ -598,9 +610,11 @@ ok: ip->i_flag |= ICHG; if (ip->i_flag&ITEXT && (ip->i_mode&ISVTX)==0) xrele(ip); - iput(ip); } +/* + * Set ownership given a path name. + */ chown() { struct inode *ip; @@ -614,8 +628,12 @@ chown() if (!suser() || (ip = owner(0)) == NULL) return; chown1(ip, uap->uid, uap->gid); + iput(ip); } +/* + * Set ownership given a file descriptor. + */ fchown() { struct a { @@ -635,12 +653,11 @@ fchown() return; } ip = fp->f_inode; - ilock(ip); - if (!suser()) { - iunlock(ip); + if (!suser()) return; - } + ilock(ip); chown1(ip, uap->uid, uap->gid); + iunlock(ip); } /* @@ -678,8 +695,8 @@ chown1(ip, uid, gid) change = fragroundup(fs, ip->i_size); change /= DEV_BSIZE; } - chkdq(ip, -change, 1); - chkiq(ip->i_dev, ip, ip->i_uid, 1); + (void)chkdq(ip, -change, 1); + (void)chkiq(ip->i_dev, ip, ip->i_uid, 1); dqrele(ip->i_dquot); #endif /* @@ -695,10 +712,9 @@ chown1(ip, uid, gid) ip->i_mode &= ~(ISUID|ISGID); #ifdef QUOTA ip->i_dquot = inoquota(ip); - chkdq(ip, change, 1); - chkiq(ip->i_dev, NULL, uid, 1); + (void)chkdq(ip, change, 1); + (void)chkiq(ip->i_dev, (struct inode *)NULL, uid, 1); #endif - iput(ip); } /* @@ -729,12 +745,18 @@ outime() iput(ip); } +/* + * Flush any pending I/O. + */ sync() { update(); } +/* + * Apply an advisory lock on a file descriptor. + */ flock() { struct a { @@ -753,35 +775,38 @@ flock() return; } cmd = uap->how; - flags = u.u_pofile[uap->fd] & (RDLOCK|WRLOCK); + flags = u.u_pofile[uap->fd] & (SHLOCK|EXLOCK); if (cmd&FUNLOCK) { if (flags == 0) { u.u_error = EINVAL; return; } funlocki(fp->f_inode, flags); - u.u_pofile[uap->fd] &= ~(RDLOCK|WRLOCK); + u.u_pofile[uap->fd] &= ~(SHLOCK|EXLOCK); return; } /* * No reason to write lock a file we've already * write locked, similarly with a read lock. 
*/ - if ((flags&WRLOCK) && (cmd&FWRLOCK) || - (flags&RDLOCK) && (cmd&FRDLOCK)) + if ((flags&EXLOCK) && (cmd&FEXLOCK) || + (flags&SHLOCK) && (cmd&FSHLOCK)) return; u.u_pofile[uap->fd] = flocki(fp->f_inode, u.u_pofile[uap->fd], cmd); } +/* + * Truncate a file given its path name. + */ truncate() { struct a { char *fname; - int length; + u_long length; } *uap = (struct a *)u.u_ap; struct inode *ip; - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip == NULL) return; if (access(ip, IWRITE)) @@ -791,16 +816,18 @@ truncate() goto bad; } itrunc(ip, uap->length); - return; bad: iput(ip); } +/* + * Truncate a file given a file descriptor. + */ ftruncate() { struct a { int fd; - int length; + u_long length; } *uap = (struct a *)u.u_ap; struct inode *ip; struct file *fp; @@ -819,17 +846,267 @@ ftruncate() ip = fp->f_inode; ilock(ip); itrunc(ip, uap->length); + iunlock(ip); +} + +/* + * Synch an open file. + */ +fsync() +{ + struct a { + int fd; + } *uap = (struct a *)u.u_ap; + struct inode *ip; + struct file *fp; + + fp = getf(uap->fd); + if (fp == NULL) + return; + if (fp->f_type == DTYPE_SOCKET) { + u.u_error = EINVAL; + return; + } + ip = fp->f_inode; + ilock(ip); + syncip(ip); + iunlock(ip); } +/* + * Rename system call. + * rename("foo", "bar"); + * is essentially + * unlink("bar"); + * link("foo", "bar"); + * unlink("foo"); + * but ``atomically''. Can't do full commit without saving state in the + * inode on disk which isn't feasible at this time. Best we can do is + * always guarantee the target exists. + * + * Basic algorithm is: + * + * 1) Bump link count on source while we're linking it to the + * target. This also insure the inode won't be deleted out + * from underneath us while we work. + * 2) Link source to destination. If destination already exists, + * delete it first. + * 3) Unlink source reference to inode if still around. + * 4) If a directory was moved and the parent of the destination + * is different from the source, patch the ".." 
entry in the + * directory. + * + * Source and destination must either both be directories, or both + * not be directories. If target is a directory, it must be empty. + */ rename() { -#ifdef notdef struct a { char *from; char *to; } *uap; -#endif + register struct inode *ip, *xp, *dp; + int oldparent, parentdifferent, doingdirectory; + + uap = (struct a *)u.u_ap; + ip = namei(uchar, LOOKUP | LOCKPARENT, 0); + if (ip == NULL) + return; + dp = u.u_pdir; + oldparent = 0, doingdirectory = 0; + if ((ip->i_mode&IFMT) == IFDIR) { + register struct direct *d; + + d = &u.u_dent; + /* + * Avoid "." and ".." for obvious reasons. + */ + if (d->d_name[0] == '.') { + if (d->d_namlen == 1 || + (d->d_namlen == 2 && d->d_name[1] == '.')) { + u.u_error = EINVAL; + iput(ip); + return; + } + } + oldparent = dp->i_number; + doingdirectory++; + } + irele(dp); + + /* + * 1) Bump link count while we're moving stuff + * around. If we crash somewhere before + * completing our work, the link count + * may be wrong, but correctable. + */ + ip->i_nlink++; + ip->i_flag |= ICHG; + iupdat(ip, &time, &time, 1); + iunlock(ip); + + /* + * When the target exists, both the directory + * and target inodes are returned locked. + */ + u.u_dirp = (caddr_t)uap->to; + xp = namei(uchar, CREATE | LOCKPARENT, 0); + if (u.u_error) + goto out; + dp = u.u_pdir; + /* + * 2) If target doesn't exist, link the target + * to the source and unlink the source. + * Otherwise, rewrite the target directory + * entry to reference the source inode and + * expunge the original entry's existence. + */ + parentdifferent = oldparent != dp->i_number; + if (xp == NULL) { + if (dp->i_dev != ip->i_dev) { + u.u_error = EXDEV; + goto bad; + } + /* + * Account for ".." in directory. + * When source and destination have the + * same parent we don't fool with the + * link count -- this isn't required + * because we do a similar check below. 
+ */ + if (doingdirectory && parentdifferent) { + dp->i_nlink++; + dp->i_flag |= ICHG; + iupdat(dp, &time, &time, 1); + } + direnter(ip); + if (u.u_error) + goto out; + } else { + if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) { + u.u_error = EXDEV; + goto bad; + } + /* + * Target must be empty if a directory. + * Also, insure source and target are + * compatible (both directories, or both + * not directories). + */ + if ((xp->i_mode&IFMT) == IFDIR) { + if (!dirempty(xp)) { + u.u_error = EEXIST; /* XXX */ + goto bad; + } + if (!doingdirectory) { + u.u_error = ENOTDIR; + goto bad; + } + } else if (doingdirectory) { + u.u_error = EISDIR; + goto bad; + } + dirrewrite(dp, ip); + if (u.u_error) + goto bad1; + /* + * If this is a directory we know it is + * empty and we can squash the inode and + * any space associated with it. Otherwise, + * we've got a plain file and the link count + * simply needs to be adjusted. + */ + if (doingdirectory) { + xp->i_nlink = 0; + itrunc(xp, (u_long)0); + } else + xp->i_nlink--; + xp->i_flag |= ICHG; + iput(xp); + } + + /* + * 3) Unlink the source. + */ + u.u_dirp = uap->from; + dp = namei(uchar, DELETE, 0); + /* + * Insure directory entry still exists and + * has not changed since the start of all + * this. If either has occured, forget about + * about deleting the original entry and just + * adjust the link count in the inode. + */ + if (dp == NULL || u.u_dent.d_ino != ip->i_number) { + ip->i_nlink--; + ip->i_flag |= ICHG; + } else { + /* + * If source is a directory, must adjust + * link count of parent directory also. + * If target didn't exist and source and + * target have the same parent, then we + * needn't touch the link count, it all + * balances out in the end. Otherwise, we + * must do so to reflect deletion of ".." + * done above. 
+ */ + if (doingdirectory && (xp != NULL || parentdifferent)) { + dp->i_nlink--; + dp->i_flag |= ICHG; + } + if (dirremove()) { + ip->i_nlink--; + ip->i_flag |= ICHG; + } + } + irele(ip); + if (dp) + iput(dp); + + /* + * 4) Renaming a directory with the parent + * different requires ".." to be rewritten. + * The window is still there for ".." to + * be inconsistent, but this is unavoidable, + * and a lot shorter than when it was done + * in a user process. + */ + if (doingdirectory && parentdifferent && u.u_error == 0) { + struct dirtemplate dirbuf; + u.u_dirp = uap->to; + ip = namei(uchar, LOOKUP | LOCKPARENT, 0); + if (ip == NULL) { + printf("rename: .. went away\n"); + return; + } + dp = u.u_pdir; + if ((ip->i_mode&IFMT) != IFDIR) { + printf("rename: .. not a directory\n"); + goto stuck; + } + u.u_error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, 1, (int *)0); + if (u.u_error == 0) { + dirbuf.dotdot_ino = dp->i_number; + (void) rdwri(UIO_WRITE, ip, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, 1, (int *)0); + } +stuck: + irele(dp); + iput(ip); + } + return; +bad: + iput(u.u_pdir); +bad1: + if (xp) + irele(xp); +out: + ip->i_nlink--; + ip->i_flag |= ICHG; + irele(ip); } /* diff --git a/usr/src/sys/ufs/ffs/ufs_inode.c b/usr/src/sys/ufs/ffs/ufs_inode.c index 32c0ade5d3..acff80669f 100644 --- a/usr/src/sys/ufs/ffs/ufs_inode.c +++ b/usr/src/sys/ufs/ffs/ufs_inode.c @@ -1,4 +1,4 @@ -/* ufs_inode.c 4.30 82/10/23 */ +/* ufs_inode.c 4.31 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -236,14 +236,14 @@ irele(ip) if (ip->i_count == 1) { ip->i_flag |= ILOCKED; if (ip->i_nlink <= 0) { - itrunc(ip, 0); + itrunc(ip, (u_long)0); mode = ip->i_mode; ip->i_mode = 0; ip->i_rdev = 0; ip->i_flag |= IUPD|ICHG; ifree(ip, ip->i_number, mode); #ifdef QUOTA - chkiq(ip->i_dev, ip, ip->i_uid, 0); + (void)chkiq(ip->i_dev, ip, ip->i_uid, 0); dqrele(ip->i_dquot); ip->i_dquot = NODQUOT; #endif @@ -325,146 +325,253 @@ iupdat(ip, 
ta, tm, waitfor) */ itrunc(ip, length) register struct inode *ip; - register int length; + u_long length; { register i; - daddr_t bn; - struct inode itmp; + register daddr_t lastblock; + daddr_t bn, lastdiblock, lastsiblock; register struct fs *fs; + int j; #ifdef QUOTA - register long cnt = 0; - long tloop(); + long blocksreleased = 0, nblocks; + long indirtrunc(); #endif - /* - * Only plain files, directories and symbolic - * links contain blocks. - */ - i = ip->i_mode & IFMT; - if (i != IFREG && i != IFDIR && i != IFLNK) - return; + if (ip->i_size <= length) return; - +#ifdef notdef + /* this is superfluous given size check above */ + i = ip->i_mode & IFMT; + if (i != IFREG && i != IFDIR && i != IFLNK) { + printf("itrunc: i# %d, size %d\n", ip->i_number, ip->i_size); + return; + } +#endif /* - * Clean inode on disk before freeing blocks - * to insure no duplicates if system crashes. + * Update size of file on disk before + * we start freeing blocks. If we crash + * while free'ing blocks below, the file + * size will be believed and the blocks + * returned to the free list. + * After updating the copy on disk we + * put the old size back so macros like + * blksize will work. */ - itmp = *ip; - itmp.i_size = length; - for (i = 0; i < NDADDR; i++) - itmp.i_db[i] = 0; - for (i = 0; i < NIADDR; i++) - itmp.i_ib[i] = 0; - itmp.i_flag |= ICHG|IUPD; - iupdat(&itmp, &time, &time, 1); - ip->i_flag &= ~(IUPD|IACC|ICHG); + j = ip->i_size; + ip->i_size = length; + ip->i_flag |= ICHG|IUPD; + iupdat(ip, &time, &time, 1); + ip->i_size = j; /* - * Now return blocks to free list... if machine - * crashes, they will be harmless MISSING blocks. + * Calculate last direct, single indirect and + * double indirect block (if any) which we want + * to keep. Lastblock is -1 when the file is + * truncated to 0. 
*/ fs = ip->i_fs; + lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; + lastsiblock = lastblock - NDADDR; + lastdiblock = lastsiblock - NINDIR(fs); +#ifdef QUOTA + nblocks = fs->fs_bsize / DEV_BSIZE; +#endif /* - * release double indirect block first + * Double indirect block first */ - bn = ip->i_ib[NIADDR-1]; - if (bn != (daddr_t)0) { - ip->i_ib[NIADDR - 1] = (daddr_t)0; + bn = ip->i_ib[NIADDR - 1]; + if (bn != 0) { + /* + * If lastdiblock is negative, it's value + * is meaningless; in this case we set it to + * -NINDIR(fs) so calculations performed in + * indirtrunc come out right. + */ + if (lastdiblock < 0) + lastdiblock -= lastsiblock; #ifdef QUOTA - cnt += + blocksreleased += #endif - tloop(ip, bn, 1); + indirtrunc(ip, bn, lastdiblock, 1); + if (lastdiblock < 0) { + ip->i_ib[NIADDR - 1] = 0; + free(ip, bn, (off_t)fs->fs_bsize); +#ifdef QUOTA + blocksreleased += nblocks; +#endif + } } + if (lastdiblock >= 0) + goto done; /* - * release single indirect blocks second + * Single indirect blocks second. + * First, those which can be totally + * zapped, then possibly one which + * needs to be partially cleared. */ - for (i = NIADDR - 2; i >= 0; i--) { + j = lastsiblock < 0 ? -1 : lastsiblock / NINDIR(fs); + for (i = NIADDR - 2; i > j; i--) { bn = ip->i_ib[i]; - if (bn != (daddr_t)0) { - ip->i_ib[i] = (daddr_t)0; + if (bn != 0) { #ifdef QUOTA - cnt += + blocksreleased += nblocks + #endif - tloop(ip, bn, 0); + indirtrunc(ip, bn, (daddr_t)-1, 0); + ip->i_ib[i] = 0; + free(ip, bn, (off_t)fs->fs_bsize); } } + if (lastsiblock >= 0) { + bn = ip->i_ib[j]; + if (bn != 0) +#ifdef QUOTA + blocksreleased += +#endif + indirtrunc(ip, bn, lastsiblock, 0); + goto done; + } /* - * finally release direct blocks + * All whole direct blocks. 
*/ - for (i = NDADDR - 1; i>=0; i--) { + for (i = NDADDR - 1; i > lastblock; i--) { + register int size; + bn = ip->i_db[i]; - if (bn == (daddr_t)0) + if (bn == 0) continue; - ip->i_db[i] = (daddr_t)0; -#ifndef QUOTA - fre(ip, bn, (off_t)blksize(fs, ip, i)); -#else - { int size; - fre(ip, bn, size = (off_t)blksize(fs, ip, i)); - cnt += size / DEV_BSIZE; - } + ip->i_db[i] = 0; + size = (off_t)blksize(fs, ip, i); + free(ip, bn, size); +#ifdef QUOTA + blocksreleased += size / DEV_BSIZE; +#endif + } + /* + * Finally, look for a change in size of the + * last direct block; release any frags. + */ + if (lastblock >= 0 && ip->i_db[lastblock] != 0) { + /* + * Calculate amount of space we're giving + * back as old block size minus new block size. + */ + i = blksize(fs, ip, lastblock); + ip->i_size = length; + i = i - blksize(fs, ip, lastblock); + if (i > 0) { + /* + * Block number of space to be free'd is + * the old block # plus the number of frags + * required for the storage we're keeping. + */ + bn = ip->i_db[lastblock] + + numfrags(fs, fs->fs_bsize - i); + free(ip, bn, i); +#ifdef QUOTA + blocksreleased += i / DEV_BSIZE; #endif + } } - ip->i_size = 0; +done: /* - * Inode was written and flags updated above. - * No need to modify flags here. + * Finished free'ing blocks, complete + * inode update to reflect new length. */ #ifdef QUOTA - (void) chkdq(ip, -cnt, 0); + (void) chkdq(ip, -blocksreleased, 0); #endif + ip->i_size = length; + ip->i_flag |= ICHG|IUPD; + iupdat(ip, &time, &time, 1); } +/* + * Release blocks associated with the inode ip and + * stored in the indirect block bn. Blocks are free'd + * in LIFO order up to (but not including) lastbn. If + * doubleindirect is indicated, this block is a double + * indirect block and recursive calls to indirtrunc must + * be used to cleanse single indirect blocks instead of + * a simple free. 
+ */ #ifdef QUOTA long #endif -tloop(ip, bn, indflg) +indirtrunc(ip, bn, lastbn, doubleindirect) register struct inode *ip; - daddr_t bn; - int indflg; + daddr_t bn, lastbn; + int doubleindirect; { - register i; - register struct buf *bp; + register int i; + struct buf *bp; register daddr_t *bap; register struct fs *fs; - daddr_t nb; + daddr_t nb, last; #ifdef QUOTA - register long cnt = 0; + int blocksreleased = 0, nblocks; #endif bp = NULL; fs = ip->i_fs; - for (i = NINDIR(fs) - 1; i >= 0; i--) { + last = lastbn; + if (doubleindirect) + last /= NINDIR(fs); +#ifdef QUOTA + nblocks = fs->fs_bsize / DEV_BSIZE; +#endif + for (i = NINDIR(fs) - 1; i > last; i--) { if (bp == NULL) { + struct buf *copy; + + copy = geteblk((int)fs->fs_bsize); bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize); - if (bp->b_flags & B_ERROR) { + if (bp->b_flags&B_ERROR) { + brelse(copy); brelse(bp); - return; + return (NULL); } bap = bp->b_un.b_daddr; + /* + * Update pointers before freeing blocks. + * If we crash before freeing the blocks + * they'll be recovered as missing. 
+ */ + bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, + (u_int)fs->fs_bsize); + bzero((caddr_t)&bap[last + 1], + (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t)); + bwrite(bp); + bp = copy, bap = bp->b_un.b_daddr; } nb = bap[i]; - if (nb == (daddr_t)0) + if (nb == 0) continue; - if (indflg) { + if (doubleindirect) #ifdef QUOTA - cnt += + blocksreleased += #endif - tloop(ip, nb, 0); - } else { - fre(ip, nb, (int)fs->fs_bsize); + indirtrunc(ip, nb, (daddr_t)-1, 0); + free(ip, nb, (int)fs->fs_bsize); #ifdef QUOTA - cnt += fs->fs_bsize / DEV_BSIZE; + blocksreleased += nblocks; #endif - } + } + if (doubleindirect && lastbn >= 0) { + last = lastbn % NINDIR(fs); + if (bp == NULL) + panic("indirtrunc"); + nb = bap[i]; + if (nb != 0) +#ifdef QUOTA + blocksreleased += +#endif + indirtrunc(ip, nb, last, 0); } if (bp != NULL) brelse(bp); - fre(ip, bn, (int)fs->fs_bsize); #ifdef QUOTA - cnt += fs->fs_bsize / DEV_BSIZE; - return(cnt); + return (blocksreleased); #endif } diff --git a/usr/src/sys/ufs/ffs/ufs_lookup.c b/usr/src/sys/ufs/ffs/ufs_lookup.c index 57eaf17136..0403df6fb3 100644 --- a/usr/src/sys/ufs/ffs/ufs_lookup.c +++ b/usr/src/sys/ufs/ffs/ufs_lookup.c @@ -1,4 +1,4 @@ -/* ufs_lookup.c 4.29 82/10/31 */ +/* ufs_lookup.c 4.30 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -10,20 +10,30 @@ #include "../h/buf.h" #include "../h/conf.h" #include "../h/uio.h" +#include "../h/nami.h" struct buf *blkatoff(); -int dirchk = 1; +int dirchk = 0; /* * Convert a pathname into a pointer to a locked inode, * with side effects usable in creating and removing files. * This is a very central and rather complicated routine. * * The func argument gives the routine which returns successive - * characters of the name to be translated. The flag - * argument is (0, 1, 2) depending on whether the name is to be - * (looked up, created, deleted). The follow argument is 1 when - * symbolic links are to be followed when they occur at the end of - * the name translation process. 
+ * characters of the name to be translated. + * + * The flag argument is (LOOKUP, CREATE, DELETE) depending on whether + * the name is to be (looked up, created, deleted). If flag has + * LOCKPARENT or'ed into it and the target of the pathname exists, + * namei returns both the target and its parent directory locked. + * If the file system is not maintained in a strict tree hierarchy, + * this can result in a deadlock situation. When creating and + * LOCKPARENT is specified, the target may not be ".". When deleting + * and LOCKPARENT is specified, the target may be ".", but the caller + * must check to insure it does an irele and iput instead of two iputs. + * + * The follow argument is 1 when symbolic links are to be followed + * when they occur at the end of the name translation process. * * Overall outline: * @@ -36,14 +46,19 @@ int dirchk = 1; * handle degenerate case where name is null string * search for name in directory, to found or notfound * notfound: - * if creating, return locked inode, leaving information on avail. slots + * if creating, return locked directory, leaving info on avail. slots * else return error * found: * if at end of path and deleting, return information to allow delete + * if at end of path and rewriting (create and LOCKPARENT), lock targe + * inode and return info to allow rewrite * if .. and on mounted filesys, look in mount table for parent * if symbolic link, massage name in buffer and continue at dirloop * if more components of name, do next level at dirloop * return the answer as locked inode + * + * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode, + * but unlocked. 
*/ struct inode * namei(func, flag, follow) @@ -69,7 +84,10 @@ namei(func, flag, follow) int nlink = 0; /* number of symbolic links taken */ struct inode *pdp; /* saved dp during symlink work */ int i; + int lockparent; + lockparent = flag & LOCKPARENT; + flag &= ~LOCKPARENT; /* * Get a buffer for the name to be translated, and copy the * name into the buffer. @@ -156,7 +174,7 @@ dirloop2: * case it doesn't already exist. */ slotstatus = FOUND; - if (flag == 1 && *cp == 0) { + if (flag == CREATE && *cp == 0) { slotstatus = NONE; slotfreespace = 0; slotneeded = DIRSIZ(&u.u_dent); @@ -258,10 +276,10 @@ dirloop2: /* notfound: */ /* * If creating, and at end of pathname and current - * directory has not been removed, then can consider allowing - * file to be created. + * directory has not been removed, then can consider + * allowing file to be created. */ - if (flag == 1 && *cp == 0 && dp->i_nlink != 0) { + if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) { /* * Access for write is interpreted as allowing * creation of files in the directory. @@ -321,15 +339,17 @@ found: /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. - * Note that in this case we return the directory - * inode, not the inode of the file being deleted. + * If the lockparent flag isn't set, we return only + * the directory (in u.u_pdir), otherwise we go + * on and lock the inode, being careful with ".". */ - if (flag == 2 && *cp == 0) { + if (flag == DELETE && *cp == 0) { /* * Write access to directory required to delete files. 
*/ if (access(dp, IWRITE)) goto bad; + u.u_pdir = dp; /* for dirremove() */ /* * Return pointer to current entry in u.u_offset, * and distance past previous entry (if there @@ -340,8 +360,18 @@ found: u.u_count = 0; else u.u_count = u.u_offset - prevoff; + if (lockparent) { + if (dp->i_number == u.u_dent.d_ino) + dp->i_count++; + else { + dp = iget(dp->i_dev, fs, u.u_dent.d_ino); + if (dp == NULL) { + iput(u.u_pdir); + goto bad; + } + } + } brelse(nbp); - u.u_pdir = dp; /* for dirremove() */ return (dp); } @@ -370,6 +400,33 @@ found: } } + /* + * If rewriting (rename), return the inode and the + * information required to rewrite the present directory + * Must get inode of directory entry to verify it's a + * regular file, or empty directory. + */ + if ((flag == CREATE && lockparent) && *cp == 0) { + if (access(dp, IWRITE)) + goto bad; + u.u_pdir = dp; /* for dirrewrite() */ + /* + * Careful about locking second inode. + * This can only occur if the target is ".". + */ + if (dp->i_number == u.u_dent.d_ino) { + u.u_error = EISDIR; /* XXX */ + goto bad; + } + dp = iget(dp->i_dev, fs, u.u_dent.d_ino); + if (dp == NULL) { + iput(u.u_pdir); + goto bad; + } + brelse(nbp); + return (dp); + } + /* * Check for symbolic link, which may require us * to massage the name before we continue translation. @@ -398,7 +455,7 @@ found: } ovbcopy(cp, nbp->b_un.b_addr + dp->i_size, pathlen); u.u_error = - rdwri(UIO_READ, dp, nbp->b_un.b_addr, dp->i_size, + rdwri(UIO_READ, dp, nbp->b_un.b_addr, (int)dp->i_size, 0, 1, (int *)0); if (u.u_error) goto bad2; @@ -419,7 +476,6 @@ found: fs = dp->i_fs; goto dirloop; } - irele(pdp); /* * Not a symbolic link. If more pathname, @@ -428,9 +484,14 @@ found: if (*cp == '/') { while (*cp == '/') cp++; + irele(pdp); goto dirloop; } brelse(nbp); + if (lockparent) + u.u_pdir = pdp; + else + irele(pdp); return (dp); bad2: irele(pdp); @@ -514,7 +575,7 @@ direnter(ip) * This should never push the size past a new multiple of * DIRBLKSIZE. 
*/ - if (u.u_offset+u.u_count > u.u_pdir->i_size) + if (u.u_offset + u.u_count > u.u_pdir->i_size) u.u_pdir->i_size = u.u_offset + u.u_count; /* @@ -522,8 +583,10 @@ direnter(ip) * entry. */ bp = blkatoff(u.u_pdir, u.u_offset, (char **)&dirbuf); - if (bp == 0) + if (bp == 0) { + iput(u.u_pdir); return; + } /* * Find space for the new entry. In the simple case, the @@ -570,20 +633,31 @@ direnter(ip) iput(u.u_pdir); } +/* + * Remove a directory entry after a call to namei, using the + * parameters which it left in the u. area. The u. entry + * u_offset contains the offset into the directory of the + * entry to be eliminated. The u_count field contains the + * size of the previous record in the directory. If this + * is 0, the first entry is being deleted, so we need only + * zero the inode number to mark the entry as free. If the + * entry isn't the first in the directory, we must reclaim + * the space of the now empty record by adding the record size + * to the size of the previous entry. + */ dirremove() { register struct inode *dp = u.u_pdir; register struct buf *bp; struct direct *ep; - if (u.u_count == 0) { + if (u.u_count == 0) /* * First entry in block: set d_ino to zero. */ - u.u_dent.d_ino = 0; (void) rdwri(UIO_WRITE, dp, (caddr_t)&u.u_dent, (int)DIRSIZ(&u.u_dent), u.u_offset, 1, (int *)0); - } else { + else { /* * Collapse new free space into previous entry. */ @@ -597,6 +671,21 @@ dirremove() return (1); } +/* + * Rewrite an existing directory entry to point at the inode + * supplied. The parameters describing the directory entry are + * set up by a call to namei. + */ +dirrewrite(dp, ip) + struct inode *dp, *ip; +{ + + u.u_dent.d_ino = ip->i_number; + u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&u.u_dent, + (int)DIRSIZ(&u.u_dent), u.u_offset, 1, (int *)0); + iput(dp); +} + /* * Return buffer with contents of block "offset" * from the beginning of directory "ip". 
If "res" @@ -627,3 +716,32 @@ blkatoff(ip, offset, res) *res = bp->b_un.b_addr + base; return (bp); } + +/* + * Check if a directory is empty or not. + * Inode supplied must be locked. + */ +dirempty(ip) + struct inode *ip; +{ + register off_t off; + struct direct dbuf; + register struct direct *dp = &dbuf; + int error; + + for (off = 0; off < ip->i_size; off += dp->d_reclen) { + error = rdwri(UIO_READ, ip, (caddr_t)dp, + sizeof (struct direct), off, 1, (int *)0); + if (error) + return (0); + if (dp->d_ino == 0) + continue; + if (dp->d_name[0] != '.') + return (0); + if (dp->d_namlen == 1 || + (dp->d_namlen == 2 && dp->d_name[1] == '.')) + continue; + return (0); + } + return (1); +} diff --git a/usr/src/sys/ufs/ffs/ufs_vnops.c b/usr/src/sys/ufs/ffs/ufs_vnops.c index bfce70338a..a81a284f0d 100644 --- a/usr/src/sys/ufs/ffs/ufs_vnops.c +++ b/usr/src/sys/ufs/ffs/ufs_vnops.c @@ -1,4 +1,4 @@ -/* ufs_vnops.c 4.41 82/10/19 */ +/* ufs_vnops.c 4.42 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -16,13 +16,20 @@ #include "../h/uio.h" #include "../h/socket.h" #include "../h/socketvar.h" +#include "../h/nami.h" +/* + * Change current working directory (``.''). + */ chdir() { chdirec(&u.u_cdir); } +/* + * Change notion of root (``/'') directory. + */ chroot() { @@ -30,6 +37,9 @@ chroot() chdirec(&u.u_rdir); } +/* + * Common routine for chroot and chdir. 
+ */ chdirec(ipp) register struct inode **ipp; { @@ -38,14 +48,14 @@ chdirec(ipp) char *fname; }; - ip = namei(uchar, 0, 1); - if(ip == NULL) + ip = namei(uchar, LOOKUP, 1); + if (ip == NULL) return; - if((ip->i_mode&IFMT) != IFDIR) { + if ((ip->i_mode&IFMT) != IFDIR) { u.u_error = ENOTDIR; goto bad; } - if(access(ip, IEXEC)) + if (access(ip, IEXEC)) goto bad; iunlock(ip); if (*ipp) @@ -68,23 +78,28 @@ open() int flags; int mode; } *uap; - int checkpermissions = 1; + int checkpermissions = 1, flags; uap = (struct a *)u.u_ap; - if (uap->flags&FCREATE) { - ip = namei(uchar, 1, 1); + flags = uap->flags + 1; + if ((flags&FTRUNCATE) && (flags&FWRITE) == 0) { + u.u_error = EINVAL; + return; + } + if (flags&FCREATE) { + ip = namei(uchar, CREATE, 1); if (ip == NULL) { if (u.u_error) return; ip = maknode(uap->mode&07777&(~ISVTX)); checkpermissions = 0; - uap->flags &= ~FTRUNCATE; + flags &= ~FTRUNCATE; } } else - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip == NULL) return; - open1(ip, ++uap->flags, checkpermissions); + open1(ip, flags, checkpermissions); } #ifndef NOCOMPAT @@ -100,7 +115,7 @@ ocreat() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 1, 1); + ip = namei(uchar, CREATE, 1); if (ip == NULL) { if (u.u_error) return; @@ -109,7 +124,7 @@ ocreat() return; open1(ip, FWRITE, 0); } else - open1(ip, FWRITE|FTRUNCATE, 0); + open1(ip, FWRITE|FTRUNCATE, 1); } #endif @@ -145,7 +160,7 @@ open1(ip, mode, checkpermissions) * while doing so in case we block inside flocki. 
*/ flags = 0; - if (mode&(FRDLOCK|FWRLOCK)) { + if (mode&(FSHLOCK|FEXLOCK)) { iunlock(ip); flags = flocki(ip, 0, mode); ilock(ip); @@ -153,7 +168,7 @@ open1(ip, mode, checkpermissions) goto bad; } if (mode&FTRUNCATE) - itrunc(ip, 0); + itrunc(ip, (u_long)0); iunlock(ip); if ((fp = falloc()) == NULL) goto out; @@ -189,7 +204,7 @@ mknod() uap = (struct a *)u.u_ap; if (suser()) { - ip = namei(uchar, 1, 0); + ip = namei(uchar, CREATE, 0); if (ip != NULL) { u.u_error = EEXIST; goto out; @@ -225,10 +240,10 @@ link() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 0, 1); /* well, this routine is doomed anyhow */ + ip = namei(uchar, LOOKUP, 1); /* well, this routine is doomed anyhow */ if (ip == NULL) return; - if ((ip->i_mode&IFMT)==IFDIR && !suser()) { + if ((ip->i_mode&IFMT) == IFDIR && !suser()) { iput(ip); return; } @@ -237,7 +252,7 @@ link() iupdat(ip, &time, &time, 1); iunlock(ip); u.u_dirp = (caddr_t)uap->linkname; - xp = namei(uchar, 1, 0); + xp = namei(uchar, CREATE, 0); if (xp != NULL) { u.u_error = EEXIST; iput(xp); @@ -284,7 +299,7 @@ symlink() nc++; } u.u_dirp = uap->linkname; - ip = namei(uchar, 1, 0); + ip = namei(uchar, CREATE, 0); if (ip) { iput(ip); u.u_error = EEXIST; @@ -296,6 +311,7 @@ symlink() if (ip == NULL) return; u.u_error = rdwri(UIO_WRITE, ip, uap->target, nc, 0, 0, (int *)0); + /* handle u.u_error != 0 */ iput(ip); } @@ -306,34 +322,21 @@ symlink() */ unlink() { - register struct inode *ip, *pp; struct a { char *fname; }; - int unlinkingdot = 0; + register struct inode *ip, *dp; - pp = namei(uchar, 2, 0); - if (pp == NULL) + ip = namei(uchar, DELETE | LOCKPARENT, 0); + if (ip == NULL) return; - - /* - * Check for unlink(".") - * to avoid hanging on the iget - */ - if (pp->i_number == u.u_dent.d_ino) { - ip = pp; - ip->i_count++; - unlinkingdot++; - } else - ip = iget(pp->i_dev, pp->i_fs, u.u_dent.d_ino); - if(ip == NULL) - goto out1; - if((ip->i_mode&IFMT)==IFDIR && !suser()) + dp = u.u_pdir; + if ((ip->i_mode&IFMT) == IFDIR && 
!suser()) goto out; /* * Don't unlink a mounted file. */ - if (ip->i_dev != pp->i_dev) { + if (ip->i_dev != dp->i_dev) { u.u_error = EBUSY; goto out; } @@ -344,12 +347,11 @@ unlink() ip->i_flag |= ICHG; } out: - if (unlinkingdot) + if (dp == ip) irele(ip); else iput(ip); -out1: - iput(pp); + iput(dp); } /* @@ -397,13 +399,13 @@ saccess() svgid = u.u_gid; u.u_uid = u.u_ruid; u.u_gid = u.u_rgid; - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip != NULL) { - if (uap->fmode&FACCESS_READ && access(ip, IREAD)) + if ((uap->fmode&FACCESS_READ) && access(ip, IREAD)) goto done; - if (uap->fmode&FACCESS_WRITE && access(ip, IWRITE)) + if ((uap->fmode&FACCESS_WRITE) && access(ip, IWRITE)) goto done; - if (uap->fmode&FACCESS_EXECUTE && access(ip, IEXEC)) + if ((uap->fmode&FACCESS_EXECUTE) && access(ip, IEXEC)) goto done; done: iput(ip); @@ -445,7 +447,7 @@ stat() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip == NULL) return; stat1(ip, uap->sb); @@ -464,7 +466,7 @@ lstat() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 0, 0); + ip = namei(uchar, LOOKUP, 0); if (ip == NULL) return; stat1(ip, uap->sb); @@ -520,7 +522,7 @@ readlink() } *uap = (struct a *)u.u_ap; int resid; - ip = namei(uchar, 0, 0); + ip = namei(uchar, LOOKUP, 0); if (ip == NULL) return; if ((ip->i_mode&IFMT) != IFLNK) { @@ -533,6 +535,9 @@ out: u.u_r.r_val1 = uap->count - resid; } +/* + * Change mode of a file given path name. + */ chmod() { struct inode *ip; @@ -545,8 +550,12 @@ chmod() if ((ip = owner(1)) == NULL) return; chmod1(ip, uap->fmode); + iput(ip); } +/* + * Change mode of a file given a file descriptor. + */ fchmod() { struct a { @@ -565,14 +574,17 @@ fchmod() return; } ip = fp->f_inode; - ilock(ip); - if (u.u_uid != ip->i_uid && !suser()) { - iunlock(ip); + if (u.u_uid != ip->i_uid && !suser()) return; - } + ilock(ip); chmod1(ip, uap->fmode); + iunlock(ip); } +/* + * Change the mode on a file. 
+ * Inode must be locked before calling. + */ chmod1(ip, mode) register struct inode *ip; register int mode; @@ -598,9 +610,11 @@ ok: ip->i_flag |= ICHG; if (ip->i_flag&ITEXT && (ip->i_mode&ISVTX)==0) xrele(ip); - iput(ip); } +/* + * Set ownership given a path name. + */ chown() { struct inode *ip; @@ -614,8 +628,12 @@ chown() if (!suser() || (ip = owner(0)) == NULL) return; chown1(ip, uap->uid, uap->gid); + iput(ip); } +/* + * Set ownership given a file descriptor. + */ fchown() { struct a { @@ -635,12 +653,11 @@ fchown() return; } ip = fp->f_inode; - ilock(ip); - if (!suser()) { - iunlock(ip); + if (!suser()) return; - } + ilock(ip); chown1(ip, uap->uid, uap->gid); + iunlock(ip); } /* @@ -678,8 +695,8 @@ chown1(ip, uid, gid) change = fragroundup(fs, ip->i_size); change /= DEV_BSIZE; } - chkdq(ip, -change, 1); - chkiq(ip->i_dev, ip, ip->i_uid, 1); + (void)chkdq(ip, -change, 1); + (void)chkiq(ip->i_dev, ip, ip->i_uid, 1); dqrele(ip->i_dquot); #endif /* @@ -695,10 +712,9 @@ chown1(ip, uid, gid) ip->i_mode &= ~(ISUID|ISGID); #ifdef QUOTA ip->i_dquot = inoquota(ip); - chkdq(ip, change, 1); - chkiq(ip->i_dev, NULL, uid, 1); + (void)chkdq(ip, change, 1); + (void)chkiq(ip->i_dev, (struct inode *)NULL, uid, 1); #endif - iput(ip); } /* @@ -729,12 +745,18 @@ outime() iput(ip); } +/* + * Flush any pending I/O. + */ sync() { update(); } +/* + * Apply an advisory lock on a file descriptor. + */ flock() { struct a { @@ -753,35 +775,38 @@ flock() return; } cmd = uap->how; - flags = u.u_pofile[uap->fd] & (RDLOCK|WRLOCK); + flags = u.u_pofile[uap->fd] & (SHLOCK|EXLOCK); if (cmd&FUNLOCK) { if (flags == 0) { u.u_error = EINVAL; return; } funlocki(fp->f_inode, flags); - u.u_pofile[uap->fd] &= ~(RDLOCK|WRLOCK); + u.u_pofile[uap->fd] &= ~(SHLOCK|EXLOCK); return; } /* * No reason to write lock a file we've already * write locked, similarly with a read lock. 
*/ - if ((flags&WRLOCK) && (cmd&FWRLOCK) || - (flags&RDLOCK) && (cmd&FRDLOCK)) + if ((flags&EXLOCK) && (cmd&FEXLOCK) || + (flags&SHLOCK) && (cmd&FSHLOCK)) return; u.u_pofile[uap->fd] = flocki(fp->f_inode, u.u_pofile[uap->fd], cmd); } +/* + * Truncate a file given its path name. + */ truncate() { struct a { char *fname; - int length; + u_long length; } *uap = (struct a *)u.u_ap; struct inode *ip; - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip == NULL) return; if (access(ip, IWRITE)) @@ -791,16 +816,18 @@ truncate() goto bad; } itrunc(ip, uap->length); - return; bad: iput(ip); } +/* + * Truncate a file given a file descriptor. + */ ftruncate() { struct a { int fd; - int length; + u_long length; } *uap = (struct a *)u.u_ap; struct inode *ip; struct file *fp; @@ -819,17 +846,267 @@ ftruncate() ip = fp->f_inode; ilock(ip); itrunc(ip, uap->length); + iunlock(ip); +} + +/* + * Synch an open file. + */ +fsync() +{ + struct a { + int fd; + } *uap = (struct a *)u.u_ap; + struct inode *ip; + struct file *fp; + + fp = getf(uap->fd); + if (fp == NULL) + return; + if (fp->f_type == DTYPE_SOCKET) { + u.u_error = EINVAL; + return; + } + ip = fp->f_inode; + ilock(ip); + syncip(ip); + iunlock(ip); } +/* + * Rename system call. + * rename("foo", "bar"); + * is essentially + * unlink("bar"); + * link("foo", "bar"); + * unlink("foo"); + * but ``atomically''. Can't do full commit without saving state in the + * inode on disk which isn't feasible at this time. Best we can do is + * always guarantee the target exists. + * + * Basic algorithm is: + * + * 1) Bump link count on source while we're linking it to the + * target. This also insure the inode won't be deleted out + * from underneath us while we work. + * 2) Link source to destination. If destination already exists, + * delete it first. + * 3) Unlink source reference to inode if still around. + * 4) If a directory was moved and the parent of the destination + * is different from the source, patch the ".." 
entry in the + * directory. + * + * Source and destination must either both be directories, or both + * not be directories. If target is a directory, it must be empty. + */ rename() { -#ifdef notdef struct a { char *from; char *to; } *uap; -#endif + register struct inode *ip, *xp, *dp; + int oldparent, parentdifferent, doingdirectory; + + uap = (struct a *)u.u_ap; + ip = namei(uchar, LOOKUP | LOCKPARENT, 0); + if (ip == NULL) + return; + dp = u.u_pdir; + oldparent = 0, doingdirectory = 0; + if ((ip->i_mode&IFMT) == IFDIR) { + register struct direct *d; + + d = &u.u_dent; + /* + * Avoid "." and ".." for obvious reasons. + */ + if (d->d_name[0] == '.') { + if (d->d_namlen == 1 || + (d->d_namlen == 2 && d->d_name[1] == '.')) { + u.u_error = EINVAL; + iput(ip); + return; + } + } + oldparent = dp->i_number; + doingdirectory++; + } + irele(dp); + + /* + * 1) Bump link count while we're moving stuff + * around. If we crash somewhere before + * completing our work, the link count + * may be wrong, but correctable. + */ + ip->i_nlink++; + ip->i_flag |= ICHG; + iupdat(ip, &time, &time, 1); + iunlock(ip); + + /* + * When the target exists, both the directory + * and target inodes are returned locked. + */ + u.u_dirp = (caddr_t)uap->to; + xp = namei(uchar, CREATE | LOCKPARENT, 0); + if (u.u_error) + goto out; + dp = u.u_pdir; + /* + * 2) If target doesn't exist, link the target + * to the source and unlink the source. + * Otherwise, rewrite the target directory + * entry to reference the source inode and + * expunge the original entry's existence. + */ + parentdifferent = oldparent != dp->i_number; + if (xp == NULL) { + if (dp->i_dev != ip->i_dev) { + u.u_error = EXDEV; + goto bad; + } + /* + * Account for ".." in directory. + * When source and destination have the + * same parent we don't fool with the + * link count -- this isn't required + * because we do a similar check below. 
+ */ + if (doingdirectory && parentdifferent) { + dp->i_nlink++; + dp->i_flag |= ICHG; + iupdat(dp, &time, &time, 1); + } + direnter(ip); + if (u.u_error) + goto out; + } else { + if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) { + u.u_error = EXDEV; + goto bad; + } + /* + * Target must be empty if a directory. + * Also, insure source and target are + * compatible (both directories, or both + * not directories). + */ + if ((xp->i_mode&IFMT) == IFDIR) { + if (!dirempty(xp)) { + u.u_error = EEXIST; /* XXX */ + goto bad; + } + if (!doingdirectory) { + u.u_error = ENOTDIR; + goto bad; + } + } else if (doingdirectory) { + u.u_error = EISDIR; + goto bad; + } + dirrewrite(dp, ip); + if (u.u_error) + goto bad1; + /* + * If this is a directory we know it is + * empty and we can squash the inode and + * any space associated with it. Otherwise, + * we've got a plain file and the link count + * simply needs to be adjusted. + */ + if (doingdirectory) { + xp->i_nlink = 0; + itrunc(xp, (u_long)0); + } else + xp->i_nlink--; + xp->i_flag |= ICHG; + iput(xp); + } + + /* + * 3) Unlink the source. + */ + u.u_dirp = uap->from; + dp = namei(uchar, DELETE, 0); + /* + * Insure directory entry still exists and + * has not changed since the start of all + * this. If either has occured, forget about + * about deleting the original entry and just + * adjust the link count in the inode. + */ + if (dp == NULL || u.u_dent.d_ino != ip->i_number) { + ip->i_nlink--; + ip->i_flag |= ICHG; + } else { + /* + * If source is a directory, must adjust + * link count of parent directory also. + * If target didn't exist and source and + * target have the same parent, then we + * needn't touch the link count, it all + * balances out in the end. Otherwise, we + * must do so to reflect deletion of ".." + * done above. 
+ */ + if (doingdirectory && (xp != NULL || parentdifferent)) { + dp->i_nlink--; + dp->i_flag |= ICHG; + } + if (dirremove()) { + ip->i_nlink--; + ip->i_flag |= ICHG; + } + } + irele(ip); + if (dp) + iput(dp); + + /* + * 4) Renaming a directory with the parent + * different requires ".." to be rewritten. + * The window is still there for ".." to + * be inconsistent, but this is unavoidable, + * and a lot shorter than when it was done + * in a user process. + */ + if (doingdirectory && parentdifferent && u.u_error == 0) { + struct dirtemplate dirbuf; + u.u_dirp = uap->to; + ip = namei(uchar, LOOKUP | LOCKPARENT, 0); + if (ip == NULL) { + printf("rename: .. went away\n"); + return; + } + dp = u.u_pdir; + if ((ip->i_mode&IFMT) != IFDIR) { + printf("rename: .. not a directory\n"); + goto stuck; + } + u.u_error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, 1, (int *)0); + if (u.u_error == 0) { + dirbuf.dotdot_ino = dp->i_number; + (void) rdwri(UIO_WRITE, ip, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, 1, (int *)0); + } +stuck: + irele(dp); + iput(ip); + } + return; +bad: + iput(u.u_pdir); +bad1: + if (xp) + irele(xp); +out: + ip->i_nlink--; + ip->i_flag |= ICHG; + irele(ip); } /* diff --git a/usr/src/sys/ufs/lfs/lfs_alloc.c b/usr/src/sys/ufs/lfs/lfs_alloc.c index 1866026e88..59f889a4e6 100644 --- a/usr/src/sys/ufs/lfs/lfs_alloc.c +++ b/usr/src/sys/ufs/lfs/lfs_alloc.c @@ -1,4 +1,4 @@ -/* lfs_alloc.c 2.18 82/10/21 */ +/* lfs_alloc.c 2.19 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -13,8 +13,8 @@ #include "../h/kernel.h" extern u_long hashalloc(); -extern u_long ialloccg(); -extern u_long alloccg(); +extern ino_t ialloccg(); +extern daddr_t alloccg(); extern daddr_t alloccgblk(); extern daddr_t fragextend(); extern daddr_t blkpref(); @@ -74,7 +74,8 @@ alloc(ip, bpref, size) cg = itog(fs, ip->i_number); else cg = dtog(fs, bpref); - bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size, alloccg); + bno = 
(daddr_t)hashalloc(ip, cg, (long)bpref, size, + (u_long (*)())alloccg); if (bno <= 0) goto nospace; bp = getblk(ip->i_dev, fsbtodb(fs, bno), size); @@ -137,12 +138,13 @@ realloccg(ip, bprev, bpref, osize, nsize) } } while (brealloc(bp, nsize) == 0); bp->b_flags |= B_DONE; - bzero(bp->b_un.b_addr + osize, nsize - osize); + bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize); return (bp); } if (bpref >= fs->fs_size) bpref = 0; - bno = (daddr_t)hashalloc(ip, cg, (long)bpref, nsize, alloccg); + bno = (daddr_t)hashalloc(ip, cg, (long)bpref, nsize, + (u_long (*)())alloccg); if (bno > 0) { obp = bread(ip->i_dev, fsbtodb(fs, bprev), osize); if (obp->b_flags & B_ERROR) { @@ -151,9 +153,9 @@ realloccg(ip, bprev, bpref, osize, nsize) } bp = getblk(ip->i_dev, fsbtodb(fs, bno), nsize); bcopy(obp->b_un.b_addr, bp->b_un.b_addr, (u_int)osize); - bzero(bp->b_un.b_addr + osize, nsize - osize); + bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize); brelse(obp); - fre(ip, bprev, (off_t)osize); + free(ip, bprev, (off_t)osize); return (bp); } nospace: @@ -196,7 +198,7 @@ ialloc(pip, ipref, mode) if (fs->fs_cstotal.cs_nifree == 0) goto noinodes; #ifdef QUOTA - if (chkiq(pip->i_dev, NULL, u.u_uid, 0)) + if (chkiq(pip->i_dev, (struct inode *)NULL, u.u_uid, 0)) return(NULL); #endif if (ipref >= fs->fs_ncg * fs->fs_ipg) @@ -230,6 +232,7 @@ noinodes: * among those cylinder groups with above the average number of * free inodes, the one with the smallest number of directories. */ +ino_t dirpref(fs) register struct fs *fs; { @@ -244,35 +247,85 @@ dirpref(fs) mincg = cg; minndir = fs->fs_cs(fs, cg).cs_ndir; } - return (fs->fs_ipg * mincg); + return ((ino_t)(fs->fs_ipg * mincg)); } /* - * Select a cylinder to place a large block of data. - * - * The policy implemented by this algorithm is to maintain a - * rotor that sweeps the cylinder groups. When a block is - * needed, the rotor is advanced until a cylinder group with - * greater than the average number of free blocks is found. 
+ * Select the desired position for the next block in a file. The file is + * logically divided into sections. The first section is composed of the + * direct blocks. Each additional section contains fs_maxbpg blocks. + * + * If no blocks have been allocated in the first section, the policy is to + * request a block in the same cylinder group as the inode that describes + * the file. If no blocks have been allocated in any other section, the + * policy is to place the section in a cylinder group with a greater than + * average number of free blocks. An appropriate cylinder group is found + * by maintaining a rotor that sweeps the cylinder groups. When a new + * group of blocks is needed, the rotor is advanced until a cylinder group + * with greater than the average number of free blocks is found. + * + * If a section is already partially allocated, the policy is to + * contiguously allocate fs_maxcontig blocks. The end of one of these + * contiguous blocks and the beginning of the next is physically separated + * so that the disk head will be in transit between them for at least + * fs_rotdelay milliseconds. This is to allow time for the processor to + * schedule another I/O transfer. 
*/ daddr_t -blkpref(fs) - register struct fs *fs; +blkpref(ip, lbn, indx, bap) + struct inode *ip; + daddr_t lbn; + int indx; + daddr_t *bap; { + register struct fs *fs; int cg, avgbfree; + daddr_t nextblk; - avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; - for (cg = fs->fs_cgrotor + 1; cg < fs->fs_ncg; cg++) - if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - fs->fs_cgrotor = cg; - return (fs->fs_fpg * cg + fs->fs_frag); - } - for (cg = 0; cg <= fs->fs_cgrotor; cg++) - if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - fs->fs_cgrotor = cg; + fs = ip->i_fs; + if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { + if (lbn < NDADDR) { + cg = itog(fs, ip->i_number); return (fs->fs_fpg * cg + fs->fs_frag); } - return (NULL); + /* + * Find a cylinder with greater than average number of + * unused data blocks. + */ + avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; + for (cg = fs->fs_cgrotor + 1; cg < fs->fs_ncg; cg++) + if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + return (fs->fs_fpg * cg + fs->fs_frag); + } + for (cg = 0; cg <= fs->fs_cgrotor; cg++) + if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + return (fs->fs_fpg * cg + fs->fs_frag); + } + return (NULL); + } + /* + * One or more previous blocks have been laid out. If less + * than fs_maxcontig previous blocks are contiguous, the + * next block is requested contiguously, otherwise it is + * requested rotationally delayed by fs_rotdelay milliseconds. + */ + nextblk = bap[indx - 1] + fs->fs_frag; + if (indx > fs->fs_maxcontig && + bap[indx - fs->fs_maxcontig] + fs->fs_frag * fs->fs_maxcontig + != nextblk) + return (nextblk); + if (fs->fs_rotdelay != 0) + /* + * Here we convert ms of delay to frags as: + * (frags) = (ms) * (rev/sec) * (sect/rev) / + * ((sect/frag) * (ms/sec)) + * then round up to the next block. 
+ */ + nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / + (NSPF(fs) * 1000), fs->fs_frag); + return (nextblk); } /* @@ -400,7 +453,7 @@ fragextend(ip, cg, bprev, osize, nsize) * Check to see if a block of the apprpriate size is available, * and if it is, allocate it. */ -u_long +daddr_t alloccg(ip, cg, bpref, size) struct inode *ip; int cg; @@ -505,14 +558,10 @@ alloccgblk(fs, cgp, bpref) /* * if the requested block is available, use it */ -/* - * disallow sequential layout. - * if (isblock(fs, cgp->cg_free, bpref/fs->fs_frag)) { bno = bpref; goto gotit; } - */ /* * check for a block available on the same cylinder */ @@ -527,29 +576,13 @@ alloccgblk(fs, cgp, bpref) bpref = howmany(fs->fs_spc * cylno, NSPF(fs)); goto norot; } - /* - * find a block that is rotationally optimal - */ - cylbp = cgp->cg_b[cylno]; - if (fs->fs_rotdelay == 0) { - pos = cbtorpos(fs, bpref); - } else { - /* - * here we convert ms of delay to frags as: - * (frags) = (ms) * (rev/sec) * (sect/rev) / - * ((sect/frag) * (ms/sec)) - * then round up to the next rotational position - */ - bpref += fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / - (NSPF(fs) * 1000); - pos = cbtorpos(fs, bpref); - pos = (pos + 1) % NRPOS; - } /* * check the summary information to see if a block is * available in the requested cylinder starting at the - * optimal rotational position and proceeding around. + * requested rotational position and proceeding around. */ + cylbp = cgp->cg_b[cylno]; + pos = cbtorpos(fs, bpref); for (i = pos; i < NRPOS; i++) if (cylbp[i] > 0) break; @@ -612,7 +645,7 @@ gotit: * 2) allocate the next available inode after the requested * inode in the specified cylinder group. */ -u_long +ino_t ialloccg(ip, cg, ipref, mode) struct inode *ip; int cg; @@ -673,7 +706,7 @@ gotit: * free map. If a fragment is deallocated, a possible * block reassembly is checked. 
*/ -fre(ip, bno, size) +free(ip, bno, size) register struct inode *ip; daddr_t bno; off_t size; @@ -873,37 +906,6 @@ mapsearch(fs, cgp, bpref, allocsiz) return (-1); } -/* - * Getfs maps a device number into a pointer to the incore super block. - * - * The algorithm is a linear search through the mount table. A - * consistency check of the super block magic number is performed. - * - * panic: no fs -- the device is not mounted. - * this "cannot happen" - */ -struct fs * -getfs(dev) - dev_t dev; -{ - register struct mount *mp; - register struct fs *fs; - - for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++) { - if (mp->m_bufp == NULL || mp->m_dev != dev) - continue; - fs = mp->m_bufp->b_un.b_fs; - if (fs->fs_magic != FS_MAGIC) { - printf("dev = 0x%x, fs = %s\n", dev, fs->fs_fsmnt); - panic("getfs: bad magic"); - } - return (fs); - } - printf("dev = 0x%x\n", dev); - panic("getfs: no fs"); - return (NULL); -} - /* * Fserr prints the name of a file system with an error diagnostic. * @@ -917,82 +919,3 @@ fserr(fs, cp) printf("%s: %s\n", fs->fs_fsmnt, cp); } - -/* - * Getfsx returns the index in the file system - * table of the specified device. The swap device - * is also assigned a pseudo-index. The index may - * be used as a compressed indication of the location - * of a block, recording - * - * rather than - * - * provided the information need remain valid only - * as long as the file system is mounted. - */ -getfsx(dev) - dev_t dev; -{ - register struct mount *mp; - - if (dev == swapdev) - return (MSWAPX); - for(mp = &mount[0]; mp < &mount[NMOUNT]; mp++) - if (mp->m_dev == dev) - return (mp - &mount[0]); - return (-1); -} - -/* - * Update is the internal name of 'sync'. It goes through the disk - * queues to initiate sandbagged IO; goes through the inodes to write - * modified nodes; and it goes through the mount table to initiate - * the writing of the modified super blocks. 
- */ -update() -{ - register struct inode *ip; - register struct mount *mp; - struct fs *fs; - - if (updlock) - return; - updlock++; - /* - * Write back modified superblocks. - * Consistency check that the superblock - * of each file system is still in the buffer cache. - */ - for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++) { - if (mp->m_bufp == NULL) - continue; - fs = mp->m_bufp->b_un.b_fs; - if (fs->fs_fmod == 0) - continue; - if (fs->fs_ronly != 0) { /* XXX */ - printf("fs = %s\n", fs->fs_fsmnt); - panic("update: rofs mod"); - } - fs->fs_fmod = 0; - fs->fs_time = time.tv_sec; - sbupdate(mp); - } - /* - * Write back each (modified) inode. - */ - for (ip = inode; ip < inodeNINODE; ip++) { - if ((ip->i_flag & ILOCKED) != 0 || ip->i_count == 0) - continue; - ip->i_flag |= ILOCKED; - ip->i_count++; - iupdat(ip, &time, &time, 0); - iput(ip); - } - updlock = 0; - /* - * Force stale buffer cache information to be flushed, - * for all devices. - */ - bflush(NODEV); -} - diff --git a/usr/src/sys/ufs/lfs/lfs_balloc.c b/usr/src/sys/ufs/lfs/lfs_balloc.c index 5f90878e57..27477dfbf8 100644 --- a/usr/src/sys/ufs/lfs/lfs_balloc.c +++ b/usr/src/sys/ufs/lfs/lfs_balloc.c @@ -1,4 +1,4 @@ -/* lfs_balloc.c 5.2 82/09/25 */ +/* lfs_balloc.c 5.3 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -51,8 +51,8 @@ bmap(ip, bn, rwflg, size) osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { bp = realloccg(ip, ip->i_db[nb], - blkpref(ip, nb, nb, &ip->i_db[0]), - osize, fs->fs_bsize); + blkpref(ip, nb, (int)nb, &ip->i_db[0]), + osize, (int)fs->fs_bsize); ip->i_size = (nb + 1) * fs->fs_bsize; ip->i_db[nb] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IUPD|ICHG; @@ -77,7 +77,7 @@ bmap(ip, bn, rwflg, size) if (nsize <= osize) goto gotit; bp = realloccg(ip, nb, - blkpref(ip, bn, bn, &ip->i_db[0]), + blkpref(ip, bn, (int)bn, &ip->i_db[0]), osize, nsize); } else { if (ip->i_size < (bn + 1) * fs->fs_bsize) @@ -85,7 +85,7 @@ bmap(ip, bn, rwflg, size) else nsize = 
fs->fs_bsize; bp = alloc(ip, - blkpref(ip, bn, bn, &ip->i_db[0]), + blkpref(ip, bn, (int)bn, &ip->i_db[0]), nsize); } if (bp == NULL) @@ -136,8 +136,8 @@ gotit: if (nb == 0) { if (rwflg == B_READ) return ((daddr_t)-1); - pref = blkpref(ip, lbn, 0, 0); - bp = alloc(ip, pref, fs->fs_bsize); + pref = blkpref(ip, lbn, 0, (daddr_t *)0); + bp = alloc(ip, pref, (int)fs->fs_bsize); if (bp == NULL) return ((daddr_t)-1); nb = dbtofsb(fs, bp->b_blkno); @@ -154,7 +154,7 @@ gotit: * fetch through the indirect blocks */ for (; j <= NIADDR; j++) { - bp = bread(ip->i_dev, fsbtodb(fs, nb), fs->fs_bsize); + bp = bread(ip->i_dev, fsbtodb(fs, nb), (int)fs->fs_bsize); if (bp->b_flags & B_ERROR) { brelse(bp); return ((daddr_t)0); @@ -170,10 +170,11 @@ gotit: } if (pref == 0) if (j < NIADDR) - pref = blkpref(ip, lbn, 0, 0); + pref = blkpref(ip, lbn, 0, + (daddr_t *)0); else pref = blkpref(ip, lbn, i, &bap[0]); - nbp = alloc(ip, pref, fs->fs_bsize); + nbp = alloc(ip, pref, (int)fs->fs_bsize); if (nbp == NULL) { brelse(bp); return ((daddr_t)-1); diff --git a/usr/src/sys/ufs/lfs/lfs_inode.c b/usr/src/sys/ufs/lfs/lfs_inode.c index 6d672139c4..4c99428b49 100644 --- a/usr/src/sys/ufs/lfs/lfs_inode.c +++ b/usr/src/sys/ufs/lfs/lfs_inode.c @@ -1,4 +1,4 @@ -/* lfs_inode.c 4.30 82/10/23 */ +/* lfs_inode.c 4.31 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -236,14 +236,14 @@ irele(ip) if (ip->i_count == 1) { ip->i_flag |= ILOCKED; if (ip->i_nlink <= 0) { - itrunc(ip, 0); + itrunc(ip, (u_long)0); mode = ip->i_mode; ip->i_mode = 0; ip->i_rdev = 0; ip->i_flag |= IUPD|ICHG; ifree(ip, ip->i_number, mode); #ifdef QUOTA - chkiq(ip->i_dev, ip, ip->i_uid, 0); + (void)chkiq(ip->i_dev, ip, ip->i_uid, 0); dqrele(ip->i_dquot); ip->i_dquot = NODQUOT; #endif @@ -325,146 +325,253 @@ iupdat(ip, ta, tm, waitfor) */ itrunc(ip, length) register struct inode *ip; - register int length; + u_long length; { register i; - daddr_t bn; - struct inode itmp; + register daddr_t lastblock; + daddr_t bn, 
lastdiblock, lastsiblock; register struct fs *fs; + int j; #ifdef QUOTA - register long cnt = 0; - long tloop(); + long blocksreleased = 0, nblocks; + long indirtrunc(); #endif - /* - * Only plain files, directories and symbolic - * links contain blocks. - */ - i = ip->i_mode & IFMT; - if (i != IFREG && i != IFDIR && i != IFLNK) - return; + if (ip->i_size <= length) return; - +#ifdef notdef + /* this is superfluous given size check above */ + i = ip->i_mode & IFMT; + if (i != IFREG && i != IFDIR && i != IFLNK) { + printf("itrunc: i# %d, size %d\n", ip->i_number, ip->i_size); + return; + } +#endif /* - * Clean inode on disk before freeing blocks - * to insure no duplicates if system crashes. + * Update size of file on disk before + * we start freeing blocks. If we crash + * while free'ing blocks below, the file + * size will be believed and the blocks + * returned to the free list. + * After updating the copy on disk we + * put the old size back so macros like + * blksize will work. */ - itmp = *ip; - itmp.i_size = length; - for (i = 0; i < NDADDR; i++) - itmp.i_db[i] = 0; - for (i = 0; i < NIADDR; i++) - itmp.i_ib[i] = 0; - itmp.i_flag |= ICHG|IUPD; - iupdat(&itmp, &time, &time, 1); - ip->i_flag &= ~(IUPD|IACC|ICHG); + j = ip->i_size; + ip->i_size = length; + ip->i_flag |= ICHG|IUPD; + iupdat(ip, &time, &time, 1); + ip->i_size = j; /* - * Now return blocks to free list... if machine - * crashes, they will be harmless MISSING blocks. + * Calculate last direct, single indirect and + * double indirect block (if any) which we want + * to keep. Lastblock is -1 when the file is + * truncated to 0. 
*/ fs = ip->i_fs; + lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; + lastsiblock = lastblock - NDADDR; + lastdiblock = lastsiblock - NINDIR(fs); +#ifdef QUOTA + nblocks = fs->fs_bsize / DEV_BSIZE; +#endif /* - * release double indirect block first + * Double indirect block first */ - bn = ip->i_ib[NIADDR-1]; - if (bn != (daddr_t)0) { - ip->i_ib[NIADDR - 1] = (daddr_t)0; + bn = ip->i_ib[NIADDR - 1]; + if (bn != 0) { + /* + * If lastdiblock is negative, it's value + * is meaningless; in this case we set it to + * -NINDIR(fs) so calculations performed in + * indirtrunc come out right. + */ + if (lastdiblock < 0) + lastdiblock -= lastsiblock; #ifdef QUOTA - cnt += + blocksreleased += #endif - tloop(ip, bn, 1); + indirtrunc(ip, bn, lastdiblock, 1); + if (lastdiblock < 0) { + ip->i_ib[NIADDR - 1] = 0; + free(ip, bn, (off_t)fs->fs_bsize); +#ifdef QUOTA + blocksreleased += nblocks; +#endif + } } + if (lastdiblock >= 0) + goto done; /* - * release single indirect blocks second + * Single indirect blocks second. + * First, those which can be totally + * zapped, then possibly one which + * needs to be partially cleared. */ - for (i = NIADDR - 2; i >= 0; i--) { + j = lastsiblock < 0 ? -1 : lastsiblock / NINDIR(fs); + for (i = NIADDR - 2; i > j; i--) { bn = ip->i_ib[i]; - if (bn != (daddr_t)0) { - ip->i_ib[i] = (daddr_t)0; + if (bn != 0) { #ifdef QUOTA - cnt += + blocksreleased += nblocks + #endif - tloop(ip, bn, 0); + indirtrunc(ip, bn, (daddr_t)-1, 0); + ip->i_ib[i] = 0; + free(ip, bn, (off_t)fs->fs_bsize); } } + if (lastsiblock >= 0) { + bn = ip->i_ib[j]; + if (bn != 0) +#ifdef QUOTA + blocksreleased += +#endif + indirtrunc(ip, bn, lastsiblock, 0); + goto done; + } /* - * finally release direct blocks + * All whole direct blocks. 
*/ - for (i = NDADDR - 1; i>=0; i--) { + for (i = NDADDR - 1; i > lastblock; i--) { + register int size; + bn = ip->i_db[i]; - if (bn == (daddr_t)0) + if (bn == 0) continue; - ip->i_db[i] = (daddr_t)0; -#ifndef QUOTA - fre(ip, bn, (off_t)blksize(fs, ip, i)); -#else - { int size; - fre(ip, bn, size = (off_t)blksize(fs, ip, i)); - cnt += size / DEV_BSIZE; - } + ip->i_db[i] = 0; + size = (off_t)blksize(fs, ip, i); + free(ip, bn, size); +#ifdef QUOTA + blocksreleased += size / DEV_BSIZE; +#endif + } + /* + * Finally, look for a change in size of the + * last direct block; release any frags. + */ + if (lastblock >= 0 && ip->i_db[lastblock] != 0) { + /* + * Calculate amount of space we're giving + * back as old block size minus new block size. + */ + i = blksize(fs, ip, lastblock); + ip->i_size = length; + i = i - blksize(fs, ip, lastblock); + if (i > 0) { + /* + * Block number of space to be free'd is + * the old block # plus the number of frags + * required for the storage we're keeping. + */ + bn = ip->i_db[lastblock] + + numfrags(fs, fs->fs_bsize - i); + free(ip, bn, i); +#ifdef QUOTA + blocksreleased += i / DEV_BSIZE; #endif + } } - ip->i_size = 0; +done: /* - * Inode was written and flags updated above. - * No need to modify flags here. + * Finished free'ing blocks, complete + * inode update to reflect new length. */ #ifdef QUOTA - (void) chkdq(ip, -cnt, 0); + (void) chkdq(ip, -blocksreleased, 0); #endif + ip->i_size = length; + ip->i_flag |= ICHG|IUPD; + iupdat(ip, &time, &time, 1); } +/* + * Release blocks associated with the inode ip and + * stored in the indirect block bn. Blocks are free'd + * in LIFO order up to (but not including) lastbn. If + * doubleindirect is indicated, this block is a double + * indirect block and recursive calls to indirtrunc must + * be used to cleanse single indirect blocks instead of + * a simple free. 
+ */ #ifdef QUOTA long #endif -tloop(ip, bn, indflg) +indirtrunc(ip, bn, lastbn, doubleindirect) register struct inode *ip; - daddr_t bn; - int indflg; + daddr_t bn, lastbn; + int doubleindirect; { - register i; - register struct buf *bp; + register int i; + struct buf *bp; register daddr_t *bap; register struct fs *fs; - daddr_t nb; + daddr_t nb, last; #ifdef QUOTA - register long cnt = 0; + int blocksreleased = 0, nblocks; #endif bp = NULL; fs = ip->i_fs; - for (i = NINDIR(fs) - 1; i >= 0; i--) { + last = lastbn; + if (doubleindirect) + last /= NINDIR(fs); +#ifdef QUOTA + nblocks = fs->fs_bsize / DEV_BSIZE; +#endif + for (i = NINDIR(fs) - 1; i > last; i--) { if (bp == NULL) { + struct buf *copy; + + copy = geteblk((int)fs->fs_bsize); bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize); - if (bp->b_flags & B_ERROR) { + if (bp->b_flags&B_ERROR) { + brelse(copy); brelse(bp); - return; + return (NULL); } bap = bp->b_un.b_daddr; + /* + * Update pointers before freeing blocks. + * If we crash before freeing the blocks + * they'll be recovered as missing. 
+ */ + bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, + (u_int)fs->fs_bsize); + bzero((caddr_t)&bap[last + 1], + (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t)); + bwrite(bp); + bp = copy, bap = bp->b_un.b_daddr; } nb = bap[i]; - if (nb == (daddr_t)0) + if (nb == 0) continue; - if (indflg) { + if (doubleindirect) #ifdef QUOTA - cnt += + blocksreleased += #endif - tloop(ip, nb, 0); - } else { - fre(ip, nb, (int)fs->fs_bsize); + indirtrunc(ip, nb, (daddr_t)-1, 0); + free(ip, nb, (int)fs->fs_bsize); #ifdef QUOTA - cnt += fs->fs_bsize / DEV_BSIZE; + blocksreleased += nblocks; #endif - } + } + if (doubleindirect && lastbn >= 0) { + last = lastbn % NINDIR(fs); + if (bp == NULL) + panic("indirtrunc"); + nb = bap[i]; + if (nb != 0) +#ifdef QUOTA + blocksreleased += +#endif + indirtrunc(ip, nb, last, 0); } if (bp != NULL) brelse(bp); - fre(ip, bn, (int)fs->fs_bsize); #ifdef QUOTA - cnt += fs->fs_bsize / DEV_BSIZE; - return(cnt); + return (blocksreleased); #endif } diff --git a/usr/src/sys/ufs/lfs/lfs_vnops.c b/usr/src/sys/ufs/lfs/lfs_vnops.c index 5cd55d875e..251c68f22e 100644 --- a/usr/src/sys/ufs/lfs/lfs_vnops.c +++ b/usr/src/sys/ufs/lfs/lfs_vnops.c @@ -1,4 +1,4 @@ -/* lfs_vnops.c 4.41 82/10/19 */ +/* lfs_vnops.c 4.42 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -16,13 +16,20 @@ #include "../h/uio.h" #include "../h/socket.h" #include "../h/socketvar.h" +#include "../h/nami.h" +/* + * Change current working directory (``.''). + */ chdir() { chdirec(&u.u_cdir); } +/* + * Change notion of root (``/'') directory. + */ chroot() { @@ -30,6 +37,9 @@ chroot() chdirec(&u.u_rdir); } +/* + * Common routine for chroot and chdir. 
+ */ chdirec(ipp) register struct inode **ipp; { @@ -38,14 +48,14 @@ chdirec(ipp) char *fname; }; - ip = namei(uchar, 0, 1); - if(ip == NULL) + ip = namei(uchar, LOOKUP, 1); + if (ip == NULL) return; - if((ip->i_mode&IFMT) != IFDIR) { + if ((ip->i_mode&IFMT) != IFDIR) { u.u_error = ENOTDIR; goto bad; } - if(access(ip, IEXEC)) + if (access(ip, IEXEC)) goto bad; iunlock(ip); if (*ipp) @@ -68,23 +78,28 @@ open() int flags; int mode; } *uap; - int checkpermissions = 1; + int checkpermissions = 1, flags; uap = (struct a *)u.u_ap; - if (uap->flags&FCREATE) { - ip = namei(uchar, 1, 1); + flags = uap->flags + 1; + if ((flags&FTRUNCATE) && (flags&FWRITE) == 0) { + u.u_error = EINVAL; + return; + } + if (flags&FCREATE) { + ip = namei(uchar, CREATE, 1); if (ip == NULL) { if (u.u_error) return; ip = maknode(uap->mode&07777&(~ISVTX)); checkpermissions = 0; - uap->flags &= ~FTRUNCATE; + flags &= ~FTRUNCATE; } } else - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip == NULL) return; - open1(ip, ++uap->flags, checkpermissions); + open1(ip, flags, checkpermissions); } #ifndef NOCOMPAT @@ -100,7 +115,7 @@ ocreat() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 1, 1); + ip = namei(uchar, CREATE, 1); if (ip == NULL) { if (u.u_error) return; @@ -109,7 +124,7 @@ ocreat() return; open1(ip, FWRITE, 0); } else - open1(ip, FWRITE|FTRUNCATE, 0); + open1(ip, FWRITE|FTRUNCATE, 1); } #endif @@ -145,7 +160,7 @@ open1(ip, mode, checkpermissions) * while doing so in case we block inside flocki. 
*/ flags = 0; - if (mode&(FRDLOCK|FWRLOCK)) { + if (mode&(FSHLOCK|FEXLOCK)) { iunlock(ip); flags = flocki(ip, 0, mode); ilock(ip); @@ -153,7 +168,7 @@ open1(ip, mode, checkpermissions) goto bad; } if (mode&FTRUNCATE) - itrunc(ip, 0); + itrunc(ip, (u_long)0); iunlock(ip); if ((fp = falloc()) == NULL) goto out; @@ -189,7 +204,7 @@ mknod() uap = (struct a *)u.u_ap; if (suser()) { - ip = namei(uchar, 1, 0); + ip = namei(uchar, CREATE, 0); if (ip != NULL) { u.u_error = EEXIST; goto out; @@ -225,10 +240,10 @@ link() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 0, 1); /* well, this routine is doomed anyhow */ + ip = namei(uchar, LOOKUP, 1); /* well, this routine is doomed anyhow */ if (ip == NULL) return; - if ((ip->i_mode&IFMT)==IFDIR && !suser()) { + if ((ip->i_mode&IFMT) == IFDIR && !suser()) { iput(ip); return; } @@ -237,7 +252,7 @@ link() iupdat(ip, &time, &time, 1); iunlock(ip); u.u_dirp = (caddr_t)uap->linkname; - xp = namei(uchar, 1, 0); + xp = namei(uchar, CREATE, 0); if (xp != NULL) { u.u_error = EEXIST; iput(xp); @@ -284,7 +299,7 @@ symlink() nc++; } u.u_dirp = uap->linkname; - ip = namei(uchar, 1, 0); + ip = namei(uchar, CREATE, 0); if (ip) { iput(ip); u.u_error = EEXIST; @@ -296,6 +311,7 @@ symlink() if (ip == NULL) return; u.u_error = rdwri(UIO_WRITE, ip, uap->target, nc, 0, 0, (int *)0); + /* handle u.u_error != 0 */ iput(ip); } @@ -306,34 +322,21 @@ symlink() */ unlink() { - register struct inode *ip, *pp; struct a { char *fname; }; - int unlinkingdot = 0; + register struct inode *ip, *dp; - pp = namei(uchar, 2, 0); - if (pp == NULL) + ip = namei(uchar, DELETE | LOCKPARENT, 0); + if (ip == NULL) return; - - /* - * Check for unlink(".") - * to avoid hanging on the iget - */ - if (pp->i_number == u.u_dent.d_ino) { - ip = pp; - ip->i_count++; - unlinkingdot++; - } else - ip = iget(pp->i_dev, pp->i_fs, u.u_dent.d_ino); - if(ip == NULL) - goto out1; - if((ip->i_mode&IFMT)==IFDIR && !suser()) + dp = u.u_pdir; + if ((ip->i_mode&IFMT) == IFDIR && 
!suser()) goto out; /* * Don't unlink a mounted file. */ - if (ip->i_dev != pp->i_dev) { + if (ip->i_dev != dp->i_dev) { u.u_error = EBUSY; goto out; } @@ -344,12 +347,11 @@ unlink() ip->i_flag |= ICHG; } out: - if (unlinkingdot) + if (dp == ip) irele(ip); else iput(ip); -out1: - iput(pp); + iput(dp); } /* @@ -397,13 +399,13 @@ saccess() svgid = u.u_gid; u.u_uid = u.u_ruid; u.u_gid = u.u_rgid; - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip != NULL) { - if (uap->fmode&FACCESS_READ && access(ip, IREAD)) + if ((uap->fmode&FACCESS_READ) && access(ip, IREAD)) goto done; - if (uap->fmode&FACCESS_WRITE && access(ip, IWRITE)) + if ((uap->fmode&FACCESS_WRITE) && access(ip, IWRITE)) goto done; - if (uap->fmode&FACCESS_EXECUTE && access(ip, IEXEC)) + if ((uap->fmode&FACCESS_EXECUTE) && access(ip, IEXEC)) goto done; done: iput(ip); @@ -445,7 +447,7 @@ stat() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip == NULL) return; stat1(ip, uap->sb); @@ -464,7 +466,7 @@ lstat() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 0, 0); + ip = namei(uchar, LOOKUP, 0); if (ip == NULL) return; stat1(ip, uap->sb); @@ -520,7 +522,7 @@ readlink() } *uap = (struct a *)u.u_ap; int resid; - ip = namei(uchar, 0, 0); + ip = namei(uchar, LOOKUP, 0); if (ip == NULL) return; if ((ip->i_mode&IFMT) != IFLNK) { @@ -533,6 +535,9 @@ out: u.u_r.r_val1 = uap->count - resid; } +/* + * Change mode of a file given path name. + */ chmod() { struct inode *ip; @@ -545,8 +550,12 @@ chmod() if ((ip = owner(1)) == NULL) return; chmod1(ip, uap->fmode); + iput(ip); } +/* + * Change mode of a file given a file descriptor. + */ fchmod() { struct a { @@ -565,14 +574,17 @@ fchmod() return; } ip = fp->f_inode; - ilock(ip); - if (u.u_uid != ip->i_uid && !suser()) { - iunlock(ip); + if (u.u_uid != ip->i_uid && !suser()) return; - } + ilock(ip); chmod1(ip, uap->fmode); + iunlock(ip); } +/* + * Change the mode on a file. 
+ * Inode must be locked before calling. + */ chmod1(ip, mode) register struct inode *ip; register int mode; @@ -598,9 +610,11 @@ ok: ip->i_flag |= ICHG; if (ip->i_flag&ITEXT && (ip->i_mode&ISVTX)==0) xrele(ip); - iput(ip); } +/* + * Set ownership given a path name. + */ chown() { struct inode *ip; @@ -614,8 +628,12 @@ chown() if (!suser() || (ip = owner(0)) == NULL) return; chown1(ip, uap->uid, uap->gid); + iput(ip); } +/* + * Set ownership given a file descriptor. + */ fchown() { struct a { @@ -635,12 +653,11 @@ fchown() return; } ip = fp->f_inode; - ilock(ip); - if (!suser()) { - iunlock(ip); + if (!suser()) return; - } + ilock(ip); chown1(ip, uap->uid, uap->gid); + iunlock(ip); } /* @@ -678,8 +695,8 @@ chown1(ip, uid, gid) change = fragroundup(fs, ip->i_size); change /= DEV_BSIZE; } - chkdq(ip, -change, 1); - chkiq(ip->i_dev, ip, ip->i_uid, 1); + (void)chkdq(ip, -change, 1); + (void)chkiq(ip->i_dev, ip, ip->i_uid, 1); dqrele(ip->i_dquot); #endif /* @@ -695,10 +712,9 @@ chown1(ip, uid, gid) ip->i_mode &= ~(ISUID|ISGID); #ifdef QUOTA ip->i_dquot = inoquota(ip); - chkdq(ip, change, 1); - chkiq(ip->i_dev, NULL, uid, 1); + (void)chkdq(ip, change, 1); + (void)chkiq(ip->i_dev, (struct inode *)NULL, uid, 1); #endif - iput(ip); } /* @@ -729,12 +745,18 @@ outime() iput(ip); } +/* + * Flush any pending I/O. + */ sync() { update(); } +/* + * Apply an advisory lock on a file descriptor. + */ flock() { struct a { @@ -753,35 +775,38 @@ flock() return; } cmd = uap->how; - flags = u.u_pofile[uap->fd] & (RDLOCK|WRLOCK); + flags = u.u_pofile[uap->fd] & (SHLOCK|EXLOCK); if (cmd&FUNLOCK) { if (flags == 0) { u.u_error = EINVAL; return; } funlocki(fp->f_inode, flags); - u.u_pofile[uap->fd] &= ~(RDLOCK|WRLOCK); + u.u_pofile[uap->fd] &= ~(SHLOCK|EXLOCK); return; } /* * No reason to write lock a file we've already * write locked, similarly with a read lock. 
*/ - if ((flags&WRLOCK) && (cmd&FWRLOCK) || - (flags&RDLOCK) && (cmd&FRDLOCK)) + if ((flags&EXLOCK) && (cmd&FEXLOCK) || + (flags&SHLOCK) && (cmd&FSHLOCK)) return; u.u_pofile[uap->fd] = flocki(fp->f_inode, u.u_pofile[uap->fd], cmd); } +/* + * Truncate a file given its path name. + */ truncate() { struct a { char *fname; - int length; + u_long length; } *uap = (struct a *)u.u_ap; struct inode *ip; - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip == NULL) return; if (access(ip, IWRITE)) @@ -791,16 +816,18 @@ truncate() goto bad; } itrunc(ip, uap->length); - return; bad: iput(ip); } +/* + * Truncate a file given a file descriptor. + */ ftruncate() { struct a { int fd; - int length; + u_long length; } *uap = (struct a *)u.u_ap; struct inode *ip; struct file *fp; @@ -819,17 +846,267 @@ ftruncate() ip = fp->f_inode; ilock(ip); itrunc(ip, uap->length); + iunlock(ip); +} + +/* + * Synch an open file. + */ +fsync() +{ + struct a { + int fd; + } *uap = (struct a *)u.u_ap; + struct inode *ip; + struct file *fp; + + fp = getf(uap->fd); + if (fp == NULL) + return; + if (fp->f_type == DTYPE_SOCKET) { + u.u_error = EINVAL; + return; + } + ip = fp->f_inode; + ilock(ip); + syncip(ip); + iunlock(ip); } +/* + * Rename system call. + * rename("foo", "bar"); + * is essentially + * unlink("bar"); + * link("foo", "bar"); + * unlink("foo"); + * but ``atomically''. Can't do full commit without saving state in the + * inode on disk which isn't feasible at this time. Best we can do is + * always guarantee the target exists. + * + * Basic algorithm is: + * + * 1) Bump link count on source while we're linking it to the + * target. This also insure the inode won't be deleted out + * from underneath us while we work. + * 2) Link source to destination. If destination already exists, + * delete it first. + * 3) Unlink source reference to inode if still around. + * 4) If a directory was moved and the parent of the destination + * is different from the source, patch the ".." 
entry in the + * directory. + * + * Source and destination must either both be directories, or both + * not be directories. If target is a directory, it must be empty. + */ rename() { -#ifdef notdef struct a { char *from; char *to; } *uap; -#endif + register struct inode *ip, *xp, *dp; + int oldparent, parentdifferent, doingdirectory; + + uap = (struct a *)u.u_ap; + ip = namei(uchar, LOOKUP | LOCKPARENT, 0); + if (ip == NULL) + return; + dp = u.u_pdir; + oldparent = 0, doingdirectory = 0; + if ((ip->i_mode&IFMT) == IFDIR) { + register struct direct *d; + + d = &u.u_dent; + /* + * Avoid "." and ".." for obvious reasons. + */ + if (d->d_name[0] == '.') { + if (d->d_namlen == 1 || + (d->d_namlen == 2 && d->d_name[1] == '.')) { + u.u_error = EINVAL; + iput(ip); + return; + } + } + oldparent = dp->i_number; + doingdirectory++; + } + irele(dp); + + /* + * 1) Bump link count while we're moving stuff + * around. If we crash somewhere before + * completing our work, the link count + * may be wrong, but correctable. + */ + ip->i_nlink++; + ip->i_flag |= ICHG; + iupdat(ip, &time, &time, 1); + iunlock(ip); + + /* + * When the target exists, both the directory + * and target inodes are returned locked. + */ + u.u_dirp = (caddr_t)uap->to; + xp = namei(uchar, CREATE | LOCKPARENT, 0); + if (u.u_error) + goto out; + dp = u.u_pdir; + /* + * 2) If target doesn't exist, link the target + * to the source and unlink the source. + * Otherwise, rewrite the target directory + * entry to reference the source inode and + * expunge the original entry's existence. + */ + parentdifferent = oldparent != dp->i_number; + if (xp == NULL) { + if (dp->i_dev != ip->i_dev) { + u.u_error = EXDEV; + goto bad; + } + /* + * Account for ".." in directory. + * When source and destination have the + * same parent we don't fool with the + * link count -- this isn't required + * because we do a similar check below. 
+ */ + if (doingdirectory && parentdifferent) { + dp->i_nlink++; + dp->i_flag |= ICHG; + iupdat(dp, &time, &time, 1); + } + direnter(ip); + if (u.u_error) + goto out; + } else { + if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) { + u.u_error = EXDEV; + goto bad; + } + /* + * Target must be empty if a directory. + * Also, insure source and target are + * compatible (both directories, or both + * not directories). + */ + if ((xp->i_mode&IFMT) == IFDIR) { + if (!dirempty(xp)) { + u.u_error = EEXIST; /* XXX */ + goto bad; + } + if (!doingdirectory) { + u.u_error = ENOTDIR; + goto bad; + } + } else if (doingdirectory) { + u.u_error = EISDIR; + goto bad; + } + dirrewrite(dp, ip); + if (u.u_error) + goto bad1; + /* + * If this is a directory we know it is + * empty and we can squash the inode and + * any space associated with it. Otherwise, + * we've got a plain file and the link count + * simply needs to be adjusted. + */ + if (doingdirectory) { + xp->i_nlink = 0; + itrunc(xp, (u_long)0); + } else + xp->i_nlink--; + xp->i_flag |= ICHG; + iput(xp); + } + + /* + * 3) Unlink the source. + */ + u.u_dirp = uap->from; + dp = namei(uchar, DELETE, 0); + /* + * Insure directory entry still exists and + * has not changed since the start of all + * this. If either has occured, forget about + * about deleting the original entry and just + * adjust the link count in the inode. + */ + if (dp == NULL || u.u_dent.d_ino != ip->i_number) { + ip->i_nlink--; + ip->i_flag |= ICHG; + } else { + /* + * If source is a directory, must adjust + * link count of parent directory also. + * If target didn't exist and source and + * target have the same parent, then we + * needn't touch the link count, it all + * balances out in the end. Otherwise, we + * must do so to reflect deletion of ".." + * done above. 
+ */ + if (doingdirectory && (xp != NULL || parentdifferent)) { + dp->i_nlink--; + dp->i_flag |= ICHG; + } + if (dirremove()) { + ip->i_nlink--; + ip->i_flag |= ICHG; + } + } + irele(ip); + if (dp) + iput(dp); + + /* + * 4) Renaming a directory with the parent + * different requires ".." to be rewritten. + * The window is still there for ".." to + * be inconsistent, but this is unavoidable, + * and a lot shorter than when it was done + * in a user process. + */ + if (doingdirectory && parentdifferent && u.u_error == 0) { + struct dirtemplate dirbuf; + u.u_dirp = uap->to; + ip = namei(uchar, LOOKUP | LOCKPARENT, 0); + if (ip == NULL) { + printf("rename: .. went away\n"); + return; + } + dp = u.u_pdir; + if ((ip->i_mode&IFMT) != IFDIR) { + printf("rename: .. not a directory\n"); + goto stuck; + } + u.u_error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, 1, (int *)0); + if (u.u_error == 0) { + dirbuf.dotdot_ino = dp->i_number; + (void) rdwri(UIO_WRITE, ip, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, 1, (int *)0); + } +stuck: + irele(dp); + iput(ip); + } + return; +bad: + iput(u.u_pdir); +bad1: + if (xp) + irele(xp); +out: + ip->i_nlink--; + ip->i_flag |= ICHG; + irele(ip); } /* diff --git a/usr/src/sys/ufs/ufs/ufs_inode.c b/usr/src/sys/ufs/ufs/ufs_inode.c index 32c0ade5d3..acff80669f 100644 --- a/usr/src/sys/ufs/ufs/ufs_inode.c +++ b/usr/src/sys/ufs/ufs/ufs_inode.c @@ -1,4 +1,4 @@ -/* ufs_inode.c 4.30 82/10/23 */ +/* ufs_inode.c 4.31 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -236,14 +236,14 @@ irele(ip) if (ip->i_count == 1) { ip->i_flag |= ILOCKED; if (ip->i_nlink <= 0) { - itrunc(ip, 0); + itrunc(ip, (u_long)0); mode = ip->i_mode; ip->i_mode = 0; ip->i_rdev = 0; ip->i_flag |= IUPD|ICHG; ifree(ip, ip->i_number, mode); #ifdef QUOTA - chkiq(ip->i_dev, ip, ip->i_uid, 0); + (void)chkiq(ip->i_dev, ip, ip->i_uid, 0); dqrele(ip->i_dquot); ip->i_dquot = NODQUOT; #endif @@ -325,146 +325,253 @@ iupdat(ip, 
ta, tm, waitfor) */ itrunc(ip, length) register struct inode *ip; - register int length; + u_long length; { register i; - daddr_t bn; - struct inode itmp; + register daddr_t lastblock; + daddr_t bn, lastdiblock, lastsiblock; register struct fs *fs; + int j; #ifdef QUOTA - register long cnt = 0; - long tloop(); + long blocksreleased = 0, nblocks; + long indirtrunc(); #endif - /* - * Only plain files, directories and symbolic - * links contain blocks. - */ - i = ip->i_mode & IFMT; - if (i != IFREG && i != IFDIR && i != IFLNK) - return; + if (ip->i_size <= length) return; - +#ifdef notdef + /* this is superfluous given size check above */ + i = ip->i_mode & IFMT; + if (i != IFREG && i != IFDIR && i != IFLNK) { + printf("itrunc: i# %d, size %d\n", ip->i_number, ip->i_size); + return; + } +#endif /* - * Clean inode on disk before freeing blocks - * to insure no duplicates if system crashes. + * Update size of file on disk before + * we start freeing blocks. If we crash + * while free'ing blocks below, the file + * size will be believed and the blocks + * returned to the free list. + * After updating the copy on disk we + * put the old size back so macros like + * blksize will work. */ - itmp = *ip; - itmp.i_size = length; - for (i = 0; i < NDADDR; i++) - itmp.i_db[i] = 0; - for (i = 0; i < NIADDR; i++) - itmp.i_ib[i] = 0; - itmp.i_flag |= ICHG|IUPD; - iupdat(&itmp, &time, &time, 1); - ip->i_flag &= ~(IUPD|IACC|ICHG); + j = ip->i_size; + ip->i_size = length; + ip->i_flag |= ICHG|IUPD; + iupdat(ip, &time, &time, 1); + ip->i_size = j; /* - * Now return blocks to free list... if machine - * crashes, they will be harmless MISSING blocks. + * Calculate last direct, single indirect and + * double indirect block (if any) which we want + * to keep. Lastblock is -1 when the file is + * truncated to 0. 
*/ fs = ip->i_fs; + lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; + lastsiblock = lastblock - NDADDR; + lastdiblock = lastsiblock - NINDIR(fs); +#ifdef QUOTA + nblocks = fs->fs_bsize / DEV_BSIZE; +#endif /* - * release double indirect block first + * Double indirect block first */ - bn = ip->i_ib[NIADDR-1]; - if (bn != (daddr_t)0) { - ip->i_ib[NIADDR - 1] = (daddr_t)0; + bn = ip->i_ib[NIADDR - 1]; + if (bn != 0) { + /* + * If lastdiblock is negative, it's value + * is meaningless; in this case we set it to + * -NINDIR(fs) so calculations performed in + * indirtrunc come out right. + */ + if (lastdiblock < 0) + lastdiblock -= lastsiblock; #ifdef QUOTA - cnt += + blocksreleased += #endif - tloop(ip, bn, 1); + indirtrunc(ip, bn, lastdiblock, 1); + if (lastdiblock < 0) { + ip->i_ib[NIADDR - 1] = 0; + free(ip, bn, (off_t)fs->fs_bsize); +#ifdef QUOTA + blocksreleased += nblocks; +#endif + } } + if (lastdiblock >= 0) + goto done; /* - * release single indirect blocks second + * Single indirect blocks second. + * First, those which can be totally + * zapped, then possibly one which + * needs to be partially cleared. */ - for (i = NIADDR - 2; i >= 0; i--) { + j = lastsiblock < 0 ? -1 : lastsiblock / NINDIR(fs); + for (i = NIADDR - 2; i > j; i--) { bn = ip->i_ib[i]; - if (bn != (daddr_t)0) { - ip->i_ib[i] = (daddr_t)0; + if (bn != 0) { #ifdef QUOTA - cnt += + blocksreleased += nblocks + #endif - tloop(ip, bn, 0); + indirtrunc(ip, bn, (daddr_t)-1, 0); + ip->i_ib[i] = 0; + free(ip, bn, (off_t)fs->fs_bsize); } } + if (lastsiblock >= 0) { + bn = ip->i_ib[j]; + if (bn != 0) +#ifdef QUOTA + blocksreleased += +#endif + indirtrunc(ip, bn, lastsiblock, 0); + goto done; + } /* - * finally release direct blocks + * All whole direct blocks. 
*/ - for (i = NDADDR - 1; i>=0; i--) { + for (i = NDADDR - 1; i > lastblock; i--) { + register int size; + bn = ip->i_db[i]; - if (bn == (daddr_t)0) + if (bn == 0) continue; - ip->i_db[i] = (daddr_t)0; -#ifndef QUOTA - fre(ip, bn, (off_t)blksize(fs, ip, i)); -#else - { int size; - fre(ip, bn, size = (off_t)blksize(fs, ip, i)); - cnt += size / DEV_BSIZE; - } + ip->i_db[i] = 0; + size = (off_t)blksize(fs, ip, i); + free(ip, bn, size); +#ifdef QUOTA + blocksreleased += size / DEV_BSIZE; +#endif + } + /* + * Finally, look for a change in size of the + * last direct block; release any frags. + */ + if (lastblock >= 0 && ip->i_db[lastblock] != 0) { + /* + * Calculate amount of space we're giving + * back as old block size minus new block size. + */ + i = blksize(fs, ip, lastblock); + ip->i_size = length; + i = i - blksize(fs, ip, lastblock); + if (i > 0) { + /* + * Block number of space to be free'd is + * the old block # plus the number of frags + * required for the storage we're keeping. + */ + bn = ip->i_db[lastblock] + + numfrags(fs, fs->fs_bsize - i); + free(ip, bn, i); +#ifdef QUOTA + blocksreleased += i / DEV_BSIZE; #endif + } } - ip->i_size = 0; +done: /* - * Inode was written and flags updated above. - * No need to modify flags here. + * Finished free'ing blocks, complete + * inode update to reflect new length. */ #ifdef QUOTA - (void) chkdq(ip, -cnt, 0); + (void) chkdq(ip, -blocksreleased, 0); #endif + ip->i_size = length; + ip->i_flag |= ICHG|IUPD; + iupdat(ip, &time, &time, 1); } +/* + * Release blocks associated with the inode ip and + * stored in the indirect block bn. Blocks are free'd + * in LIFO order up to (but not including) lastbn. If + * doubleindirect is indicated, this block is a double + * indirect block and recursive calls to indirtrunc must + * be used to cleanse single indirect blocks instead of + * a simple free. 
+ */ #ifdef QUOTA long #endif -tloop(ip, bn, indflg) +indirtrunc(ip, bn, lastbn, doubleindirect) register struct inode *ip; - daddr_t bn; - int indflg; + daddr_t bn, lastbn; + int doubleindirect; { - register i; - register struct buf *bp; + register int i; + struct buf *bp; register daddr_t *bap; register struct fs *fs; - daddr_t nb; + daddr_t nb, last; #ifdef QUOTA - register long cnt = 0; + int blocksreleased = 0, nblocks; #endif bp = NULL; fs = ip->i_fs; - for (i = NINDIR(fs) - 1; i >= 0; i--) { + last = lastbn; + if (doubleindirect) + last /= NINDIR(fs); +#ifdef QUOTA + nblocks = fs->fs_bsize / DEV_BSIZE; +#endif + for (i = NINDIR(fs) - 1; i > last; i--) { if (bp == NULL) { + struct buf *copy; + + copy = geteblk((int)fs->fs_bsize); bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize); - if (bp->b_flags & B_ERROR) { + if (bp->b_flags&B_ERROR) { + brelse(copy); brelse(bp); - return; + return (NULL); } bap = bp->b_un.b_daddr; + /* + * Update pointers before freeing blocks. + * If we crash before freeing the blocks + * they'll be recovered as missing. 
+ */ + bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, + (u_int)fs->fs_bsize); + bzero((caddr_t)&bap[last + 1], + (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t)); + bwrite(bp); + bp = copy, bap = bp->b_un.b_daddr; } nb = bap[i]; - if (nb == (daddr_t)0) + if (nb == 0) continue; - if (indflg) { + if (doubleindirect) #ifdef QUOTA - cnt += + blocksreleased += #endif - tloop(ip, nb, 0); - } else { - fre(ip, nb, (int)fs->fs_bsize); + indirtrunc(ip, nb, (daddr_t)-1, 0); + free(ip, nb, (int)fs->fs_bsize); #ifdef QUOTA - cnt += fs->fs_bsize / DEV_BSIZE; + blocksreleased += nblocks; #endif - } + } + if (doubleindirect && lastbn >= 0) { + last = lastbn % NINDIR(fs); + if (bp == NULL) + panic("indirtrunc"); + nb = bap[i]; + if (nb != 0) +#ifdef QUOTA + blocksreleased += +#endif + indirtrunc(ip, nb, last, 0); } if (bp != NULL) brelse(bp); - fre(ip, bn, (int)fs->fs_bsize); #ifdef QUOTA - cnt += fs->fs_bsize / DEV_BSIZE; - return(cnt); + return (blocksreleased); #endif } diff --git a/usr/src/sys/ufs/ufs/ufs_lookup.c b/usr/src/sys/ufs/ufs/ufs_lookup.c index 57eaf17136..0403df6fb3 100644 --- a/usr/src/sys/ufs/ufs/ufs_lookup.c +++ b/usr/src/sys/ufs/ufs/ufs_lookup.c @@ -1,4 +1,4 @@ -/* ufs_lookup.c 4.29 82/10/31 */ +/* ufs_lookup.c 4.30 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -10,20 +10,30 @@ #include "../h/buf.h" #include "../h/conf.h" #include "../h/uio.h" +#include "../h/nami.h" struct buf *blkatoff(); -int dirchk = 1; +int dirchk = 0; /* * Convert a pathname into a pointer to a locked inode, * with side effects usable in creating and removing files. * This is a very central and rather complicated routine. * * The func argument gives the routine which returns successive - * characters of the name to be translated. The flag - * argument is (0, 1, 2) depending on whether the name is to be - * (looked up, created, deleted). The follow argument is 1 when - * symbolic links are to be followed when they occur at the end of - * the name translation process. 
+ * characters of the name to be translated. + * + * The flag argument is (LOOKUP, CREATE, DELETE) depending on whether + * the name is to be (looked up, created, deleted). If flag has + * LOCKPARENT or'ed into it and the target of the pathname exists, + * namei returns both the target and its parent directory locked. + * If the file system is not maintained in a strict tree hierarchy, + * this can result in a deadlock situation. When creating and + * LOCKPARENT is specified, the target may not be ".". When deleting + * and LOCKPARENT is specified, the target may be ".", but the caller + * must check to insure it does an irele and iput instead of two iputs. + * + * The follow argument is 1 when symbolic links are to be followed + * when they occur at the end of the name translation process. * * Overall outline: * @@ -36,14 +46,19 @@ int dirchk = 1; * handle degenerate case where name is null string * search for name in directory, to found or notfound * notfound: - * if creating, return locked inode, leaving information on avail. slots + * if creating, return locked directory, leaving info on avail. slots * else return error * found: * if at end of path and deleting, return information to allow delete + * if at end of path and rewriting (create and LOCKPARENT), lock targe + * inode and return info to allow rewrite * if .. and on mounted filesys, look in mount table for parent * if symbolic link, massage name in buffer and continue at dirloop * if more components of name, do next level at dirloop * return the answer as locked inode + * + * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode, + * but unlocked. 
*/ struct inode * namei(func, flag, follow) @@ -69,7 +84,10 @@ namei(func, flag, follow) int nlink = 0; /* number of symbolic links taken */ struct inode *pdp; /* saved dp during symlink work */ int i; + int lockparent; + lockparent = flag & LOCKPARENT; + flag &= ~LOCKPARENT; /* * Get a buffer for the name to be translated, and copy the * name into the buffer. @@ -156,7 +174,7 @@ dirloop2: * case it doesn't already exist. */ slotstatus = FOUND; - if (flag == 1 && *cp == 0) { + if (flag == CREATE && *cp == 0) { slotstatus = NONE; slotfreespace = 0; slotneeded = DIRSIZ(&u.u_dent); @@ -258,10 +276,10 @@ dirloop2: /* notfound: */ /* * If creating, and at end of pathname and current - * directory has not been removed, then can consider allowing - * file to be created. + * directory has not been removed, then can consider + * allowing file to be created. */ - if (flag == 1 && *cp == 0 && dp->i_nlink != 0) { + if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) { /* * Access for write is interpreted as allowing * creation of files in the directory. @@ -321,15 +339,17 @@ found: /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. - * Note that in this case we return the directory - * inode, not the inode of the file being deleted. + * If the lockparent flag isn't set, we return only + * the directory (in u.u_pdir), otherwise we go + * on and lock the inode, being careful with ".". */ - if (flag == 2 && *cp == 0) { + if (flag == DELETE && *cp == 0) { /* * Write access to directory required to delete files. 
*/ if (access(dp, IWRITE)) goto bad; + u.u_pdir = dp; /* for dirremove() */ /* * Return pointer to current entry in u.u_offset, * and distance past previous entry (if there @@ -340,8 +360,18 @@ found: u.u_count = 0; else u.u_count = u.u_offset - prevoff; + if (lockparent) { + if (dp->i_number == u.u_dent.d_ino) + dp->i_count++; + else { + dp = iget(dp->i_dev, fs, u.u_dent.d_ino); + if (dp == NULL) { + iput(u.u_pdir); + goto bad; + } + } + } brelse(nbp); - u.u_pdir = dp; /* for dirremove() */ return (dp); } @@ -370,6 +400,33 @@ found: } } + /* + * If rewriting (rename), return the inode and the + * information required to rewrite the present directory + * Must get inode of directory entry to verify it's a + * regular file, or empty directory. + */ + if ((flag == CREATE && lockparent) && *cp == 0) { + if (access(dp, IWRITE)) + goto bad; + u.u_pdir = dp; /* for dirrewrite() */ + /* + * Careful about locking second inode. + * This can only occur if the target is ".". + */ + if (dp->i_number == u.u_dent.d_ino) { + u.u_error = EISDIR; /* XXX */ + goto bad; + } + dp = iget(dp->i_dev, fs, u.u_dent.d_ino); + if (dp == NULL) { + iput(u.u_pdir); + goto bad; + } + brelse(nbp); + return (dp); + } + /* * Check for symbolic link, which may require us * to massage the name before we continue translation. @@ -398,7 +455,7 @@ found: } ovbcopy(cp, nbp->b_un.b_addr + dp->i_size, pathlen); u.u_error = - rdwri(UIO_READ, dp, nbp->b_un.b_addr, dp->i_size, + rdwri(UIO_READ, dp, nbp->b_un.b_addr, (int)dp->i_size, 0, 1, (int *)0); if (u.u_error) goto bad2; @@ -419,7 +476,6 @@ found: fs = dp->i_fs; goto dirloop; } - irele(pdp); /* * Not a symbolic link. If more pathname, @@ -428,9 +484,14 @@ found: if (*cp == '/') { while (*cp == '/') cp++; + irele(pdp); goto dirloop; } brelse(nbp); + if (lockparent) + u.u_pdir = pdp; + else + irele(pdp); return (dp); bad2: irele(pdp); @@ -514,7 +575,7 @@ direnter(ip) * This should never push the size past a new multiple of * DIRBLKSIZE. 
*/ - if (u.u_offset+u.u_count > u.u_pdir->i_size) + if (u.u_offset + u.u_count > u.u_pdir->i_size) u.u_pdir->i_size = u.u_offset + u.u_count; /* @@ -522,8 +583,10 @@ direnter(ip) * entry. */ bp = blkatoff(u.u_pdir, u.u_offset, (char **)&dirbuf); - if (bp == 0) + if (bp == 0) { + iput(u.u_pdir); return; + } /* * Find space for the new entry. In the simple case, the @@ -570,20 +633,31 @@ direnter(ip) iput(u.u_pdir); } +/* + * Remove a directory entry after a call to namei, using the + * parameters which it left in the u. area. The u. entry + * u_offset contains the offset into the directory of the + * entry to be eliminated. The u_count field contains the + * size of the previous record in the directory. If this + * is 0, the first entry is being deleted, so we need only + * zero the inode number to mark the entry as free. If the + * entry isn't the first in the directory, we must reclaim + * the space of the now empty record by adding the record size + * to the size of the previous entry. + */ dirremove() { register struct inode *dp = u.u_pdir; register struct buf *bp; struct direct *ep; - if (u.u_count == 0) { + if (u.u_count == 0) /* * First entry in block: set d_ino to zero. */ - u.u_dent.d_ino = 0; (void) rdwri(UIO_WRITE, dp, (caddr_t)&u.u_dent, (int)DIRSIZ(&u.u_dent), u.u_offset, 1, (int *)0); - } else { + else { /* * Collapse new free space into previous entry. */ @@ -597,6 +671,21 @@ dirremove() return (1); } +/* + * Rewrite an existing directory entry to point at the inode + * supplied. The parameters describing the directory entry are + * set up by a call to namei. + */ +dirrewrite(dp, ip) + struct inode *dp, *ip; +{ + + u.u_dent.d_ino = ip->i_number; + u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&u.u_dent, + (int)DIRSIZ(&u.u_dent), u.u_offset, 1, (int *)0); + iput(dp); +} + /* * Return buffer with contents of block "offset" * from the beginning of directory "ip". 
If "res" @@ -627,3 +716,32 @@ blkatoff(ip, offset, res) *res = bp->b_un.b_addr + base; return (bp); } + +/* + * Check if a directory is empty or not. + * Inode supplied must be locked. + */ +dirempty(ip) + struct inode *ip; +{ + register off_t off; + struct direct dbuf; + register struct direct *dp = &dbuf; + int error; + + for (off = 0; off < ip->i_size; off += dp->d_reclen) { + error = rdwri(UIO_READ, ip, (caddr_t)dp, + sizeof (struct direct), off, 1, (int *)0); + if (error) + return (0); + if (dp->d_ino == 0) + continue; + if (dp->d_name[0] != '.') + return (0); + if (dp->d_namlen == 1 || + (dp->d_namlen == 2 && dp->d_name[1] == '.')) + continue; + return (0); + } + return (1); +} diff --git a/usr/src/sys/ufs/ufs/ufs_vnops.c b/usr/src/sys/ufs/ufs/ufs_vnops.c index bfce70338a..a81a284f0d 100644 --- a/usr/src/sys/ufs/ufs/ufs_vnops.c +++ b/usr/src/sys/ufs/ufs/ufs_vnops.c @@ -1,4 +1,4 @@ -/* ufs_vnops.c 4.41 82/10/19 */ +/* ufs_vnops.c 4.42 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -16,13 +16,20 @@ #include "../h/uio.h" #include "../h/socket.h" #include "../h/socketvar.h" +#include "../h/nami.h" +/* + * Change current working directory (``.''). + */ chdir() { chdirec(&u.u_cdir); } +/* + * Change notion of root (``/'') directory. + */ chroot() { @@ -30,6 +37,9 @@ chroot() chdirec(&u.u_rdir); } +/* + * Common routine for chroot and chdir. 
+ */ chdirec(ipp) register struct inode **ipp; { @@ -38,14 +48,14 @@ chdirec(ipp) char *fname; }; - ip = namei(uchar, 0, 1); - if(ip == NULL) + ip = namei(uchar, LOOKUP, 1); + if (ip == NULL) return; - if((ip->i_mode&IFMT) != IFDIR) { + if ((ip->i_mode&IFMT) != IFDIR) { u.u_error = ENOTDIR; goto bad; } - if(access(ip, IEXEC)) + if (access(ip, IEXEC)) goto bad; iunlock(ip); if (*ipp) @@ -68,23 +78,28 @@ open() int flags; int mode; } *uap; - int checkpermissions = 1; + int checkpermissions = 1, flags; uap = (struct a *)u.u_ap; - if (uap->flags&FCREATE) { - ip = namei(uchar, 1, 1); + flags = uap->flags + 1; + if ((flags&FTRUNCATE) && (flags&FWRITE) == 0) { + u.u_error = EINVAL; + return; + } + if (flags&FCREATE) { + ip = namei(uchar, CREATE, 1); if (ip == NULL) { if (u.u_error) return; ip = maknode(uap->mode&07777&(~ISVTX)); checkpermissions = 0; - uap->flags &= ~FTRUNCATE; + flags &= ~FTRUNCATE; } } else - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip == NULL) return; - open1(ip, ++uap->flags, checkpermissions); + open1(ip, flags, checkpermissions); } #ifndef NOCOMPAT @@ -100,7 +115,7 @@ ocreat() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 1, 1); + ip = namei(uchar, CREATE, 1); if (ip == NULL) { if (u.u_error) return; @@ -109,7 +124,7 @@ ocreat() return; open1(ip, FWRITE, 0); } else - open1(ip, FWRITE|FTRUNCATE, 0); + open1(ip, FWRITE|FTRUNCATE, 1); } #endif @@ -145,7 +160,7 @@ open1(ip, mode, checkpermissions) * while doing so in case we block inside flocki. 
*/ flags = 0; - if (mode&(FRDLOCK|FWRLOCK)) { + if (mode&(FSHLOCK|FEXLOCK)) { iunlock(ip); flags = flocki(ip, 0, mode); ilock(ip); @@ -153,7 +168,7 @@ open1(ip, mode, checkpermissions) goto bad; } if (mode&FTRUNCATE) - itrunc(ip, 0); + itrunc(ip, (u_long)0); iunlock(ip); if ((fp = falloc()) == NULL) goto out; @@ -189,7 +204,7 @@ mknod() uap = (struct a *)u.u_ap; if (suser()) { - ip = namei(uchar, 1, 0); + ip = namei(uchar, CREATE, 0); if (ip != NULL) { u.u_error = EEXIST; goto out; @@ -225,10 +240,10 @@ link() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 0, 1); /* well, this routine is doomed anyhow */ + ip = namei(uchar, LOOKUP, 1); /* well, this routine is doomed anyhow */ if (ip == NULL) return; - if ((ip->i_mode&IFMT)==IFDIR && !suser()) { + if ((ip->i_mode&IFMT) == IFDIR && !suser()) { iput(ip); return; } @@ -237,7 +252,7 @@ link() iupdat(ip, &time, &time, 1); iunlock(ip); u.u_dirp = (caddr_t)uap->linkname; - xp = namei(uchar, 1, 0); + xp = namei(uchar, CREATE, 0); if (xp != NULL) { u.u_error = EEXIST; iput(xp); @@ -284,7 +299,7 @@ symlink() nc++; } u.u_dirp = uap->linkname; - ip = namei(uchar, 1, 0); + ip = namei(uchar, CREATE, 0); if (ip) { iput(ip); u.u_error = EEXIST; @@ -296,6 +311,7 @@ symlink() if (ip == NULL) return; u.u_error = rdwri(UIO_WRITE, ip, uap->target, nc, 0, 0, (int *)0); + /* handle u.u_error != 0 */ iput(ip); } @@ -306,34 +322,21 @@ symlink() */ unlink() { - register struct inode *ip, *pp; struct a { char *fname; }; - int unlinkingdot = 0; + register struct inode *ip, *dp; - pp = namei(uchar, 2, 0); - if (pp == NULL) + ip = namei(uchar, DELETE | LOCKPARENT, 0); + if (ip == NULL) return; - - /* - * Check for unlink(".") - * to avoid hanging on the iget - */ - if (pp->i_number == u.u_dent.d_ino) { - ip = pp; - ip->i_count++; - unlinkingdot++; - } else - ip = iget(pp->i_dev, pp->i_fs, u.u_dent.d_ino); - if(ip == NULL) - goto out1; - if((ip->i_mode&IFMT)==IFDIR && !suser()) + dp = u.u_pdir; + if ((ip->i_mode&IFMT) == IFDIR && 
!suser()) goto out; /* * Don't unlink a mounted file. */ - if (ip->i_dev != pp->i_dev) { + if (ip->i_dev != dp->i_dev) { u.u_error = EBUSY; goto out; } @@ -344,12 +347,11 @@ unlink() ip->i_flag |= ICHG; } out: - if (unlinkingdot) + if (dp == ip) irele(ip); else iput(ip); -out1: - iput(pp); + iput(dp); } /* @@ -397,13 +399,13 @@ saccess() svgid = u.u_gid; u.u_uid = u.u_ruid; u.u_gid = u.u_rgid; - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip != NULL) { - if (uap->fmode&FACCESS_READ && access(ip, IREAD)) + if ((uap->fmode&FACCESS_READ) && access(ip, IREAD)) goto done; - if (uap->fmode&FACCESS_WRITE && access(ip, IWRITE)) + if ((uap->fmode&FACCESS_WRITE) && access(ip, IWRITE)) goto done; - if (uap->fmode&FACCESS_EXECUTE && access(ip, IEXEC)) + if ((uap->fmode&FACCESS_EXECUTE) && access(ip, IEXEC)) goto done; done: iput(ip); @@ -445,7 +447,7 @@ stat() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip == NULL) return; stat1(ip, uap->sb); @@ -464,7 +466,7 @@ lstat() } *uap; uap = (struct a *)u.u_ap; - ip = namei(uchar, 0, 0); + ip = namei(uchar, LOOKUP, 0); if (ip == NULL) return; stat1(ip, uap->sb); @@ -520,7 +522,7 @@ readlink() } *uap = (struct a *)u.u_ap; int resid; - ip = namei(uchar, 0, 0); + ip = namei(uchar, LOOKUP, 0); if (ip == NULL) return; if ((ip->i_mode&IFMT) != IFLNK) { @@ -533,6 +535,9 @@ out: u.u_r.r_val1 = uap->count - resid; } +/* + * Change mode of a file given path name. + */ chmod() { struct inode *ip; @@ -545,8 +550,12 @@ chmod() if ((ip = owner(1)) == NULL) return; chmod1(ip, uap->fmode); + iput(ip); } +/* + * Change mode of a file given a file descriptor. + */ fchmod() { struct a { @@ -565,14 +574,17 @@ fchmod() return; } ip = fp->f_inode; - ilock(ip); - if (u.u_uid != ip->i_uid && !suser()) { - iunlock(ip); + if (u.u_uid != ip->i_uid && !suser()) return; - } + ilock(ip); chmod1(ip, uap->fmode); + iunlock(ip); } +/* + * Change the mode on a file. 
+ * Inode must be locked before calling. + */ chmod1(ip, mode) register struct inode *ip; register int mode; @@ -598,9 +610,11 @@ ok: ip->i_flag |= ICHG; if (ip->i_flag&ITEXT && (ip->i_mode&ISVTX)==0) xrele(ip); - iput(ip); } +/* + * Set ownership given a path name. + */ chown() { struct inode *ip; @@ -614,8 +628,12 @@ chown() if (!suser() || (ip = owner(0)) == NULL) return; chown1(ip, uap->uid, uap->gid); + iput(ip); } +/* + * Set ownership given a file descriptor. + */ fchown() { struct a { @@ -635,12 +653,11 @@ fchown() return; } ip = fp->f_inode; - ilock(ip); - if (!suser()) { - iunlock(ip); + if (!suser()) return; - } + ilock(ip); chown1(ip, uap->uid, uap->gid); + iunlock(ip); } /* @@ -678,8 +695,8 @@ chown1(ip, uid, gid) change = fragroundup(fs, ip->i_size); change /= DEV_BSIZE; } - chkdq(ip, -change, 1); - chkiq(ip->i_dev, ip, ip->i_uid, 1); + (void)chkdq(ip, -change, 1); + (void)chkiq(ip->i_dev, ip, ip->i_uid, 1); dqrele(ip->i_dquot); #endif /* @@ -695,10 +712,9 @@ chown1(ip, uid, gid) ip->i_mode &= ~(ISUID|ISGID); #ifdef QUOTA ip->i_dquot = inoquota(ip); - chkdq(ip, change, 1); - chkiq(ip->i_dev, NULL, uid, 1); + (void)chkdq(ip, change, 1); + (void)chkiq(ip->i_dev, (struct inode *)NULL, uid, 1); #endif - iput(ip); } /* @@ -729,12 +745,18 @@ outime() iput(ip); } +/* + * Flush any pending I/O. + */ sync() { update(); } +/* + * Apply an advisory lock on a file descriptor. + */ flock() { struct a { @@ -753,35 +775,38 @@ flock() return; } cmd = uap->how; - flags = u.u_pofile[uap->fd] & (RDLOCK|WRLOCK); + flags = u.u_pofile[uap->fd] & (SHLOCK|EXLOCK); if (cmd&FUNLOCK) { if (flags == 0) { u.u_error = EINVAL; return; } funlocki(fp->f_inode, flags); - u.u_pofile[uap->fd] &= ~(RDLOCK|WRLOCK); + u.u_pofile[uap->fd] &= ~(SHLOCK|EXLOCK); return; } /* * No reason to write lock a file we've already * write locked, similarly with a read lock. 
*/ - if ((flags&WRLOCK) && (cmd&FWRLOCK) || - (flags&RDLOCK) && (cmd&FRDLOCK)) + if ((flags&EXLOCK) && (cmd&FEXLOCK) || + (flags&SHLOCK) && (cmd&FSHLOCK)) return; u.u_pofile[uap->fd] = flocki(fp->f_inode, u.u_pofile[uap->fd], cmd); } +/* + * Truncate a file given its path name. + */ truncate() { struct a { char *fname; - int length; + u_long length; } *uap = (struct a *)u.u_ap; struct inode *ip; - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip == NULL) return; if (access(ip, IWRITE)) @@ -791,16 +816,18 @@ truncate() goto bad; } itrunc(ip, uap->length); - return; bad: iput(ip); } +/* + * Truncate a file given a file descriptor. + */ ftruncate() { struct a { int fd; - int length; + u_long length; } *uap = (struct a *)u.u_ap; struct inode *ip; struct file *fp; @@ -819,17 +846,267 @@ ftruncate() ip = fp->f_inode; ilock(ip); itrunc(ip, uap->length); + iunlock(ip); +} + +/* + * Synch an open file. + */ +fsync() +{ + struct a { + int fd; + } *uap = (struct a *)u.u_ap; + struct inode *ip; + struct file *fp; + + fp = getf(uap->fd); + if (fp == NULL) + return; + if (fp->f_type == DTYPE_SOCKET) { + u.u_error = EINVAL; + return; + } + ip = fp->f_inode; + ilock(ip); + syncip(ip); + iunlock(ip); } +/* + * Rename system call. + * rename("foo", "bar"); + * is essentially + * unlink("bar"); + * link("foo", "bar"); + * unlink("foo"); + * but ``atomically''. Can't do full commit without saving state in the + * inode on disk which isn't feasible at this time. Best we can do is + * always guarantee the target exists. + * + * Basic algorithm is: + * + * 1) Bump link count on source while we're linking it to the + * target. This also insure the inode won't be deleted out + * from underneath us while we work. + * 2) Link source to destination. If destination already exists, + * delete it first. + * 3) Unlink source reference to inode if still around. + * 4) If a directory was moved and the parent of the destination + * is different from the source, patch the ".." 
entry in the + * directory. + * + * Source and destination must either both be directories, or both + * not be directories. If target is a directory, it must be empty. + */ rename() { -#ifdef notdef struct a { char *from; char *to; } *uap; -#endif + register struct inode *ip, *xp, *dp; + int oldparent, parentdifferent, doingdirectory; + + uap = (struct a *)u.u_ap; + ip = namei(uchar, LOOKUP | LOCKPARENT, 0); + if (ip == NULL) + return; + dp = u.u_pdir; + oldparent = 0, doingdirectory = 0; + if ((ip->i_mode&IFMT) == IFDIR) { + register struct direct *d; + + d = &u.u_dent; + /* + * Avoid "." and ".." for obvious reasons. + */ + if (d->d_name[0] == '.') { + if (d->d_namlen == 1 || + (d->d_namlen == 2 && d->d_name[1] == '.')) { + u.u_error = EINVAL; + iput(ip); + return; + } + } + oldparent = dp->i_number; + doingdirectory++; + } + irele(dp); + + /* + * 1) Bump link count while we're moving stuff + * around. If we crash somewhere before + * completing our work, the link count + * may be wrong, but correctable. + */ + ip->i_nlink++; + ip->i_flag |= ICHG; + iupdat(ip, &time, &time, 1); + iunlock(ip); + + /* + * When the target exists, both the directory + * and target inodes are returned locked. + */ + u.u_dirp = (caddr_t)uap->to; + xp = namei(uchar, CREATE | LOCKPARENT, 0); + if (u.u_error) + goto out; + dp = u.u_pdir; + /* + * 2) If target doesn't exist, link the target + * to the source and unlink the source. + * Otherwise, rewrite the target directory + * entry to reference the source inode and + * expunge the original entry's existence. + */ + parentdifferent = oldparent != dp->i_number; + if (xp == NULL) { + if (dp->i_dev != ip->i_dev) { + u.u_error = EXDEV; + goto bad; + } + /* + * Account for ".." in directory. + * When source and destination have the + * same parent we don't fool with the + * link count -- this isn't required + * because we do a similar check below. 
+ */ + if (doingdirectory && parentdifferent) { + dp->i_nlink++; + dp->i_flag |= ICHG; + iupdat(dp, &time, &time, 1); + } + direnter(ip); + if (u.u_error) + goto out; + } else { + if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) { + u.u_error = EXDEV; + goto bad; + } + /* + * Target must be empty if a directory. + * Also, insure source and target are + * compatible (both directories, or both + * not directories). + */ + if ((xp->i_mode&IFMT) == IFDIR) { + if (!dirempty(xp)) { + u.u_error = EEXIST; /* XXX */ + goto bad; + } + if (!doingdirectory) { + u.u_error = ENOTDIR; + goto bad; + } + } else if (doingdirectory) { + u.u_error = EISDIR; + goto bad; + } + dirrewrite(dp, ip); + if (u.u_error) + goto bad1; + /* + * If this is a directory we know it is + * empty and we can squash the inode and + * any space associated with it. Otherwise, + * we've got a plain file and the link count + * simply needs to be adjusted. + */ + if (doingdirectory) { + xp->i_nlink = 0; + itrunc(xp, (u_long)0); + } else + xp->i_nlink--; + xp->i_flag |= ICHG; + iput(xp); + } + + /* + * 3) Unlink the source. + */ + u.u_dirp = uap->from; + dp = namei(uchar, DELETE, 0); + /* + * Insure directory entry still exists and + * has not changed since the start of all + * this. If either has occured, forget about + * about deleting the original entry and just + * adjust the link count in the inode. + */ + if (dp == NULL || u.u_dent.d_ino != ip->i_number) { + ip->i_nlink--; + ip->i_flag |= ICHG; + } else { + /* + * If source is a directory, must adjust + * link count of parent directory also. + * If target didn't exist and source and + * target have the same parent, then we + * needn't touch the link count, it all + * balances out in the end. Otherwise, we + * must do so to reflect deletion of ".." + * done above. 
+ */ + if (doingdirectory && (xp != NULL || parentdifferent)) { + dp->i_nlink--; + dp->i_flag |= ICHG; + } + if (dirremove()) { + ip->i_nlink--; + ip->i_flag |= ICHG; + } + } + irele(ip); + if (dp) + iput(dp); + + /* + * 4) Renaming a directory with the parent + * different requires ".." to be rewritten. + * The window is still there for ".." to + * be inconsistent, but this is unavoidable, + * and a lot shorter than when it was done + * in a user process. + */ + if (doingdirectory && parentdifferent && u.u_error == 0) { + struct dirtemplate dirbuf; + u.u_dirp = uap->to; + ip = namei(uchar, LOOKUP | LOCKPARENT, 0); + if (ip == NULL) { + printf("rename: .. went away\n"); + return; + } + dp = u.u_pdir; + if ((ip->i_mode&IFMT) != IFDIR) { + printf("rename: .. not a directory\n"); + goto stuck; + } + u.u_error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, 1, (int *)0); + if (u.u_error == 0) { + dirbuf.dotdot_ino = dp->i_number; + (void) rdwri(UIO_WRITE, ip, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, 1, (int *)0); + } +stuck: + irele(dp); + iput(ip); + } + return; +bad: + iput(u.u_pdir); +bad1: + if (xp) + irele(xp); +out: + ip->i_nlink--; + ip->i_flag |= ICHG; + irele(ip); } /* diff --git a/usr/src/sys/vm/vm_swap.c b/usr/src/sys/vm/vm_swap.c index c264cd8d38..51a4997852 100644 --- a/usr/src/sys/vm/vm_swap.c +++ b/usr/src/sys/vm/vm_swap.c @@ -1,4 +1,4 @@ -/* vm_swap.c 4.14 82/11/02 */ +/* vm_swap.c 4.15 82/11/13 */ #include "../h/param.h" #include "../h/systm.h" @@ -10,6 +10,7 @@ #include "../h/map.h" #include "../h/uio.h" #include "../h/file.h" +#include "../h/nami.h" struct buf rswbuf; /* @@ -76,7 +77,7 @@ oswapon() dev_t dev; register struct swdevt *sp; - ip = namei(uchar, 0, 1); + ip = namei(uchar, LOOKUP, 1); if (ip == NULL) return; if ((ip->i_mode&IFMT) != IFBLK) { -- 2.20.1