* Copyright (c) 1989 The Regents of the University of California.
* This code is derived from software contributed to Berkeley by
* Rick Macklem at The University of Guelph.
* %sccs.include.redist.c%
* @(#)nfs_serv.c 7.31 (Berkeley) %G%
* nfs version 2 server calls to vnode ops
* - these routines generally have 3 phases
* 1 - break down and validate rpc request in mbuf list
* 2 - do the vnode ops for the request
* (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
* 3 - build the rpc reply in an mbuf list
* - do not mix the phases, since the nfsm_?? macros can return failures
* on a bad rpc or similar and do not do any vrele() or vput()'s
* - the nfsm_reply() macro generates an nfs rpc reply carrying the nfs
* error number iff error != 0, whereas
* returning an error from the server function implies a fatal error,
* such as a badly constructed rpc request, that should be dropped without
* a reply.
#include "../ufs/quota.h"
#include "../ufs/inode.h"
/*
 * Values defined outside this file.  nfs_xdrneg1 is compared directly
 * against raw request fields further down (e.g. sp->sa_uid != nfs_xdrneg1).
 * NOTE(review): presumably these hold wire-format (XDR) constants -- confirm.
 */
extern u_long nfs_procids
[NFS_NPROCS
];
extern u_long nfs_xdrneg1
;
extern u_long nfs_false
, nfs_true
;
/*
 * Table of nfstype values, declared with 9 slots.
 * NOTE(review): the initializer is cut off in this view after the seventh
 * entry; presumably the table is indexed by vnode type -- confirm.
 */
nfstype nfs_type
[9]={ NFNON
, NFREG
, NFDIR
, NFBLK
, NFCHR
, NFLNK
, NFNON
,
/*
 * nfsrv_getattr: get-attributes service routine.
 * Steps visible here:
 *   - nfsrv_fhtovp() turns the file handle (fhp) into a vnode (vp)
 *   - VOP_GETATTR() reads the vnode attributes into *vap
 *   - nfsm_build() obtains fp, an nfsv2_fattr of NFSX_FATTR bytes
 * NOTE(review): the error exits, the vnode release and the code that fills
 * in *fp are not visible in this view -- confirm against the full file.
 */
nfsrv_getattr(mrep
, md
, dpos
, cred
, xid
, mrq
, repstat
)
register struct nfsv2_fattr
*fp
;
register struct vattr
*vap
= &va
;
struct mbuf
*mb
, *mb2
, *mreq
;
if (error
= nfsrv_fhtovp(fhp
, TRUE
, &vp
, cred
))
error
= VOP_GETATTR(vp
, vap
, cred
);
nfsm_build(fp
, struct nfsv2_fattr
*, NFSX_FATTR
);
/*
 * nfsrv_setattr: set-attributes service routine.
 * Steps visible here:
 *   - nfsm_disect() locates the nfsv2_sattr (sp) in the request
 *   - nfsrv_fhtovp() maps the file handle to vp, then nfsrv_access()
 *     is called with VWRITE
 *   - each sattr field that differs from nfs_xdrneg1 is converted and
 *     stored in *vap: mode, uid, gid, size, atime, flags, mtime
 *   - VOP_SETATTR() applies *vap, VOP_GETATTR() re-reads the attributes
 *     and nfsm_build() obtains fp for the reply attributes
 * NOTE(review): error exits and closing braces are missing from this view,
 * so the exact nesting of the tests below cannot be confirmed here.
 */
nfsrv_setattr(mrep
, md
, dpos
, cred
, xid
, mrq
, repstat
)
register struct vattr
*vap
= &va
;
register struct nfsv2_sattr
*sp
;
register struct nfsv2_fattr
*fp
;
struct mbuf
*mb
, *mb2
, *mreq
;
nfsm_disect(sp
, struct nfsv2_sattr
*, NFSX_SATTR
);
if (error
= nfsrv_fhtovp(fhp
, TRUE
, &vp
, cred
))
if (error
= nfsrv_access(vp
, VWRITE
, cred
))
* There is a bug in the Sun client that puts 0xffff in the mode
* field of sattr when it should put in 0xffffffff. The u_short
* --> check the low order 2 bytes for 0xffff
/* Only the low 16 bits of sa_mode are tested for the "not set" value. */
if ((fxdr_unsigned(int, sp
->sa_mode
) & 0xffff) != 0xffff)
vap
->va_mode
= nfstov_mode(sp
->sa_mode
);
/* uid, gid and size: copied only when the field is not nfs_xdrneg1. */
if (sp
->sa_uid
!= nfs_xdrneg1
)
vap
->va_uid
= fxdr_unsigned(uid_t
, sp
->sa_uid
);
if (sp
->sa_gid
!= nfs_xdrneg1
)
vap
->va_gid
= fxdr_unsigned(gid_t
, sp
->sa_gid
);
if (sp
->sa_size
!= nfs_xdrneg1
)
vap
->va_size
= fxdr_unsigned(u_long
, sp
->sa_size
);
* The usec field of sa_atime is overloaded with the va_flags field
* for 4.4BSD clients. Hopefully other clients always set both the
* sec and usec fields to -1 when not setting the atime.
/* atime: seconds are taken from the request, microseconds forced to 0. */
if (sp
->sa_atime
.tv_sec
!= nfs_xdrneg1
) {
vap
->va_atime
.tv_sec
= fxdr_unsigned(long, sp
->sa_atime
.tv_sec
);
vap
->va_atime
.tv_usec
= 0;
/* sa_atime.tv_usec is reused to carry va_flags. */
if (sp
->sa_atime
.tv_usec
!= nfs_xdrneg1
)
vap
->va_flags
= fxdr_unsigned(u_long
, sp
->sa_atime
.tv_usec
);
/* mtime: converted as a whole by fxdr_time(). */
if (sp
->sa_mtime
.tv_sec
!= nfs_xdrneg1
)
fxdr_time(&sp
->sa_mtime
, &vap
->va_mtime
);
if (error
= VOP_SETATTR(vp
, vap
, cred
)) {
error
= VOP_GETATTR(vp
, vap
, cred
);
nfsm_build(fp
, struct nfsv2_fattr
*, NFSX_FATTR
);
/*
 * nfsrv_lookup: name lookup service routine.
 * Steps visible here:
 *   - nfsm_srvstrsiz() reads the name length (len), bounded by NFS_MAXNAMLEN
 *   - nfs_namei() is run with ni_nameiop = LOOKUP | LOCKLEAF
 *   - the file handle is cleared with bzero() and rebuilt: fh_fsid from
 *     vp->v_mount->mnt_stat.f_fsid, fh_fid from VFS_VPTOFH()
 *   - VOP_GETATTR() reads the attributes; nfsm_reply() is invoked with
 *     NFSX_FH + NFSX_FATTR and nfsm_build() obtains fp
 * NOTE(review): where vp is assigned and how errors leave the function are
 * not visible in this view.
 */
nfsrv_lookup(mrep
, md
, dpos
, cred
, xid
, mrq
, repstat
)
register struct nfsv2_fattr
*fp
;
register struct nameidata
*ndp
= &nami
;
struct mbuf
*mb
, *mb2
, *mreq
;
struct vattr va
, *vap
= &va
;
nfsm_srvstrsiz(len
, NFS_MAXNAMLEN
);
ndp
->ni_nameiop
= LOOKUP
| LOCKLEAF
;
if (error
= nfs_namei(ndp
, fhp
, len
, &md
, &dpos
))
bzero((caddr_t
)fhp
, sizeof(nfh
));
fhp
->fh_fsid
= vp
->v_mount
->mnt_stat
.f_fsid
;
if (error
= VFS_VPTOFH(vp
, &fhp
->fh_fid
)) {
error
= VOP_GETATTR(vp
, vap
, cred
);
nfsm_reply(NFSX_FH
+NFSX_FATTR
);
nfsm_build(fp
, struct nfsv2_fattr
*, NFSX_FATTR
);
/*
 * nfsrv_readlink: read-symbolic-link service routine.
 * Steps visible here:
 *   - iv[] has one iovec per MLEN-sized piece of NFS_MAXPATHLEN
 *   - the while loop gets mbufs with MGET() until len reaches
 *     NFS_MAXPATHLEN, trimming m_len so the total does not exceed it,
 *     and points one iovec at the data area of each mbuf
 *   - the uio is marked UIO_SYSSPACE
 *   - nfsrv_fhtovp() maps the file handle to vp; v_type is tested
 *     against VLNK before VOP_READLINK() is called
 *   - nfsm_reply(NFSX_UNSIGNED); when uio_resid > 0 nfsm_adj() is applied
 *     to the chain mp3; nfsm_build() then obtains p
 * NOTE(review): how len, tlen and mp3 are maintained is not visible in
 * this view.
 */
nfsrv_readlink(mrep
, md
, dpos
, cred
, xid
, mrq
, repstat
)
struct iovec iv
[(NFS_MAXPATHLEN
+MLEN
-1)/MLEN
];
register struct iovec
*ivp
= iv
;
register struct mbuf
*mp
;
struct mbuf
*mb
, *mb2
, *mp2
, *mp3
, *mreq
;
struct uio io
, *uiop
= &io
;
while (len
< NFS_MAXPATHLEN
) {
MGET(mp
, M_WAIT
, MT_DATA
);
/* Clamp the mbuf so the buffer chain totals at most NFS_MAXPATHLEN. */
if ((len
+mp
->m_len
) > NFS_MAXPATHLEN
) {
mp
->m_len
= NFS_MAXPATHLEN
-len
;
ivp
->iov_base
= mtod(mp
, caddr_t
);
ivp
->iov_len
= mp
->m_len
;
uiop
->uio_segflg
= UIO_SYSSPACE
;
if (error
= nfsrv_fhtovp(fhp
, TRUE
, &vp
, cred
)) {
if (vp
->v_type
!= VLNK
) {
error
= VOP_READLINK(vp
, uiop
, cred
);
nfsm_reply(NFSX_UNSIGNED
);
if (uiop
->uio_resid
> 0) {
nfsm_adj(mp3
, NFS_MAXPATHLEN
-tlen
, tlen
-len
);
nfsm_build(p
, u_long
*, NFSX_UNSIGNED
);
/*
 * nfsrv_read: read service routine.
 * Steps visible here:
 *   - nfsm_disect() fetches a word from the request and off is decoded
 *     from it; nfsm_srvstrsiz() reads cnt, bounded by NFS_MAXDATA
 *   - nfsrv_fhtovp() maps the file handle to vp; nfsrv_access() is called
 *     with VREAD | VEXEC
 *   - an iovec array sized for NFS_MAXDATA in MLEN pieces is MALLOC()ed
 *     from M_TEMP and pointed at freshly MGET()ed mbufs
 *   - VOP_READ() is issued with IO_NODELOCKED and a UIO_SYSSPACE uio
 *   - the iovec array is released with FREE(), VOP_GETATTR() reads the
 *     attributes, and the reply is built with nfsm_reply()/nfsm_build()
 *   - nfsm_adj() is applied when cnt, tlen and len are not all equal
 * NOTE(review): the mbuf loop, the computation of left/tlen/len and the
 * declarations of m, m3 and iv2 are not visible in this view.
 */
nfsrv_read(mrep
, md
, dpos
, cred
, xid
, mrq
, repstat
)
register struct iovec
*iv
;
register struct nfsv2_fattr
*fp
;
struct mbuf
*mb
, *mb2
, *mreq
;
struct uio io
, *uiop
= &io
;
struct vattr va
, *vap
= &va
;
int i
, cnt
, len
, left
, siz
, tlen
;
nfsm_disect(p
, u_long
*, NFSX_UNSIGNED
);
off
= fxdr_unsigned(off_t
, *p
);
nfsm_srvstrsiz(cnt
, NFS_MAXDATA
);
if (error
= nfsrv_fhtovp(fhp
, TRUE
, &vp
, cred
))
if (error
= nfsrv_access(vp
, VREAD
| VEXEC
, cred
)) {
* Generate the mbuf list with the uio_iov ref. to it.
MALLOC(iv
, struct iovec
*,
((NFS_MAXDATA
+MLEN
-1)/MLEN
) * sizeof (struct iovec
), M_TEMP
,
MGET(m
, M_WAIT
, MT_DATA
);
/* Use no more of this mbuf than is still wanted (left). */
siz
= min(M_TRAILINGSPACE(m
), left
);
iv
->iov_base
= mtod(m
, caddr_t
);
uiop
->uio_segflg
= UIO_SYSSPACE
;
error
= VOP_READ(vp
, uiop
, IO_NODELOCKED
, cred
);
FREE((caddr_t
)iv2
, M_TEMP
);
if (error
= VOP_GETATTR(vp
, vap
, cred
))
nfsm_reply(NFSX_FATTR
+NFSX_UNSIGNED
);
nfsm_build(fp
, struct nfsv2_fattr
*, NFSX_FATTR
);
if (cnt
!= tlen
|| tlen
!= len
)
nfsm_adj(m3
, cnt
-tlen
, tlen
-len
);
nfsm_build(p
, u_long
*, NFSX_UNSIGNED
);
/*
 * nfsrv_write: write service routine.
 * Steps visible here:
 *   - nfsm_disect() fetches 4*NFSX_UNSIGNED bytes of arguments; off and
 *     len are decoded from them
 *   - len is range-checked (len > NFS_MAXDATA || len <= 0)
 *   - the position of the data within the request mbuf chain is worked
 *     out from dpos, md and mp
 *   - nfsrv_fhtovp() maps the file handle to vp; nfsrv_access() is called
 *     with VWRITE
 *   - the uio is set up as UIO_WRITE / UIO_SYSSPACE and the request mbufs
 *     are handed to VOP_WRITE() (IO_SYNC | IO_NODELOCKED), at most
 *     NFS_MAXIOVEC iovecs per call
 *   - VOP_GETATTR() reads the attributes and nfsm_build() obtains fp
 * NOTE(review): many statements (pointer advances, branch bodies, error
 * exits) are missing from this view; the two assignments to ivp->iov_len
 * below presumably belong to an if/else on the remaining length -- confirm.
 */
nfsrv_write(mrep
, md
, dpos
, cred
, xid
, mrq
, repstat
)
struct mbuf
*mrep
, *md
, **mrq
;
register struct iovec
*ivp
;
register struct mbuf
*mp
;
register struct nfsv2_fattr
*fp
;
struct iovec iv
[NFS_MAXIOVEC
];
register struct vattr
*vap
= &va
;
struct mbuf
*mb
, *mb2
, *mreq
;
struct uio io
, *uiop
= &io
;
nfsm_disect(p
, u_long
*, 4*NFSX_UNSIGNED
);
off
= fxdr_unsigned(off_t
, *++p
);
len
= fxdr_unsigned(long, *p
);
if (len
> NFS_MAXDATA
|| len
<= 0) {
/* dpos sits exactly at the end of the data held in md. */
if (dpos
== (mtod(md
, caddr_t
)+md
->m_len
)) {
siz
= dpos
-mtod(mp
, caddr_t
);
if (error
= nfsrv_fhtovp(fhp
, TRUE
, &vp
, cred
))
if (error
= nfsrv_access(vp
, VWRITE
, cred
)) {
uiop
->uio_rw
= UIO_WRITE
;
uiop
->uio_segflg
= UIO_SYSSPACE
;
* Do up to NFS_MAXIOVEC mbufs of write each iteration of the
while (len
> 0 && uiop
->uio_resid
== 0) {
/* Inner loop: one iovec per request mbuf, up to NFS_MAXIOVEC. */
while (len
> 0 && uiop
->uio_iovcnt
< NFS_MAXIOVEC
&& mp
!= NULL
) {
ivp
->iov_base
= mtod(mp
, caddr_t
);
ivp
->iov_len
= xfer
= len
;
ivp
->iov_len
= xfer
= mp
->m_len
;
if (M_HASCL(mp
) && (((u_long
)ivp
->iov_base
) & CLOFSET
) == 0)
ivp
->iov_op
= NULL
; /* what should it be ?? */
/* Data still expected but the mbuf chain has run out. */
if (len
> 0 && mp
== NULL
) {
if (error
= VOP_WRITE(vp
, uiop
, IO_SYNC
| IO_NODELOCKED
,
error
= VOP_GETATTR(vp
, vap
, cred
);
nfsm_build(fp
, struct nfsv2_fattr
*, NFSX_FATTR
);
* Now truncates the file to 0 length via setattr if it already exists.
/*
 * nfsrv_create: create service routine.
 * Steps visible here:
 *   - nfsm_srvstrsiz() reads the name length; nfs_namei() is run with
 *     ni_nameiop = CREATE | LOCKPARENT | LOCKLEAF
 *   - nfsm_disect() fetches NFSX_SATTR bytes of attributes (p)
 *   - when ndp->ni_vp is NULL the object is created: va_type comes from
 *     IFTOVT() of the first attribute word, va_mode from nfstov_mode(),
 *     rdev from the word at p+3; VREG goes to VOP_CREATE(); the device
 *     branch calls suser(), stores va_rdev, calls VOP_MKNOD() and then
 *     repeats nfs_namei() with LOOKUP | LOCKLEAF | HASBUF
 *   - otherwise VOP_SETATTR() is applied to the existing vnode
 *   - the file handle is rebuilt (bzero, fh_fsid, VFS_VPTOFH()),
 *     VOP_GETATTR() reads the attributes, and the reply is built with
 *     nfsm_reply(NFSX_FH + NFSX_FATTR) and nfsm_build()
 * NOTE(review): branch bodies, error exits and vnode releases are missing
 * from this view; the final ni_dvp == ni_vp test has no visible body.
 */
nfsrv_create(mrep
, md
, dpos
, cred
, xid
, mrq
, repstat
)
struct mbuf
*mrep
, *md
, **mrq
;
register struct nfsv2_fattr
*fp
;
register struct vattr
*vap
= &va
;
register struct nameidata
*ndp
= &nami
;
struct mbuf
*mb
, *mb2
, *mreq
;
nfsm_srvstrsiz(len
, NFS_MAXNAMLEN
);
ndp
->ni_nameiop
= CREATE
| LOCKPARENT
| LOCKLEAF
;
if (error
= nfs_namei(ndp
, fhp
, len
, &md
, &dpos
))
nfsm_disect(p
, u_long
*, NFSX_SATTR
);
* Iff doesn't exist, create it
* otherwise just truncate to 0 length
* should I set the mode too ??
if (ndp
->ni_vp
== NULL
) {
vap
->va_type
= IFTOVT(fxdr_unsigned(u_long
, *p
));
if (vap
->va_type
== VNON
)
vap
->va_mode
= nfstov_mode(*p
);
rdev
= fxdr_unsigned(long, *(p
+3));
if (vap
->va_type
== VREG
) {
if (error
= VOP_CREATE(ndp
, vap
))
} else if (vap
->va_type
== VCHR
|| vap
->va_type
== VBLK
||
if (vap
->va_type
== VCHR
&& rdev
== 0xffffffff)
if (vap
->va_type
== VFIFO
) {
} else if (error
= suser(cred
, (short *)0)) {
vap
->va_rdev
= (dev_t
)rdev
;
if (error
= VOP_MKNOD(ndp
, vap
, cred
))
ndp
->ni_nameiop
= LOOKUP
| LOCKLEAF
| HASBUF
;
if (error
= nfs_namei(ndp
, fhp
, len
, &md
, &dpos
))
if (error
= VOP_SETATTR(vp
, vap
, cred
)) {
bzero((caddr_t
)fhp
, sizeof(nfh
));
fhp
->fh_fsid
= vp
->v_mount
->mnt_stat
.f_fsid
;
if (error
= VFS_VPTOFH(vp
, &fhp
->fh_fid
)) {
error
= VOP_GETATTR(vp
, vap
, cred
);
nfsm_reply(NFSX_FH
+NFSX_FATTR
);
nfsm_build(fp
, struct nfsv2_fattr
*, NFSX_FATTR
);
if (ndp
->ni_dvp
== ndp
->ni_vp
)
/*
 * nfsrv_remove: remove (unlink) service routine.
 * Steps visible here:
 *   - nfsm_srvstrsiz() reads the name length; nfs_namei() is run with
 *     ni_nameiop = DELETE | LOCKPARENT | LOCKLEAF
 *   - a directory (VDIR) is only accepted when suser() returns 0
 *   - a vnode flagged VROOT is tested for separately
 *   - vnode_pager_uncache() is called on vp, result discarded
 * NOTE(review): the actual remove call, the bodies of the tests and the
 * reply construction are not visible in this view.
 */
nfsrv_remove(mrep
, md
, dpos
, cred
, xid
, mrq
, repstat
)
struct mbuf
*mrep
, *md
, **mrq
;
register struct nameidata
*ndp
= &nami
;
nfsm_srvstrsiz(len
, NFS_MAXNAMLEN
);
ndp
->ni_nameiop
= DELETE
| LOCKPARENT
| LOCKLEAF
;
if (error
= nfs_namei(ndp
, fhp
, len
, &md
, &dpos
))
if (vp
->v_type
== VDIR
&&
(error
= suser(cred
, (short *)0)))
* Don't unlink a mounted file.
if (vp
->v_flag
& VROOT
) {
(void) vnode_pager_uncache(vp
);
/*
 * nfsrv_rename: rename service routine.
 * Steps visible here:
 *   - source name: nfsm_srvstrsiz() reads len, then nfs_namei() is run on
 *     ndp with ni_nameiop = DELETE | WANTPARENT and handle ffhp
 *   - target name: nfsm_strsiz() reads len2, then nfs_namei() is run on
 *     tond with RENAME | LOCKPARENT | LOCKLEAF | NOCACHE and handle tfhp
 *   - sanity tests: directory/non-directory mismatch between fvp and tvp,
 *     fvp and tdvp on different mounts, fvp equal to tdvp or tvp
 *   - VOP_RENAME(ndp, &tond) performs the operation
 * NOTE(review): the assignment of ndp, fvp, tvp and tdvp, the bodies of the
 * tests and the cr_uid handling mentioned below are not visible in this view.
 */
nfsrv_rename(mrep
, md
, dpos
, cred
, xid
, mrq
, repstat
)
struct mbuf
*mrep
, *md
, **mrq
;
register struct nameidata
*ndp
;
struct nameidata nami
, tond
;
struct vnode
*fvp
, *tvp
, *tdvp
;
nfsm_srvstrsiz(len
, NFS_MAXNAMLEN
);
* Remember if we are root so that we can reset cr_uid before
* the second nfs_namei() call
ndp
->ni_nameiop
= DELETE
| WANTPARENT
;
if (error
= nfs_namei(ndp
, ffhp
, len
, &md
, &dpos
))
nfsm_strsiz(len2
, NFS_MAXNAMLEN
);
tond
.ni_nameiop
= RENAME
| LOCKPARENT
| LOCKLEAF
| NOCACHE
;
if (error
= nfs_namei(&tond
, tfhp
, len2
, &md
, &dpos
)) {
/* Source is a directory but the existing target is not. */
if (fvp
->v_type
== VDIR
&& tvp
->v_type
!= VDIR
) {
} else if (fvp
->v_type
!= VDIR
&& tvp
->v_type
== VDIR
) {
/* Source and target directory live on different mounts. */
if (fvp
->v_mount
!= tdvp
->v_mount
) {
if (fvp
== tdvp
|| fvp
== tvp
)
error
= VOP_RENAME(ndp
, &tond
);
/*
 * nfsrv_link: hard-link service routine.
 * Steps visible here:
 *   - nfsm_srvstrsiz() reads the new name length
 *   - nfsrv_fhtovp() maps the existing file handle to vp; note the second
 *     argument is FALSE here, unlike the TRUE used by the other callers
 *   - linking a directory (VDIR) is only accepted when suser() returns 0
 *   - nfs_namei() is run with CREATE | LOCKPARENT on handle dfhp
 *   - vp and xp are compared for being on the same mount
 *   - VOP_LINK(vp, ndp) performs the operation
 * NOTE(review): the assignment of xp, the bodies of the tests and the
 * cleanup after the final ni_dvp == ni_vp test are not visible in this view.
 */
nfsrv_link(mrep
, md
, dpos
, cred
, xid
, mrq
, repstat
)
struct mbuf
*mrep
, *md
, **mrq
;
register struct nameidata
*ndp
= &nami
;
nfsm_srvstrsiz(len
, NFS_MAXNAMLEN
);
if (error
= nfsrv_fhtovp(fhp
, FALSE
, &vp
, cred
))
if (vp
->v_type
== VDIR
&& (error
= suser(cred
, NULL
)))
ndp
->ni_nameiop
= CREATE
| LOCKPARENT
;
if (error
= nfs_namei(ndp
, dfhp
, len
, &md
, &dpos
))
if (vp
->v_mount
!= xp
->v_mount
)
error
= VOP_LINK(vp
, ndp
);
if (ndp
->ni_dvp
== ndp
->ni_vp
)
* nfs symbolic link service
/*
 * nfsrv_symlink: symbolic-link creation service routine.
 * Steps visible here:
 *   - nfsm_srvstrsiz() reads the link name length; nfs_namei() is run with
 *     ni_nameiop = CREATE | LOCKPARENT
 *   - nfsm_strsiz() reads the target path length (len2), bounded by
 *     NFS_MAXPATHLEN, and a buffer of len2 + 1 bytes is MALLOC()ed from
 *     M_TEMP with M_WAITOK
 *   - a UIO_SYSSPACE uio (io) is set up
 *   - nfsm_disect() locates the nfsv2_sattr; va_mode is taken from sa_mode
 *     as a u_short
 *   - VOP_SYMLINK(ndp, vap, pathcp) performs the operation
 * NOTE(review): the copy of the path into pathcp, its termination and
 * release, and the bodies of the ni_dvp == ni_vp tests are not visible in
 * this view.
 */
nfsrv_symlink(mrep
, md
, dpos
, cred
, xid
, mrq
, repstat
)
struct mbuf
*mrep
, *md
, **mrq
;
register struct nameidata
*ndp
= &nami
;
register struct vattr
*vap
= &va
;
nfsm_srvstrsiz(len
, NFS_MAXNAMLEN
);
ndp
->ni_nameiop
= CREATE
| LOCKPARENT
;
if (error
= nfs_namei(ndp
, fhp
, len
, &md
, &dpos
))
nfsm_strsiz(len2
, NFS_MAXPATHLEN
);
MALLOC(pathcp
, caddr_t
, len2
+ 1, M_TEMP
, M_WAITOK
);
io
.uio_segflg
= UIO_SYSSPACE
;
nfsm_disect(sp
, struct nfsv2_sattr
*, NFSX_SATTR
);
if (ndp
->ni_dvp
== ndp
->ni_vp
)
vap
->va_mode
= fxdr_unsigned(u_short
, sp
->sa_mode
);
error
= VOP_SYMLINK(ndp
, vap
, pathcp
);
if (ndp
->ni_dvp
== ndp
->ni_vp
)
/*
 * nfsrv_mkdir: make-directory service routine.
 * Steps visible here:
 *   - nfsm_srvstrsiz() reads the name length; nfs_namei() is run with
 *     ni_nameiop = CREATE | LOCKPARENT
 *   - nfsm_disect() fetches one word and va_mode is set from it through
 *     nfstov_mode()
 *   - VOP_MKDIR(ndp, vap) performs the operation
 *   - the file handle is rebuilt (bzero, fh_fsid, VFS_VPTOFH()),
 *     VOP_GETATTR() reads the attributes, and the reply is built with
 *     nfsm_reply(NFSX_FH + NFSX_FATTR) and nfsm_build()
 * NOTE(review): where vp is assigned, the error exits and the body of the
 * final ni_dvp == ni_vp test are not visible in this view.
 */
nfsrv_mkdir(mrep
, md
, dpos
, cred
, xid
, mrq
, repstat
)
struct mbuf
*mrep
, *md
, **mrq
;
register struct vattr
*vap
= &va
;
register struct nfsv2_fattr
*fp
;
register struct nameidata
*ndp
= &nami
;
struct mbuf
*mb
, *mb2
, *mreq
;
nfsm_srvstrsiz(len
, NFS_MAXNAMLEN
);
ndp
->ni_nameiop
= CREATE
| LOCKPARENT
;
if (error
= nfs_namei(ndp
, fhp
, len
, &md
, &dpos
))
nfsm_disect(p
, u_long
*, NFSX_UNSIGNED
);
vap
->va_mode
= nfstov_mode(*p
++);
if (error
= VOP_MKDIR(ndp
, vap
))
bzero((caddr_t
)fhp
, sizeof(nfh
));
fhp
->fh_fsid
= vp
->v_mount
->mnt_stat
.f_fsid
;
if (error
= VFS_VPTOFH(vp
, &fhp
->fh_fid
)) {
error
= VOP_GETATTR(vp
, vap
, cred
);
nfsm_reply(NFSX_FH
+NFSX_FATTR
);
nfsm_build(fp
, struct nfsv2_fattr
*, NFSX_FATTR
);
if (ndp
->ni_dvp
== ndp
->ni_vp
)
/*
 * nfsrv_rmdir: remove-directory service routine.
 * Steps visible here:
 *   - nfsm_srvstrsiz() reads the name length; nfs_namei() is run with
 *     ni_nameiop = DELETE | LOCKPARENT | LOCKLEAF
 *   - the vnode type is tested against VDIR
 *   - ni_dvp is compared with ni_vp
 * NOTE(review): the bodies of both tests, the mounted-file check announced
 * by the remark below, the remove call itself and the reply are not visible
 * in this view.
 */
nfsrv_rmdir(mrep
, md
, dpos
, cred
, xid
, mrq
, repstat
)
struct mbuf
*mrep
, *md
, **mrq
;
register struct nameidata
*ndp
= &nami
;
nfsm_srvstrsiz(len
, NFS_MAXNAMLEN
);
ndp
->ni_nameiop
= DELETE
| LOCKPARENT
| LOCKLEAF
;
if (error
= nfs_namei(ndp
, fhp
, len
, &md
, &dpos
))
if (vp
->v_type
!= VDIR
) {
* Don't unlink a mounted file.
if (ndp
->ni_dvp
== ndp
->ni_vp
)
* - mallocs what it thinks is enough to read:
* count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
* - loops around building the reply;
* if the output generated exceeds count, break out of the loop.
* The nfsm_clget macro is used here so that the reply will be packed
* tightly in mbuf clusters.
* - it only knows that it has encountered eof when the VOP_READDIR()
* reads nothing
* - as such, one readdir rpc will return eof false although you are there,
* and then the next will return eof
* - it trims out records with d_ino == 0;
* this doesn't matter for Unix clients, but they might confuse other clients.
* NB: It is tempting to set eof to true if the VOP_READDIR() reads less
* than requested, but this may not apply to all filesystems. For
* example, client NFS does not (although it is never remote mounted anyhow).
* PS: The NFS protocol spec. does not clarify what the "count" byte
* argument is a count of: just name strings and file id's, or the
* entire reply rpc, or ...
* I tried just file name and id sizes and it confused the Sun client,
* so I am using the full rpc size now. The "paranoia.." comment refers
* to including the status longwords that are not a part of the dir.
* "entry" structures, but are in the rpc.
/*
 * nfsrv_readdir: read-directory service routine.
 * Steps visible here:
 *   - nfsm_disect() fetches two words: the requested offset (toff) and the
 *     byte count (cnt)
 *   - toff is split into off (rounded down to a DIRBLKSIZ boundary) and on
 *     (the remainder inside that block); siz is cnt rounded up to a
 *     multiple of DIRBLKSIZ; cnt is tested against NFS_MAXREADDIR
 *   - nfsrv_fhtovp() maps the file handle to vp; nfsrv_access() is called
 *     with VEXEC
 *   - a buffer of siz bytes (rbuf) is MALLOC()ed from M_TEMP and filled by
 *     VOP_READDIR() through a UIO_SYSSPACE uio; eofflag receives the
 *     end-of-directory indication
 *   - leading records with d_ino == 0 are stepped over
 *   - for each record the reply receives d_ino, the name length, the name,
 *     zero padding (rem bytes) and an offset word, while len accumulates
 *     the reply size
 *   - finally the length of the last reply mbuf is set from bp
 * NOTE(review): most loop headers, the pointer advances (cpos, bp, p), the
 * size-limit test and the eof words are missing from this view, so the
 * ordering of the fragments below cannot be confirmed here.
 */
nfsrv_readdir(mrep
, md
, dpos
, cred
, xid
, mrq
, repstat
)
register struct mbuf
*mp
;
register struct direct
*dp
;
struct mbuf
*mb
, *mb2
, *mreq
;
int len
, nlen
, rem
, xfer
, tsiz
, i
;
int siz
, cnt
, fullsiz
, eofflag
;
nfsm_disect(p
, u_long
*, 2*NFSX_UNSIGNED
);
toff
= fxdr_unsigned(off_t
, *p
++);
/* off: start of the directory block holding toff; on: offset within it. */
off
= (toff
& ~(DIRBLKSIZ
-1));
on
= (toff
& (DIRBLKSIZ
-1));
cnt
= fxdr_unsigned(int, *p
);
/* Round the requested count up to a whole number of directory blocks. */
siz
= ((cnt
+DIRBLKSIZ
-1) & ~(DIRBLKSIZ
-1));
if (cnt
> NFS_MAXREADDIR
)
if (error
= nfsrv_fhtovp(fhp
, TRUE
, &vp
, cred
))
if (error
= nfsrv_access(vp
, VEXEC
, cred
)) {
MALLOC(rbuf
, caddr_t
, siz
, M_TEMP
, M_WAITOK
);
io
.uio_segflg
= UIO_SYSSPACE
;
error
= VOP_READDIR(vp
, &io
, cred
, &eofflag
);
free((caddr_t
)rbuf
, M_TEMP
);
* If nothing read, return eof
nfsm_reply(2*NFSX_UNSIGNED
);
nfsm_build(p
, u_long
*, 2*NFSX_UNSIGNED
);
FREE((caddr_t
)rbuf
, M_TEMP
);
* Check for degenerate cases of nothing useful read.
dp
= (struct direct
*)cpos
;
/* Step over leading records whose inode number is 0. */
while (cpos
< cend
&& dp
->d_ino
== 0) {
dp
= (struct direct
*)cpos
;
dp
= (struct direct
*)cpos
;
len
= 3*NFSX_UNSIGNED
; /* paranoia, probably can be 0 */
/* Loop through the records and build reply */
/* rem: pad bytes needed to round the name up to the nfsm_rndup() size. */
rem
= nfsm_rndup(nlen
)-nlen
;
* As noted above, the NFS spec. is not clear about what
* should be included in "count" as totalled up here in
len
+= (4*NFSX_UNSIGNED
+nlen
+rem
);
/* Build the directory record xdr from the direct entry */
*p
= txdr_unsigned(dp
->d_ino
);
*p
= txdr_unsigned(nlen
);
/* And loop around copying the name */
/* And null pad to a long boundary */
for (i
= 0; i
< rem
; i
++)
/* Finish off the record */
*p
= txdr_unsigned(toff
);
dp
= (struct direct
*)cpos
;
mp
->m_len
= bp
-mtod(mp
, caddr_t
);
/*
 * nfsrv_statfs: file-system statistics service routine.
 * Steps visible here:
 *   - nfsrv_fhtovp() maps the file handle to vp
 *   - VFS_STATFS() fills *sf for the mount that vp belongs to
 *   - nfsm_build() obtains sfp, an nfsv2_statfs of NFSX_STATFS bytes
 *   - the reply fields are stored through txdr_unsigned():
 *       sf_tsize  <- NFS_MAXDGRAMDATA (a constant, not a statfs value)
 *       sf_bsize  <- sf->f_fsize
 *       sf_blocks <- sf->f_blocks
 *       sf_bfree  <- sf->f_bfree
 *       sf_bavail <- sf->f_bavail
 * NOTE(review): where sf is pointed at storage, the error exits and the
 * nfsm_reply() call are not visible in this view.
 */
nfsrv_statfs(mrep
, md
, dpos
, cred
, xid
, mrq
, repstat
)
register struct statfs
*sf
;
register struct nfsv2_statfs
*sfp
;
struct mbuf
*mb
, *mb2
, *mreq
;
if (error
= nfsrv_fhtovp(fhp
, TRUE
, &vp
, cred
))
error
= VFS_STATFS(vp
->v_mount
, sf
);
nfsm_build(sfp
, struct nfsv2_statfs
*, NFSX_STATFS
);
sfp
->sf_tsize
= txdr_unsigned(NFS_MAXDGRAMDATA
);
sfp
->sf_bsize
= txdr_unsigned(sf
->f_fsize
);
sfp
->sf_blocks
= txdr_unsigned(sf
->f_blocks
);
sfp
->sf_bfree
= txdr_unsigned(sf
->f_bfree
);
sfp
->sf_bavail
= txdr_unsigned(sf
->f_bavail
);
* Null operation, used by clients to ping server
/*
 * nfsrv_null: takes the same seven arguments as the other service routines
 * in this file (mrep, md, dpos, cred, xid, mrq, repstat).
 * NOTE(review): only the parameter list is visible in this view; the
 * parameter declarations and the body are missing, so nothing about its
 * behaviour can be confirmed here.
 */
nfsrv_null(mrep
, md
, dpos
, cred
, xid
, mrq
, repstat
)
* No operation, used for obsolete procedures
/*
 * nfsrv_noop: takes the same seven arguments as the other service routines
 * in this file (mrep, md, dpos, cred, xid, mrq, repstat).
 * NOTE(review): only the parameter list is visible in this view; the
 * parameter declarations and the body are missing, so nothing about its
 * behaviour can be confirmed here.
 */
nfsrv_noop(mrep
, md
, dpos
, cred
, xid
, mrq
, repstat
)
* Perform access checking for vnodes obtained from file handles that would
* refer to files already opened by a Unix client. You cannot just use
* vn_writechk() and VOP_ACCESS() for two reasons.
* 1 - You must check for MNT_EXRDONLY as well as MNT_RDONLY for the write case
* 2 - The owner is to be given access irrespective of mode bits so that
* processes that chmod after opening a file don't break. I don't like
* this because it opens a security hole, but since the nfs server opens
* a security hole the size of a barn door anyhow, what the heck.
nfsrv_access(vp
, flags
, cred
)
register struct vnode
*vp
;
register struct ucred
*cred
;
/* Just vn_writechk() changed to check MNT_EXRDONLY */
* Disallow write attempts on read-only file systems;
* unless the file is a socket or a block or character
* device resident on the file system.
if (vp
->v_mount
->mnt_flag
& (MNT_RDONLY
| MNT_EXRDONLY
)) {
case VREG
: case VDIR
: case VLNK
:
* If there's shared text associated with
* the inode, try to free it up once. If
* we fail, we can't allow writing.
if ((vp
->v_flag
& VTEXT
) && !vnode_pager_uncache(vp
))
if (error
= VOP_GETATTR(vp
, &vattr
, cred
))
if ((error
= VOP_ACCESS(vp
, flags
, cred
)) &&
cred
->cr_uid
!= vattr
.va_uid
)