/*
* fast track m_adj from tail didn't adjust pkthdr.len
* [unix-history] / usr / src / sys / nfs / nfs_socket.c
*/
/*
* Copyright (c) 1989 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Rick Macklem at The University of Guelph.
*
* Redistribution and use in source and binary forms are permitted
* provided that the above copyright notice and this paragraph are
* duplicated in all such forms and that any documentation,
* advertising materials, and other materials related to such
* distribution and use acknowledge that the software was developed
* by the University of California, Berkeley. The name of the
* University may not be used to endorse or promote products derived
* from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* @(#)nfs_socket.c 7.3 (Berkeley) %G%
*/
/*
* Socket operations for use by nfs (similar to uipc_socket.c, but never
* with copies to/from a uio vector)
* NB: For now, they only work for UDP datagram sockets.
* (Use on stream sockets would require some record boundary mark in the
* stream, such as Sun's RM (Section 3.2 of the Sun RPC Message Protocol
* manual, in Networking on the Sun Workstation, Part #800-1324-03),
* and different versions of send, receive and reply that do not assume
* an atomic protocol.)
*/
#include "types.h"
#include "param.h"
#include "uio.h"
#include "user.h"
#include "mount.h"
#include "kernel.h"
#include "malloc.h"
#include "mbuf.h"
#include "vnode.h"
#include "domain.h"
#include "protosw.h"
#include "socket.h"
#include "socketvar.h"
#include "netinet/in.h"
#include "rpcv2.h"
#include "nfsv2.h"
#include "nfs.h"
#include "xdr_subs.h"
#include "nfsm_subs.h"
#include "nfsmount.h"
/*
* NOTE(review): TRUE is not referenced directly anywhere in this file;
* presumably one of the nfsm_* macros expands to a use of it -- confirm
* before removing.
*/
#define TRUE 1
/*
* nfs_sblock(sb): set lock on sockbuf sb, sleeping at priority PZERO-1.
* While SB_LOCK is already set, the macro sets SB_WANT and sleeps on
* &sb->sb_flags, re-testing SB_LOCK after every wakeup; once the flag is
* clear it sets SB_LOCK itself.
* The expansion is a braced compound statement and evaluates sb several
* times, so pass an expression without side effects.
*/
#define nfs_sblock(sb) { \
while ((sb)->sb_flags & SB_LOCK) { \
(sb)->sb_flags |= SB_WANT; \
sleep((caddr_t)&(sb)->sb_flags, PZERO-1); \
} \
(sb)->sb_flags |= SB_LOCK; \
}
/*
* External data, mostly RPC constants in XDR form.
* Where this file uses them (rpc_reply, rpc_msgdenied, rpc_mismatch,
* rpc_call, rpc_vers, rpc_auth_unix, rpc_msgaccepted) they are compared
* with, or stored into, raw words of an RPC message with no further
* conversion. nfs_prog and nfs_vers are declared here but not referenced
* in this file.
*/
extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
rpc_msgaccepted, rpc_call;
extern u_long nfs_prog, nfs_vers;
/*
* Server procedure handlers, defined outside this file. They are declared
* here (old-style, without parameter lists) so that their addresses can be
* placed in the nfsrv_procs[] table.
*/
int nfsrv_null(),
nfsrv_getattr(),
nfsrv_setattr(),
nfsrv_lookup(),
nfsrv_readlink(),
nfsrv_read(),
nfsrv_write(),
nfsrv_create(),
nfsrv_remove(),
nfsrv_rename(),
nfsrv_link(),
nfsrv_symlink(),
nfsrv_mkdir(),
nfsrv_rmdir(),
nfsrv_readdir(),
nfsrv_statfs(),
nfsrv_noop();
/*
* Table of server procedure handlers, NFS_NPROCS entries long.
* NOTE(review): presumably indexed by the RPC procedure number that
* nfs_getreq() hands back through its proc argument; the dispatch itself
* is not in this file -- confirm against the caller.
* Slots 3 and 7 are filled with nfsrv_noop (in NFS version 2, RFC 1094,
* those numbers are the ROOT and WRITECACHE procedures).
*/
int (*nfsrv_procs[NFS_NPROCS])() = {
nfsrv_null, /* 0 */
nfsrv_getattr, /* 1 */
nfsrv_setattr, /* 2 */
nfsrv_noop, /* 3 */
nfsrv_lookup, /* 4 */
nfsrv_readlink, /* 5 */
nfsrv_read, /* 6 */
nfsrv_noop, /* 7 */
nfsrv_write, /* 8 */
nfsrv_create, /* 9 */
nfsrv_remove, /* 10 */
nfsrv_rename, /* 11 */
nfsrv_link, /* 12 */
nfsrv_symlink, /* 13 */
nfsrv_mkdir, /* 14 */
nfsrv_rmdir, /* 15 */
nfsrv_readdir, /* 16 */
nfsrv_statfs, /* 17 */
};
/*
* This is a stripped down version of sosend() specific to
* udp/ip and uses the mbuf list provdied
*/
nfs_udpsend(so, nam, top, flags, siz)
register struct socket *so;
struct mbuf *nam;
struct mbuf *top;
int flags;
int siz;
{
register int space;
int error = 0, s, dontroute, first = 1;
dontroute =
(flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
(so->so_proto->pr_flags & PR_ATOMIC);
#define snderr(errno) { error = errno; splx(s); goto release; }
#ifdef MGETHDR
top->m_pkthdr.len = siz;
#endif
restart:
nfs_sblock(&so->so_snd);
s = splnet();
if (so->so_state & SS_CANTSENDMORE)
snderr(EPIPE);
if (so->so_error)
snderr(so->so_error);
space = sbspace(&so->so_snd);
if (space < siz) {
sbunlock(&so->so_snd);
nfs_sbwait(&so->so_snd);
splx(s);
goto restart;
}
splx(s);
if (dontroute)
so->so_options |= SO_DONTROUTE;
s = splnet(); /* XXX */
error = (*so->so_proto->pr_usrreq)(so,
PRU_SEND,
top, (caddr_t)nam, (struct mbuf *)0, (struct mbuf *)0);
splx(s);
if (dontroute)
so->so_options &= ~SO_DONTROUTE;
top = (struct mbuf *)0;
release:
sbunlock(&so->so_snd);
if (top)
m_freem(top);
return (error);
}
/*
 * Dequeue one datagram from a socket.  This is a stripped down, udp
 * specific version of soreceive() that returns the data as an mbuf chain
 * instead of copying it out.
 *
 *	so	socket to receive from
 *	aname	if non-null, receives the record's MT_SONAME address mbuf
 *		(otherwise that mbuf is freed); cleared to 0 on entry
 *	mp	receives the head of the data chain; cleared to 0 on entry
 *		so that it is never left undefined on the early-return paths
 *
 * Sleeps in sbwait() while the receive buffer is empty.  Returns the
 * pending so_error (and clears it) if the buffer is empty and an error is
 * set.  Returns 0 with *mp == 0 if the buffer is empty and the socket is
 * marked SS_CANTRCVMORE, or if the record carried no data mbufs; callers
 * must check *mp before using it.
 */
nfs_udpreceive(so, aname, mp)
	register struct socket *so;
	struct mbuf **aname;
	struct mbuf **mp;
{
	register struct mbuf *m;
	int s, error = 0;
	struct mbuf *nextrecord;

	if (aname)
		*aname = 0;
	/*
	 * The "goto release" paths below return without a datagram; make
	 * sure the caller then sees a null chain rather than stack garbage.
	 */
	*mp = 0;
restart:
	sblock(&so->so_rcv);
	s = splnet();
	if (so->so_rcv.sb_cc == 0) {
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE)
			goto release;
		/* Nothing queued: drop the lock, wait for data, try again. */
		sbunlock(&so->so_rcv);
		sbwait(&so->so_rcv);
		splx(s);
		goto restart;
	}
	m = so->so_rcv.sb_mb;
	if (m == 0)
		panic("nfs_receive 1");
	/* Remember the following record before this one is taken apart. */
	nextrecord = m->m_nextpkt;
	if (m->m_type != MT_SONAME)
		panic("nfs_receive 1a");
	/* Detach the address mbuf: give it to the caller or free it. */
	sbfree(&so->so_rcv, m);
	if (aname) {
		*aname = m;
		so->so_rcv.sb_mb = m->m_next;
		m->m_next = 0;
		m = so->so_rcv.sb_mb;
	} else {
		MFREE(m, so->so_rcv.sb_mb);
		m = so->so_rcv.sb_mb;
	}
	if (m && m->m_type == MT_RIGHTS)
		panic("nfs_receive 2");
	/* A control mbuf, if present, is simply discarded. */
	if (m && m->m_type == MT_CONTROL) {
		sbfree(&so->so_rcv, m);
		MFREE(m, so->so_rcv.sb_mb);
		m = so->so_rcv.sb_mb;
	}
	/*
	 * The remaining mbufs are the data.  Hand the chain to the caller
	 * and walk it only to take each mbuf out of the sockbuf accounting.
	 */
	*mp = m;
	while (m) {
		if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("nfs_receive 3");
		sbfree(&so->so_rcv, m);
		m = so->so_rcv.sb_mb = m->m_next;
	}
	so->so_rcv.sb_mb = nextrecord;
	so->so_state &= ~SS_RCVATMARK;	/* Necessary ?? */
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}
/*
* Anchor of the list of outstanding client requests.
* nfs_request() links each new request in at nfsreqh.r_next (with that
* request's r_prev pointing back at this anchor) and nfs_timer() scans
* the list starting from nfsreqh.r_next. Every member is initialized to
* a null pointer or zero.
*/
struct nfsreq nfsreqh = {
(struct nfsreq *)0,
(struct nfsreq *)0,
(struct mbuf *)0,
(struct mbuf *)0,
(struct nfsmount *)0,
0, 0, 0, 0, 0,
};
/*
* The leading two words of an RPC reply as they sit in the datagram.
* nfs_udpreply() copies 2*NFSX_UNSIGNED bytes from the front of a
* received record into one of these to identify the reply.
*/
struct rpc_replyhead {
u_long r_xid; /* transaction id; matched against nfsreq r_xid */
u_long r_rep; /* message type word; compared with rpc_reply */
};
/*
* Implement receipt of reply on a socket.
* We depend on the way that records are added to the sockbuf
* by sbappend*. In particular, each record (mbufs linked through m_next)
* must begin with an address, followed by optional MT_CONTROL mbuf
* and then zero or more mbufs of data.
* Although the sockbuf is locked, new data may still be appended,
* and thus we must maintain consistency of the sockbuf during that time.
* We must search through the list of received datagrams matching them
* with outstanding requests using the xid, until ours is found.
*/
nfs_udpreply(so, mntp, repl, myrep)
register struct socket *so;
struct nfsmount *mntp;
struct nfsreq *repl, *myrep;
{
register struct mbuf *m;
register struct nfsreq *rep;
register int error = 0, s;
struct protosw *pr = so->so_proto;
struct mbuf *nextrecord;
struct sockaddr_in *sad, *sad2;
struct rpc_replyhead replyh;
struct mbuf *mp;
char *cp;
int cnt, xfer;
int found;
restart:
/* Already received, bye bye */
if (myrep->r_mrep != NULL)
return (0);
/* If a soft mount and we have run out of retries */
if (myrep->r_retry == 0 && myrep->r_timer == 0)
return (ETIMEDOUT);
nfs_sblock(&so->so_rcv);
s = splnet();
if (so->so_rcv.sb_cc == 0) {
if (so->so_error) {
error = so->so_error;
so->so_error = 0;
goto release;
}
if (so->so_state & SS_CANTRCVMORE)
goto release;
sbunlock(&so->so_rcv);
nfs_sbwait(&so->so_rcv);
splx(s);
goto restart;
}
m = so->so_rcv.sb_mb;
if (m == 0)
panic("nfs_soreply 1");
nextrecord = m->m_nextpkt;
/*
* Take off the address, check for rights and ditch any control
* mbufs.
*/
if (m->m_type != MT_SONAME)
panic("nfs reply SONAME");
sad = mtod(m, struct sockaddr_in *);
sad2 = mtod(mntp->nm_sockaddr, struct sockaddr_in *);
found = 0;
if (sad->sin_addr.s_addr != sad2->sin_addr.s_addr)
goto dropit;
sbfree(&so->so_rcv, m);
MFREE(m, so->so_rcv.sb_mb);
m = so->so_rcv.sb_mb;
if (m && m->m_type == MT_RIGHTS)
panic("nfs reply RIGHTS");
if (m && m->m_type == MT_CONTROL) {
sbfree(&so->so_rcv, m);
MFREE(m, so->so_rcv.sb_mb);
m = so->so_rcv.sb_mb;
}
if (m)
m->m_nextpkt = nextrecord;
else {
sbunlock(&so->so_rcv);
splx(s);
goto restart;
}
/*
* Get the xid and check that it is an rpc reply
*/
mp = m;
if (m->m_len >= 2*NFSX_UNSIGNED)
bcopy(mtod(m, caddr_t), (caddr_t)&replyh, 2*NFSX_UNSIGNED);
else {
cnt = 2*NFSX_UNSIGNED;
cp = (caddr_t)&replyh;
while (mp && cnt > 0) {
if (mp->m_len > 0) {
xfer = (mp->m_len >= cnt) ? cnt : mp->m_len;
bcopy(mtod(mp, caddr_t), cp, xfer);
cnt -= xfer;
cp += xfer;
}
if (cnt > 0)
mp = mp->m_next;
}
}
if (replyh.r_rep != rpc_reply || mp == NULL)
goto dropit;
/*
* Loop through the request list to match up the reply
* Iff no match, just drop the datagram
*/
rep = repl;
while (!found && rep) {
if (rep->r_mrep == NULL && replyh.r_xid == rep->r_xid) {
/* Found it.. */
rep->r_mrep = m;
while (m) {
if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
panic("nfs_soreply 3");
sbfree(&so->so_rcv, m);
m = so->so_rcv.sb_mb = m->m_next;
}
so->so_rcv.sb_mb = nextrecord;
if (rep == myrep)
goto release;
found++;
}
rep = rep->r_next;
}
/* Iff not matched to request, drop it */
dropit:
if (!found)
sbdroprecord(&so->so_rcv);
sbunlock(&so->so_rcv);
splx(s);
goto restart;
release:
sbunlock(&so->so_rcv);
splx(s);
return (error);
}
/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_udpsend() for first transmit
 *	- calls nfs_udpreply() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 *
 *	vp	vnode recorded in the request (r_vp)
 *	mreq	the fully built request chain; kept for retransmission
 *		while a copy is sent, and freed before returning
 *	xid	transaction id the reply is matched against
 *	mp	mount point, converted with vfs_to_nfs()
 *	mrp	on success receives the head of the reply chain
 *	mdp	on success receives the mbuf holding the parse position
 *	dposp	on success receives the parse position within *mdp
 *
 * Returns 0 with *mrp, *mdp and *dposp set, positioned after the rpc
 * header and the nfs status word.  Otherwise returns an errno and leaves
 * no reply with the caller: the error from nfs_udpreply(), EIO if no
 * reply was delivered, EOPNOTSUPP or EACCES for a denied message,
 * EBADRPC for an implausible verifier length, the nfs status carried in
 * the reply, EPROTONOSUPPORT for a non-zero accept status, or the error
 * set by a failing nfsm_* macro.
 */
nfs_request(vp, mreq, xid, mp, mrp, mdp, dposp)
	struct vnode *vp;
	struct mbuf *mreq;
	u_long xid;
	struct mount *mp;
	struct mbuf **mrp;
	struct mbuf **mdp;
	caddr_t *dposp;
{
	register struct mbuf *m, *mrep;
	register struct nfsreq *rep;
	register u_long *p;
	register int len;
	struct nfsmount *mntp;
	struct mbuf *md;
	caddr_t dpos;
	char *cp2;
	int t1;
	int s;
	int error;

	mntp = vfs_to_nfs(mp);
	m = mreq;
	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
	rep->r_xid = xid;
	rep->r_mntp = mntp;
	rep->r_vp = vp;
	/* Soft mounts get a finite retry count, others VNOVAL. */
	if (mntp->nm_flag & NFSMNT_SOFT)
		rep->r_retry = mntp->nm_retrans;
	else
		rep->r_retry = VNOVAL;
	rep->r_mrep = NULL;
	rep->r_mreq = m;
	rep->r_timer = rep->r_timeout = mntp->nm_timeo;
	/* Total the request length for the send-space checks. */
	len = 0;
	while (m) {
		len += m->m_len;
		m = m->m_next;
	}
	rep->r_msiz = len;
	/* Send a copy; the original stays on the request for retransmits. */
	m = NFSMCOPY(mreq, 0, M_COPYALL, M_WAIT);
	/* Chain it into list of outstanding requests */
	s = splnet();
	rep->r_next = nfsreqh.r_next;
	if (rep->r_next != NULL)
		rep->r_next->r_prev = rep;
	nfsreqh.r_next = rep;
	rep->r_prev = &nfsreqh;
	splx(s);
	/*
	 * Iff the NFSMCOPY above succeeded, send it off...
	 * otherwise the timer will retransmit later.
	 * A failed first send is deliberately not reported: the outcome of
	 * the request is decided by nfs_udpreply() below.
	 */
	if (m != NULL)
		(void) nfs_udpsend(mntp->nm_so, (struct mbuf *)0, m, 0, len);
	error = nfs_udpreply(mntp->nm_so, mntp, nfsreqh.r_next, rep);
	/* Unlink the request again before it is freed. */
	s = splnet();
	rep->r_prev->r_next = rep->r_next;
	if (rep->r_next != NULL)
		rep->r_next->r_prev = rep->r_prev;
	splx(s);
	m_freem(rep->r_mreq);
	mrep = md = rep->r_mrep;
	FREE((caddr_t)rep, M_NFSREQ);
	if (error) {
		/*
		 * Another process sharing the socket may have attached a
		 * reply to this request even though our own wait failed;
		 * nobody else holds it now, so release it here.
		 */
		if (mrep != NULL)
			m_freem(mrep);
		return (error);
	}
	/*
	 * nfs_udpreply() returns 0 without a reply when the socket can
	 * receive no more; there is nothing to parse in that case.
	 */
	if (mrep == NULL)
		return (EIO);
	/*
	 * break down the rpc header and check if ok
	 */
	dpos = mtod(md, caddr_t);
	nfsm_disect(p, u_long *, 5*NFSX_UNSIGNED);
	/* Step over the first two words (see struct rpc_replyhead). */
	p += 2;
	if (*p++ == rpc_msgdenied) {
		if (*p == rpc_mismatch)
			error = EOPNOTSUPP;
		else
			error = EACCES;
		m_freem(mrep);
		return (error);
	}
	/*
	 * skip over the auth_verf, someday we may want to cache auth_short's
	 * for nfs_reqhead(), but for now just dump it
	 */
	if (*++p != 0) {
		len = fxdr_unsigned(long, *p);
		/*
		 * The length comes off the wire.  A negative or oversized
		 * value must not be rounded up and used to move the parse
		 * position (RPC limits an opaque_auth body to 400 bytes).
		 */
		if (len < 0 || len > 400) {
			m_freem(mrep);
			return (EBADRPC);
		}
		len = nfsm_rndup(len);
		nfsm_adv(len);
	}
	nfsm_disect(p, u_long *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*p == 0) {
		/* Next word is the nfs status of the call itself. */
		nfsm_disect(p, u_long *, NFSX_UNSIGNED);
		if (*p != 0) {
			error = fxdr_unsigned(int, *p);
			m_freem(mrep);
			return (error);
		}
		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;
		return (0);
	}
	m_freem(mrep);
	return (EPROTONOSUPPORT);
	/*
	 * Target of the nfsm_* macros on a parse failure.
	 * NOTE(review): presumably the macro has already freed mrep by the
	 * time it jumps here -- confirm in nfsm_subs.h.
	 */
nfsmout:
	return (error);
}
/*
* Get a request for the server main loop
* - receive a request via. nfs_soreceive()
* - verify it
* - fill in the cred struct.
*/
nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, proc, cr)
struct socket *so;
u_long prog;
u_long vers;
int maxproc;
struct mbuf **nam;
struct mbuf **mrp;
struct mbuf **mdp;
caddr_t *dposp;
u_long *retxid;
u_long *proc;
register struct ucred *cr;
{
register int i;
register struct mbuf *m;
nfsm_vars;
int len, len2;
if (error = nfs_udpreceive(so, nam, &mrep))
return (error);
md = mrep;
dpos = mtod(mrep, caddr_t);
nfsm_disect(p, u_long *, 10*NFSX_UNSIGNED);
*retxid = *p++;
if (*p++ != rpc_call) {
m_freem(mrep);
return (ERPCMISMATCH);
}
if (*p++ != rpc_vers) {
m_freem(mrep);
return (ERPCMISMATCH);
}
if (*p++ != prog) {
m_freem(mrep);
return (EPROGUNAVAIL);
}
if (*p++ != vers) {
m_freem(mrep);
return (EPROGMISMATCH);
}
*proc = fxdr_unsigned(u_long, *p++);
if (*proc == NFSPROC_NULL) {
*mrp = mrep;
return (0);
}
if (*proc > maxproc || *p++ != rpc_auth_unix) {
m_freem(mrep);
return (EPROCUNAVAIL);
}
len = fxdr_unsigned(int, *p++);
len2 = fxdr_unsigned(int, *++p);
nfsm_adv(nfsm_rndup(len2));
nfsm_disect(p, u_long *, 3*NFSX_UNSIGNED);
cr->cr_uid = fxdr_unsigned(uid_t, *p++);
cr->cr_gid = fxdr_unsigned(gid_t, *p++);
len2 = fxdr_unsigned(int, *p);
if (len2 > 10) {
m_freem(mrep);
return (EBADRPC);
}
nfsm_disect(p, u_long *, (len2+2)*NFSX_UNSIGNED);
for (i = 1; i <= len2; i++)
cr->cr_groups[i] = fxdr_unsigned(gid_t, *p++);
cr->cr_ngroups = len2+1;
/*
* Do we have any use for the verifier.
* According to the "Remote Procedure Call Protocol Spec." it
* should be AUTH_NULL, but some clients make it AUTH_UNIX?
* For now, just skip over it
*/
len2 = fxdr_unsigned(int, *++p);
if (len2 > 0)
nfsm_adv(nfsm_rndup(len2));
*mrp = mrep;
*mdp = md;
*dposp = dpos;
return (0);
nfsmout:
return (error);
}
/*
 * Build the rpc header of a reply in a freshly allocated header mbuf.
 *
 *	siz	expected size of the reply body; a cluster is attached
 *		when siz plus RPC_REPLYSIZ will not fit in MHLEN
 *	retxid	xid word to echo, stored exactly as given
 *	err	outcome to encode: ERPCMISMATCH produces a denied message;
 *		EPROGUNAVAIL, EPROGMISMATCH and EPROCUNAVAIL produce the
 *		corresponding accept status; VNOVAL produces a bare
 *		success header; any other value produces a success
 *		header followed by err as one more word
 *	mrq	receives the head of the reply chain
 *	mbp	receives the mbuf currently being filled
 *	bposp	receives the position at which building continues
 *
 * Counts the reply in nfsstats.srvrpc_errs when err is neither 0 nor
 * VNOVAL.  Always returns 0.
 */
nfs_rephead(siz, retxid, err, mrq, mbp, bposp)
	int siz;
	u_long retxid;
	int err;
	struct mbuf **mrq;
	struct mbuf **mbp;
	caddr_t *bposp;
{
	nfsm_vars;

	NFSMGETHDR(mreq);
	mb = mreq;
	if ((siz+RPC_REPLYSIZ) > MHLEN)
		NFSMCLGET(mreq, M_WAIT);

	/* The fixed part of every reply built here is six words long. */
	p = mtod(mreq, u_long *);
	mreq->m_len = 6*NFSX_UNSIGNED;
	bpos = ((caddr_t)p)+mreq->m_len;

	p[0] = retxid;
	p[1] = rpc_reply;
	if (err == ERPCMISMATCH) {
		/* Denied: rpc version mismatch, with the 2..2 range. */
		p[2] = rpc_msgdenied;
		p[3] = rpc_mismatch;
		p[4] = txdr_unsigned(2);
		p[5] = txdr_unsigned(2);
	} else {
		/* Accepted: two zero words, then the accept status. */
		p[2] = rpc_msgaccepted;
		p[3] = 0;
		p[4] = 0;
		if (err == EPROGUNAVAIL) {
			p[5] = txdr_unsigned(RPC_PROGUNAVAIL);
		} else if (err == EPROGMISMATCH) {
			p[5] = txdr_unsigned(RPC_PROGMISMATCH);
			/* Append the version range; p now addresses it. */
			nfsm_build(p, u_long *, 2*NFSX_UNSIGNED);
			p[0] = txdr_unsigned(2);
			p[1] = txdr_unsigned(2);	/* someday 3 */
		} else if (err == EPROCUNAVAIL) {
			p[5] = txdr_unsigned(RPC_PROCUNAVAIL);
		} else {
			p[5] = 0;
			if (err != VNOVAL) {
				/* One more word carrying err itself. */
				nfsm_build(p, u_long *, NFSX_UNSIGNED);
				p[0] = txdr_unsigned(err);
			}
		}
	}

	*mrq = mreq;
	*mbp = mb;
	*bposp = bpos;
	if (err != 0 && err != VNOVAL)
		nfsstats.srvrpc_errs++;
	return (0);
}
/*
 * Nfs timer routine
 * Walk the list of outstanding requests at splnet.  A request whose
 * r_timer is still positive just has it counted down.  One whose timer
 * has run out, that has no reply yet and has retries left, is
 * retransmitted, provided the socket can still send, has no pending
 * error and has room for the request, and a copy of the request can be
 * made without waiting.  Each retransmission quadruples r_timeout (capped
 * at NFS_MAXTIMEO), reloads r_timer from it, and uses up one retry unless
 * r_retry is VNOVAL.  The routine then reschedules itself hz/10 ticks
 * ahead.
 * To avoid retransmission attempts on STREAM sockets (in the future) make
 * sure to set the r_retry field to 0.
 */
nfs_timer()
{
	register struct nfsreq *req;
	register struct mbuf *copy;
	register struct socket *so;
	int s;

	s = splnet();
	for (req = nfsreqh.r_next; req != NULL; req = req->r_next) {
		if (req->r_timer > 0) {
			req->r_timer--;
			continue;
		}
		/* Answered already, or no retries left: nothing to do. */
		if (req->r_mrep != NULL || req->r_retry <= 0)
			continue;
		so = req->r_mntp->nm_so;
		/* Leave it for a later tick if the socket cannot take it. */
		if ((so->so_state & SS_CANTSENDMORE) || so->so_error ||
		    sbspace(&so->so_snd) < req->r_msiz)
			continue;
		copy = NFSMCOPY(req->r_mreq, 0, M_COPYALL, M_DONTWAIT);
		if (copy == NULL)
			continue;

		nfsstats.rpcretries++;
		req->r_timeout <<= 2;		/* x4 backoff */
		if (req->r_timeout > NFS_MAXTIMEO)
			req->r_timeout = NFS_MAXTIMEO;
		req->r_timer = req->r_timeout;
		if (req->r_retry != VNOVAL)
			req->r_retry--;
#ifdef MGETHDR
		copy->m_pkthdr.len = req->r_msiz;
#endif
		/* The result of the send is not examined. */
		(*so->so_proto->pr_usrreq)(so, PRU_SEND, copy, (caddr_t)0,
		    (struct mbuf *)0, (struct mbuf *)0);
	}
	splx(s);
	timeout(nfs_timer, (caddr_t)0, hz/10);
}
/*
* nfs_sbwait() is simply sbwait() but sleeping at priority PZERO-1, so
* that (as the original author notes) it can not be interrupted by a
* signal.
* It marks the sockbuf with SB_WAIT and sleeps on the address of its
* sb_cc field. No value is returned explicitly.
*/
nfs_sbwait(sb)
struct sockbuf *sb;
{
sb->sb_flags |= SB_WAIT; /* record that a sleeper is waiting on this sockbuf */
sleep((caddr_t)&sb->sb_cc, PZERO-1);
}