* Copyright (c) 1989, 1991 The Regents of the University of California.
* This code is derived from software contributed to Berkeley by
* Rick Macklem at The University of Guelph.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* @(#)nfs_socket.c 7.23 (Berkeley) 4/20/91
* Socket operations for use by nfs
#include "../netinet/in.h"
#include "../netinet/tcp.h"
/*
 * External data, mostly RPC constants in XDR form.
 * These words are pre-encoded elsewhere (NOTE(review): presumably in
 * network byte order, per the comment above — confirm at the defining
 * translation unit) and are compared/inserted directly when building
 * and parsing Sun RPC message headers.
 */
extern u_long rpc_reply
, rpc_msgdenied
, rpc_mismatch
, rpc_vers
, rpc_auth_unix
,
rpc_msgaccepted
, rpc_call
;
/* NFS RPC program number and version constants (defined elsewhere). */
extern u_long nfs_prog
, nfs_vers
;
/* Maybe these should be bits in a u_long ?? */
/*
 * Per-procedure table, indexed by NFS procedure number: nonzero entries
 * mark procedures that are not idempotent (used to pick a longer initial
 * timeout for requests that are unsafe to replay — see nfs_request).
 */
extern int nonidempotent
[NFS_NPROCS
];
static int compressrequest
[NFS_NPROCS
] = {
/*
 * Old-style (K&R, no prototype) declarations of the optional request/
 * reply compression helpers; both take and return mbuf chains.
 * NOTE(review): semantics inferred from names and the call sites
 * (nfs_compress on send when NFSMNT_COMPRESS, nfs_uncompress on
 * receive) — confirm against their definitions.
 */
struct mbuf
*nfs_compress(), *nfs_uncompress();
int (*nfsrv_procs
[NFS_NPROCS
])() = {
/*
 * Retransmit-count threshold at which the mount is treated as talking
 * to a fishy/unresponsive server (compared against nm_currexmit in
 * nfs_backofftimer).  Tunable; initialized from NFS_FISHY.
 */
int nfsrexmtthresh
= NFS_FISHY
;
* Initialize sockets and congestion for a new NFS connection.
* We do not free the sockaddr if error.
register struct nfsmount
*nmp
;
register struct socket
*so
;
nmp
->nm_so
= (struct socket
*)0;
if (error
= socreate(mtod(nmp
->nm_nam
, struct sockaddr
*)->sa_family
,
&nmp
->nm_so
, nmp
->nm_sotype
, nmp
->nm_soproto
))
nmp
->nm_soflags
= so
->so_proto
->pr_flags
;
if (nmp
->nm_sotype
== SOCK_DGRAM
)
bufsize
= min(4 * (nmp
->nm_wsize
+ NFS_MAXPKTHDR
),
bufsize
= min(4 * (nmp
->nm_wsize
+ NFS_MAXPKTHDR
+ sizeof(u_long
)),
NFS_MAXPACKET
+ sizeof(u_long
));
if (error
= soreserve(so
, bufsize
, bufsize
))
* Protocols that do not require connections may be optionally left
* unconnected for servers that reply from a port other than NFS_PORT.
if (nmp
->nm_flag
& NFSMNT_NOCONN
) {
if (nmp
->nm_soflags
& PR_CONNREQUIRED
) {
if (error
= soconnect(so
, nmp
->nm_nam
))
* Wait for the connection to complete. Cribbed from the
* connect system call but with the wait at negative prio.
while ((so
->so_state
& SS_ISCONNECTING
) && so
->so_error
== 0)
(void) tsleep((caddr_t
)&so
->so_timeo
, PSOCK
, "nfscon", 0);
if (nmp
->nm_sotype
== SOCK_DGRAM
) {
if (nmp
->nm_flag
& (NFSMNT_SOFT
| NFSMNT_SPONGY
| NFSMNT_INT
)) {
so
->so_rcv
.sb_timeo
= (5 * hz
);
so
->so_snd
.sb_timeo
= (5 * hz
);
if (nmp
->nm_flag
& (NFSMNT_SOFT
| NFSMNT_SPONGY
| NFSMNT_INT
)) {
so
->so_rcv
.sb_timeo
= (5 * hz
);
so
->so_snd
.sb_timeo
= (5 * hz
);
if (so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) {
MGET(m
, M_WAIT
, MT_SOOPTS
);
sosetopt(so
, SOL_SOCKET
, SO_KEEPALIVE
, m
);
if (so
->so_proto
->pr_domain
->dom_family
== AF_INET
&&
so
->so_proto
->pr_protocol
== IPPROTO_TCP
&&
MGET(m
, M_WAIT
, MT_SOOPTS
);
sosetopt(so
, IPPROTO_TCP
, TCP_NODELAY
, m
);
nmp
->nm_rto
= 10 * NFS_TIMEO
; /* XXX */
so
->so_rcv
.sb_flags
|= SB_NOINTR
;
so
->so_snd
.sb_flags
|= SB_NOINTR
;
/* Initialize other non-zero congestion variables */
nmp
->nm_window
= 2; /* Initial send window */
nmp
->nm_ssthresh
= NFS_MAXWINDOW
; /* Slowstart threshold */
nmp
->nm_rttvar
= nmp
->nm_rto
<< 1;
* Called when a connection is broken on a reliable protocol.
* - clean up the old socket
* - set R_MUSTRESEND for all outstanding requests on mount point
* If this fails the mount point is DEAD!
* nb: Must be called with the nfs_solock() set on the mount point.
register struct nfsreq
*rep
;
register struct nfsmount
*nmp
;
register struct nfsreq
*rp
;
nfs_msg(rep
->r_procp
, nmp
->nm_mountp
->mnt_stat
.f_mntfromname
,
while (error
= nfs_connect(nmp
)) {
if ((nmp
->nm_flag
& NFSMNT_INT
) && nfs_sigintr(rep
->r_procp
))
(void) tsleep((caddr_t
)&lbolt
, PSOCK
, "nfscon", 0);
nfs_msg(rep
->r_procp
, nmp
->nm_mountp
->mnt_stat
.f_mntfromname
,
* Loop through outstanding request list and fix up all requests
rp
->r_flags
|= R_MUSTRESEND
;
* NFS disconnect. Clean up and unlink.
register struct nfsmount
*nmp
;
register struct socket
*so
;
nmp
->nm_so
= (struct socket
*)0;
* This is the nfs send routine. For connection based socket types, it
* must be called with an nfs_solock() on the socket.
* "rep == NULL" indicates that it has been called from a server.
nfs_send(so
, nam
, top
, rep
)
register struct socket
*so
;
register struct mbuf
*top
;
if (rep
->r_flags
& R_SOFTTERM
) {
if (rep
->r_nmp
->nm_so
== NULL
&&
(error
= nfs_reconnect(rep
, rep
->r_nmp
)))
rep
->r_flags
&= ~R_MUSTRESEND
;
soflags
= rep
->r_nmp
->nm_soflags
;
soflags
= so
->so_proto
->pr_flags
;
if ((soflags
& PR_CONNREQUIRED
) || (so
->so_state
& SS_ISCONNECTED
))
sendnam
= (struct mbuf
*)0;
error
= sosend(so
, sendnam
, (struct uio
*)0, top
,
if (error
== EWOULDBLOCK
&& rep
) {
if (rep
->r_flags
& R_SOFTTERM
)
rep
->r_flags
|= R_MUSTRESEND
;
if (error
&& error
!= EINTR
&& error
!= ERESTART
)
* Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
* done by soreceive(), but for SOCK_STREAM we must deal with the Record
* Mark and consolidate the data into a new mbuf list.
* nb: Sometimes TCP passes the data up to soreceive() in long lists of
* For SOCK_STREAM we must be very careful to read an entire record once
* we have read any of it, even if the system call has been interrupted.
nfs_receive(so
, aname
, mp
, rep
)
register struct socket
*so
;
register struct nfsreq
*rep
;
struct mbuf
*m2
, *mnew
, **mbp
;
int error
, siz
, mlen
, soflags
, rcvflg
;
* Set up arguments for soreceive()
*aname
= (struct mbuf
*)0;
soflags
= rep
->r_nmp
->nm_soflags
;
soflags
= so
->so_proto
->pr_flags
;
* For reliable protocols, lock against other senders/receivers
* in case a reconnect is necessary.
* For SOCK_STREAM, first get the Record Mark to find out how much
* We must lock the socket against other receivers
* until we have an entire rpc request/reply.
if (soflags
& PR_CONNREQUIRED
) {
* Check for fatal errors and resending request.
* Ugh: If a reconnect attempt just happened, nm_so
* would have changed. NULL indicates a failed
* attempt that has essentially shut down this
if (rep
->r_mrep
|| (so
= rep
->r_nmp
->nm_so
) == NULL
||
(rep
->r_flags
& R_SOFTTERM
))
while (rep
->r_flags
& R_MUSTRESEND
) {
m
= m_copym(rep
->r_mreq
, 0, M_COPYALL
, M_WAIT
);
if (error
= nfs_send(so
, rep
->r_nmp
->nm_nam
, m
,
if ((soflags
& PR_ATOMIC
) == 0) {
aio
.iov_base
= (caddr_t
) &len
;
aio
.iov_len
= sizeof(u_long
);
auio
.uio_segflg
= UIO_SYSSPACE
;
auio
.uio_procp
= (struct proc
*)0;
auio
.uio_resid
= sizeof(u_long
);
error
= soreceive(so
, (struct mbuf
**)0, &auio
,
(struct mbuf
**)0, (struct mbuf
**)0, &rcvflg
);
if (error
== EWOULDBLOCK
&& rep
) {
if (rep
->r_flags
& R_SOFTTERM
)
if (rep
->r_flags
& R_MUSTRESEND
)
} while (error
== EWOULDBLOCK
);
if (!error
&& auio
.uio_resid
> 0) {
"short receive (%d/%d) from nfs server %s\n",
sizeof(u_long
) - auio
.uio_resid
,
rep
->r_nmp
->nm_mountp
->mnt_stat
.f_mntfromname
);
len
= ntohl(len
) & ~0x80000000;
* This is SERIOUS! We are out of sync with the sender
* and forcing a disconnect/reconnect is all I can do.
if (len
> NFS_MAXPACKET
) {
log(LOG_ERR
, "%s (%d) from nfs server %s\n",
"impossible packet length",
rep
->r_nmp
->nm_mountp
->mnt_stat
.f_mntfromname
);
error
= soreceive(so
, (struct mbuf
**)0,
&auio
, mp
, (struct mbuf
**)0, &rcvflg
);
} while (error
== EWOULDBLOCK
|| error
== EINTR
||
if (!error
&& auio
.uio_resid
> 0) {
"short receive (%d/%d) from nfs server %s\n",
len
- auio
.uio_resid
, len
,
rep
->r_nmp
->nm_mountp
->mnt_stat
.f_mntfromname
);
auio
.uio_resid
= len
= 1000000; /* Anything Big */
error
= soreceive(so
, (struct mbuf
**)0,
&auio
, mp
, (struct mbuf
**)0, &rcvflg
);
if (error
== EWOULDBLOCK
&& rep
) {
if (rep
->r_flags
& R_SOFTTERM
)
if (rep
->r_flags
& R_MUSTRESEND
)
} while (error
== EWOULDBLOCK
);
if (!error
&& *mp
== NULL
)
if (error
&& rep
&& error
!= EINTR
&& error
!= ERESTART
) {
if (error
!= EPIPE
&& rep
)
"receive error %d from nfs server %s\n",
rep
->r_nmp
->nm_mountp
->mnt_stat
.f_mntfromname
);
nfs_disconnect(rep
->r_nmp
);
error
= nfs_reconnect(rep
, rep
->r_nmp
);
if (so
->so_state
& SS_ISCONNECTED
)
getnam
= (struct mbuf
**)0;
auio
.uio_resid
= len
= 1000000;
error
= soreceive(so
, getnam
, &auio
, mp
,
(struct mbuf
**)0, &rcvflg
);
if (error
== EWOULDBLOCK
&& rep
&&
(rep
->r_flags
& R_SOFTTERM
))
} while (error
== EWOULDBLOCK
);
* Search for any mbufs that are not a multiple of 4 bytes long.
* These could cause pointer alignment problems, so copy them to
* All this for something that may never happen.
if (m
->m_next
&& (m
->m_len
& 0x3)) {
printf("nfs_rcv odd length!\n");
MGET(m2
, M_WAIT
, MT_DATA
);
mlen
= M_TRAILINGSPACE(m2
);
siz
= MIN(mlen
, m
->m_len
);
* Implement receipt of reply on a socket.
* We must search through the list of received datagrams matching them
* with outstanding requests using the xid, until ours is found.
register struct nfsreq
*rep
;
* Loop around until we get our own reply
* Lock against other receivers so that I don't get stuck in
* sbwait() after someone else has received my reply for me.
* Also necessary for connection based protocols to avoid
* race conditions during a reconnect.
nfs_solock(&nmp
->nm_flag
);
/* Already received, bye bye */
if (myrep
->r_mrep
!= NULL
) {
nfs_sounlock(&nmp
->nm_flag
);
* Get the next Rpc reply off the socket
if (error
= nfs_receive(nmp
->nm_so
, &nam
, &mp
, myrep
)) {
nfs_sounlock(&nmp
->nm_flag
);
* Ignore routing errors on connectionless protocols??
if (NFSIGNORE_SOERROR(nmp
->nm_soflags
, error
)) {
nmp
->nm_so
->so_error
= 0;
* Otherwise cleanup and return a fatal error.
if (myrep
->r_flags
& R_TIMING
) {
myrep
->r_flags
&= ~R_TIMING
;
if (myrep
->r_flags
& R_SENT
) {
myrep
->r_flags
&= ~R_SENT
;
* Get the xid and check that it is an rpc reply
while (m
&& m
->m_len
== 0)
nfs_sounlock(&nmp
->nm_flag
);
bcopy(mtod(m
, caddr_t
), (caddr_t
)&rxid
, NFSX_UNSIGNED
);
* Loop through the request list to match up the reply
* Iff no match, just drop the datagram
while (rep
!= &nfsreqh
) {
if (rep
->r_mrep
== NULL
&& rxid
== rep
->r_xid
) {
if (rep
->r_flags
& R_TIMING
) {
nfs_updatetimer(rep
->r_nmp
);
rep
->r_flags
&= ~R_TIMING
;
if (rep
->r_flags
& R_SENT
) {
nfs_sounlock(&nmp
->nm_flag
);
* If not matched to a request, drop it.
nfsstats
.rpcunexpected
++;
* nfs_request - goes something like this
* - fill in request struct
* - calls nfs_send() for first transmit
* - calls nfs_receive() to get reply
* - break down rpc header and return with nfs reply pointed to
* nb: always frees up mreq mbuf list
nfs_request(vp
, mreq
, xid
, procnum
, procp
, tryhard
, mp
, mrp
, mdp
, dposp
)
register struct mbuf
*m
, *mrep
;
register struct nfsreq
*rep
;
MALLOC(rep
, struct nfsreq
*, sizeof(struct nfsreq
), M_NFSREQ
, M_WAITOK
);
if ((nmp
->nm_flag
& NFSMNT_SOFT
) ||
((nmp
->nm_flag
& NFSMNT_SPONGY
) && !tryhard
))
rep
->r_retry
= nmp
->nm_retry
;
rep
->r_retry
= NFS_MAXREXMIT
+ 1; /* past clip limit */
rep
->r_flags
= rep
->r_rexmit
= 0;
* - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO
* - idempotent requests on SOCK_DGRAM use 0
* - Reliable transports, NFS_RELIABLETIMEO
* Timeouts are still done on reliable transports to ensure detection
* of excessive connection delay.
if (nmp
->nm_sotype
!= SOCK_DGRAM
)
rep
->r_timerinit
= -NFS_RELIABLETIMEO
;
else if (nonidempotent
[procnum
])
rep
->r_timerinit
= -NFS_MINIDEMTIMEO
;
rep
->r_timer
= rep
->r_timerinit
;
mreq
->m_pkthdr
.len
= len
;
mreq
->m_pkthdr
.rcvif
= (struct ifnet
*)0;
if ((nmp
->nm_flag
& NFSMNT_COMPRESS
) && compressrequest
[procnum
]) {
mreq
= nfs_compress(mreq
);
len
= mreq
->m_pkthdr
.len
;
* For non-atomic protocols, insert a Sun RPC Record Mark.
if ((nmp
->nm_soflags
& PR_ATOMIC
) == 0) {
M_PREPEND(mreq
, sizeof(u_long
), M_WAIT
);
*mtod(mreq
, u_long
*) = htonl(0x80000000 | len
);
* Do the client side RPC.
* Chain request into list of outstanding requests. Be sure
* to put it LAST so timer finds oldest requests first.
reph
->r_prev
->r_next
= rep
;
rep
->r_prev
= reph
->r_prev
;
* If backing off another request or avoiding congestion, don't
* send this one now but let timer do it. If not timing a request,
if (nmp
->nm_sent
<= 0 || nmp
->nm_sotype
!= SOCK_DGRAM
||
(nmp
->nm_currexmit
== 0 && nmp
->nm_sent
< nmp
->nm_window
)) {
rep
->r_flags
|= R_TIMING
;
m
= m_copym(mreq
, 0, M_COPYALL
, M_WAIT
);
if (nmp
->nm_soflags
& PR_CONNREQUIRED
)
nfs_solock(&nmp
->nm_flag
);
error
= nfs_send(nmp
->nm_so
, nmp
->nm_nam
, m
, rep
);
if (nmp
->nm_soflags
& PR_CONNREQUIRED
)
nfs_sounlock(&nmp
->nm_flag
);
if (error
&& NFSIGNORE_SOERROR(nmp
->nm_soflags
, error
))
nmp
->nm_so
->so_error
= error
= 0;
* Wait for the reply from our send or the timer's.
error
= nfs_reply(nmp
, rep
);
* RPC done, unlink the request.
rep
->r_prev
->r_next
= rep
->r_next
;
rep
->r_next
->r_prev
= rep
->r_prev
;
* If there was a successful reply and a tprintf msg.
if (!error
&& (rep
->r_flags
& R_TPRINTFMSG
))
nfs_msg(rep
->r_procp
, nmp
->nm_mountp
->mnt_stat
.f_mntfromname
,
FREE((caddr_t
)rep
, M_NFSREQ
);
mrep
= nfs_uncompress(mrep
);
* break down the rpc header and check if ok
dpos
= mtod(md
, caddr_t
);
nfsm_disect(tl
, u_long
*, 5*NFSX_UNSIGNED
);
if (*tl
++ == rpc_msgdenied
) {
* skip over the auth_verf, someday we may want to cache auth_short's
* for nfs_reqhead(), but for now just dump it
len
= nfsm_rndup(fxdr_unsigned(long, *tl
));
nfsm_disect(tl
, u_long
*, NFSX_UNSIGNED
);
nfsm_disect(tl
, u_long
*, NFSX_UNSIGNED
);
error
= fxdr_unsigned(int, *tl
);
return (EPROTONOSUPPORT
);
* Get a request for the server main loop
* - receive a request via. nfs_soreceive()
* - fill in the cred struct.
nfs_getreq(so
, prog
, vers
, maxproc
, nam
, mrp
, mdp
, dposp
, retxid
, procnum
, cr
,
register struct ucred
*cr
;
if (so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) {
error
= nfs_receive(so
, nam
, &mrep
, (struct nfsreq
*)0);
error
= nfs_receive(so
, nam
, &mrep
, (struct nfsreq
*)0);
} while (!error
&& nfs_badnam(*nam
, msk
, mtch
));
mrep
= nfs_uncompress(mrep
);
dpos
= mtod(mrep
, caddr_t
);
nfsm_disect(tl
, u_long
*, 10*NFSX_UNSIGNED
);
*procnum
= fxdr_unsigned(u_long
, *tl
++);
if (*procnum
== NFSPROC_NULL
) {
if (*procnum
> maxproc
|| *tl
++ != rpc_auth_unix
) {
len
= fxdr_unsigned(int, *tl
++);
if (len
< 0 || len
> RPCAUTH_MAXSIZ
) {
len
= fxdr_unsigned(int, *++tl
);
if (len
< 0 || len
> NFS_MAXNAMLEN
) {
nfsm_adv(nfsm_rndup(len
));
nfsm_disect(tl
, u_long
*, 3*NFSX_UNSIGNED
);
cr
->cr_uid
= fxdr_unsigned(uid_t
, *tl
++);
cr
->cr_gid
= fxdr_unsigned(gid_t
, *tl
++);
len
= fxdr_unsigned(int, *tl
);
if (len
< 0 || len
> RPCAUTH_UNIXGIDS
) {
nfsm_disect(tl
, u_long
*, (len
+ 2)*NFSX_UNSIGNED
);
for (i
= 1; i
<= len
; i
++)
cr
->cr_groups
[i
] = fxdr_unsigned(gid_t
, *tl
++);
cr
->cr_ngroups
= (len
>= NGROUPS
) ? NGROUPS
: (len
+ 1);
* Do we have any use for the verifier.
* According to the "Remote Procedure Call Protocol Spec." it
* should be AUTH_NULL, but some clients make it AUTH_UNIX?
* For now, just skip over it
len
= fxdr_unsigned(int, *++tl
);
if (len
< 0 || len
> RPCAUTH_MAXSIZ
) {
nfsm_adv(nfsm_rndup(len
));
* Generate the rpc reply header
* siz arg. is used to decide if adding a cluster is worthwhile
nfs_rephead(siz
, retxid
, err
, mrq
, mbp
, bposp
)
struct mbuf
*mreq
, *mb
, *mb2
;
if ((siz
+RPC_REPLYSIZ
) > MHLEN
)
tl
= mtod(mreq
, u_long
*);
mreq
->m_len
= 6*NFSX_UNSIGNED
;
bpos
= ((caddr_t
)tl
)+mreq
->m_len
;
if (err
== ERPCMISMATCH
) {
*tl
++ = txdr_unsigned(2);
*tl
= txdr_unsigned(RPC_PROGUNAVAIL
);
*tl
= txdr_unsigned(RPC_PROGMISMATCH
);
nfsm_build(tl
, u_long
*, 2*NFSX_UNSIGNED
);
*tl
++ = txdr_unsigned(2);
*tl
= txdr_unsigned(2); /* someday 3 */
*tl
= txdr_unsigned(RPC_PROCUNAVAIL
);
nfsm_build(tl
, u_long
*, NFSX_UNSIGNED
);
*tl
= txdr_unsigned(err
);
if (err
!= 0 && err
!= VNOVAL
)
* Scan the nfsreq list and retransmit any requests that have timed out
* To avoid retransmission attempts on STREAM sockets (in the future) make
* sure to set the r_retry field to 0 (implies nm_retry == 0).
register struct nfsreq
*rep
;
register struct socket
*so
;
register struct nfsmount
*nmp
;
for (rep
= nfsreqh
.r_next
; rep
!= &nfsreqh
; rep
= rep
->r_next
) {
if (rep
->r_mrep
|| (rep
->r_flags
& R_SOFTTERM
) ||
(so
= nmp
->nm_so
) == NULL
)
if ((nmp
->nm_flag
& NFSMNT_INT
) && nfs_sigintr(rep
->r_procp
)) {
rep
->r_flags
|= R_SOFTTERM
;
if (rep
->r_flags
& R_TIMING
) /* update rtt in mount */
if (++rep
->r_timer
< nmp
->nm_rto
)
/* Do backoff and save new timeout in mount */
if (rep
->r_flags
& R_TIMING
) {
rep
->r_flags
&= ~R_TIMING
;
if (rep
->r_flags
& R_SENT
) {
* Check for too many retries on soft mount.
* nb: For hard mounts, r_retry == NFS_MAXREXMIT+1
if (++rep
->r_rexmit
> NFS_MAXREXMIT
)
rep
->r_rexmit
= NFS_MAXREXMIT
;
* Check for server not responding
if ((rep
->r_flags
& R_TPRINTFMSG
) == 0 &&
rep
->r_rexmit
> NFS_FISHY
) {
nmp
->nm_mountp
->mnt_stat
.f_mntfromname
,
rep
->r_flags
|= R_TPRINTFMSG
;
if (rep
->r_rexmit
>= rep
->r_retry
) { /* too many */
rep
->r_flags
|= R_SOFTTERM
;
if (nmp
->nm_sotype
!= SOCK_DGRAM
)
* If there is enough space and the window allows..
if (sbspace(&so
->so_snd
) >= rep
->r_mreq
->m_pkthdr
.len
&&
nmp
->nm_sent
< nmp
->nm_window
&&
(m
= m_copym(rep
->r_mreq
, 0, M_COPYALL
, M_DONTWAIT
))){
if ((nmp
->nm_flag
& NFSMNT_NOCONN
) == 0)
error
= (*so
->so_proto
->pr_usrreq
)(so
, PRU_SEND
, m
,
(caddr_t
)0, (struct mbuf
*)0, (struct mbuf
*)0);
error
= (*so
->so_proto
->pr_usrreq
)(so
, PRU_SEND
, m
,
nmp
->nm_nam
, (struct mbuf
*)0, (struct mbuf
*)0);
if (NFSIGNORE_SOERROR(nmp
->nm_soflags
, error
))
* We need to time the request even though we
rep
->r_flags
|= (R_SENT
|R_TIMING
);
rep
->r_timer
= rep
->r_timerinit
;
timeout(nfs_timer
, (caddr_t
)0, hz
/NFS_HZ
);
* NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is
* used here. The timer state is held in the nfsmount structure and
* a single request is used to clock the response. When successful
* the rtt smoothing in nfs_updatetimer is used, when failed the backoff
* is done by nfs_backofftimer. We also log failure messages in these
* Congestion variables are held in the nfshost structure which
* is referenced by nfsmounts and shared per-server. This separation
* makes it possible to do per-mount timing which allows varying disk
* access times to be dealt with, while preserving a network oriented
* congestion control scheme.
* The windowing implements the Jacobson/Karels slowstart algorithm
* with adjusted scaling factors. We start with one request, then send
* 4 more after each success until the ssthresh limit is reached, then
* we increment at a rate proportional to the window. On failure, we
* remember 3/4 the current window and clamp the send limit to 1. Note
* ICMP source quench is not reflected in so->so_error so we ignore that
* NFS behaves much more like a transport protocol with these changes,
* shedding the teenage pedal-to-the-metal tendencies of "other"
* Timers and congestion avoidance by Tom Talpey, Open Software Foundation.
* The TCP algorithm was not forgiving enough. Because the NFS server
* responds only after performing lookups/diskio/etc, we have to be
* more prepared to accept a spiky variance. The TCP algorithm is:
* TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1)
#define NFS_RTO(nmp) (((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar)
register struct nfsmount
*nmp
;
/* If retransmitted, clear and return */
if (nmp
->nm_rexmit
|| nmp
->nm_currexmit
) {
nmp
->nm_rexmit
= nmp
->nm_currexmit
= 0;
/* If have a measurement, do smoothing */
delta
= nmp
->nm_rtt
- (nmp
->nm_srtt
>> 3);
if ((nmp
->nm_srtt
+= delta
) <= 0)
delta
-= (nmp
->nm_rttvar
>> 2);
if ((nmp
->nm_rttvar
+= delta
) <= 0)
nmp
->nm_rttvar
= nmp
->nm_rtt
<< 1;
if (nmp
->nm_rttvar
== 0) nmp
->nm_rttvar
= 2;
nmp
->nm_srtt
= nmp
->nm_rttvar
<< 2;
/* Compute new Retransmission TimeOut and clip */
nmp
->nm_rto
= NFS_RTO(nmp
);
if (nmp
->nm_rto
< NFS_MINTIMEO
)
nmp
->nm_rto
= NFS_MINTIMEO
;
else if (nmp
->nm_rto
> NFS_MAXTIMEO
)
nmp
->nm_rto
= NFS_MAXTIMEO
;
/* Update window estimate */
if (nmp
->nm_window
< nmp
->nm_ssthresh
) /* quickly */
register long incr
= ++nmp
->nm_winext
;
incr
= (incr
* incr
) / nmp
->nm_window
;
if (nmp
->nm_window
> NFS_MAXWINDOW
)
nmp
->nm_window
= NFS_MAXWINDOW
;
register struct nfsmount
*nmp
;
register unsigned long newrto
;
if (++nmp
->nm_rexmit
> 8 * sizeof nmp
->nm_rto
)
nmp
->nm_rexmit
= 8 * sizeof nmp
->nm_rto
;
/* Back off RTO exponentially */
newrto
<<= (nmp
->nm_rexmit
- 1);
if (newrto
== 0 || newrto
> NFS_MAXTIMEO
)
/* If too many retries, message, assume a bogus RTT and re-measure */
if (nmp
->nm_currexmit
< nmp
->nm_rexmit
) {
nmp
->nm_currexmit
= nmp
->nm_rexmit
;
if (nmp
->nm_currexmit
>= nfsrexmtthresh
) {
if (nmp
->nm_currexmit
== nfsrexmtthresh
) {
nmp
->nm_rttvar
+= (nmp
->nm_srtt
>> 2);
/* Close down window but remember this point (3/4 current) for later */
nmp
->nm_ssthresh
= ((nmp
->nm_window
<< 1) + nmp
->nm_window
) >> 2;
* Test for a termination signal pending on procp.
* This is used for NFSMNT_INT mounts.
if (p
&& p
->p_sig
&& (((p
->p_sig
&~ p
->p_sigmask
) &~ p
->p_sigignore
) &
tprintf(tpr
, "nfs server %s: %s\n", server
, msg
);
* Lock a socket against others.
* Necessary for STREAM sockets to ensure you get an entire rpc request/reply
* and also to avoid race conditions between the processes with nfs requests
* in progress when a reconnect is necessary.
while (*flagp
& NFSMNT_SCKLOCK
) {
*flagp
|= NFSMNT_WANTSCK
;
(void) tsleep((caddr_t
)flagp
, PZERO
-1, "nfsolck", 0);
*flagp
|= NFSMNT_SCKLOCK
;
* Unlock the stream socket for others.
if ((*flagp
& NFSMNT_SCKLOCK
) == 0)
*flagp
&= ~NFSMNT_SCKLOCK
;
if (*flagp
& NFSMNT_WANTSCK
) {
*flagp
&= ~NFSMNT_WANTSCK
;
* This function compares two net addresses by family and returns TRUE
* If there is any doubt, return FALSE.
nfs_netaddr_match(nam1
, nam2
)
struct mbuf
*nam1
, *nam2
;
register struct sockaddr
*saddr1
, *saddr2
;
saddr1
= mtod(nam1
, struct sockaddr
*);
saddr2
= mtod(nam2
, struct sockaddr
*);
if (saddr1
->sa_family
!= saddr2
->sa_family
)
* Must do each address family separately since unused fields
* are undefined values and not always zeroed.
switch (saddr1
->sa_family
) {
if (((struct sockaddr_in
*)saddr1
)->sin_addr
.s_addr
==
((struct sockaddr_in
*)saddr2
)->sin_addr
.s_addr
)
* Check the hostname fields for nfsd's mask and match fields.
* - Bitwise AND the mask with the host address field
* - Compare for == with match
* return TRUE if not equal
nfs_badnam(nam
, msk
, mtch
)
register struct mbuf
*nam
, *msk
, *mtch
;
switch (mtod(nam
, struct sockaddr
*)->sa_family
) {
return ((mtod(nam
, struct sockaddr_in
*)->sin_addr
.s_addr
&
mtod(msk
, struct sockaddr_in
*)->sin_addr
.s_addr
) !=
mtod(mtch
, struct sockaddr_in
*)->sin_addr
.s_addr
);
printf("nfs_badmatch, unknown sa_family\n");