a2907882 1/*
f777974b 2 * Copyright (c) 1989, 1991 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
dbf0c423 8 * %sccs.include.redist.c%
a2907882 9 *
79993818 10 * @(#)nfs_socket.c 7.21 (Berkeley) %G%
11 */
12
13/*
f0f1cbaa 14 * Socket operations for use by nfs
15 */
16
a2907882 17#include "param.h"
2f08b65a 18#include "proc.h"
19#include "mount.h"
20#include "kernel.h"
21#include "malloc.h"
22#include "mbuf.h"
23#include "vnode.h"
24#include "domain.h"
25#include "protosw.h"
26#include "socket.h"
27#include "socketvar.h"
f777974b 28#include "syslog.h"
79993818 29#include "tprintf.h"
30#include "../netinet/in.h"
31#include "../netinet/tcp.h"
f777974b 32
33#include "rpcv2.h"
34#include "nfsv2.h"
35#include "nfs.h"
36#include "xdr_subs.h"
37#include "nfsm_subs.h"
38#include "nfsmount.h"
39
40#define TRUE 1
170bfd05 41#define FALSE 0
a2907882 42
43/*
44 * External data, mostly RPC constants in XDR form
45 */
46extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
47 rpc_msgaccepted, rpc_call;
48extern u_long nfs_prog, nfs_vers;
170bfd05 49/* Maybe these should be bits in a u_long ?? */
f0f1cbaa 50extern int nonidempotent[NFS_NPROCS];
51static int compressrequest[NFS_NPROCS] = {
52 FALSE,
53 TRUE,
54 TRUE,
55 FALSE,
56 TRUE,
57 TRUE,
58 TRUE,
59 FALSE,
60 FALSE,
61 TRUE,
62 TRUE,
63 TRUE,
64 TRUE,
65 TRUE,
66 TRUE,
67 TRUE,
68 TRUE,
69 TRUE,
70};
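/*
 * XXX editor's note (not in the original source): the table above appears
 * to be indexed by NFS Version 2 procedure number, in the same order as
 * the nfsrv_procs[] dispatch table below (NULL, GETATTR, SETATTR, ROOT,
 * LOOKUP, READLINK, READ, WRITECACHE, WRITE, CREATE, REMOVE, RENAME,
 * LINK, SYMLINK, MKDIR, RMDIR, READDIR, STATFS).  The FALSE entries are
 * NULL, ROOT, WRITECACHE and WRITE, presumably because the unused
 * procedures and requests carrying bulk write data gain little from
 * nfs_compress().
 */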
71int nfs_sbwait();
72void nfs_disconnect();
958df9fb 73struct mbuf *nfs_compress(), *nfs_uncompress();
f0f1cbaa 74
75int nfsrv_null(),
76 nfsrv_getattr(),
77 nfsrv_setattr(),
78 nfsrv_lookup(),
79 nfsrv_readlink(),
80 nfsrv_read(),
81 nfsrv_write(),
82 nfsrv_create(),
83 nfsrv_remove(),
84 nfsrv_rename(),
85 nfsrv_link(),
86 nfsrv_symlink(),
87 nfsrv_mkdir(),
88 nfsrv_rmdir(),
89 nfsrv_readdir(),
90 nfsrv_statfs(),
91 nfsrv_noop();
92
93int (*nfsrv_procs[NFS_NPROCS])() = {
94 nfsrv_null,
95 nfsrv_getattr,
96 nfsrv_setattr,
97 nfsrv_noop,
98 nfsrv_lookup,
99 nfsrv_readlink,
100 nfsrv_read,
101 nfsrv_noop,
102 nfsrv_write,
103 nfsrv_create,
104 nfsrv_remove,
105 nfsrv_rename,
106 nfsrv_link,
107 nfsrv_symlink,
108 nfsrv_mkdir,
109 nfsrv_rmdir,
110 nfsrv_readdir,
111 nfsrv_statfs,
112};
113
114struct nfsreq nfsreqh;
115int nfsrexmtthresh = NFS_FISHY;
f0f1cbaa 116int nfs_tcpnodelay = 1;
117
118/*
f0f1cbaa 119 * Initialize sockets and congestion for a new NFS connection.
120 * We do not free the sockaddr if error.
121 */
f0f1cbaa 122nfs_connect(nmp)
2f08b65a 123 register struct nfsmount *nmp;
2f08b65a 124{
125 register struct socket *so;
126 int s, error;
2f08b65a 127 struct mbuf *m;
2f08b65a 128
129 nmp->nm_so = (struct socket *)0;
130 if (error = socreate(mtod(nmp->nm_nam, struct sockaddr *)->sa_family,
131 &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
2f08b65a 132 goto bad;
133 so = nmp->nm_so;
134 nmp->nm_soflags = so->so_proto->pr_flags;
2f08b65a 135
136 /*
137 * Protocols that do not require connections may be optionally left
138 * unconnected for servers that reply from a port other than NFS_PORT.
139 */
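/*
 * XXX editor's note (not in the original source): when NFSMNT_NOCONN is
 * set on a datagram mount the socket is left unconnected here, and
 * nfs_send() below supplies the server's address on every sosend() call
 * instead (its "sendnam" argument), so replies may legitimately come
 * back from a port other than the one mounted.
 */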
140 if (nmp->nm_flag & NFSMNT_NOCONN) {
141 if (nmp->nm_soflags & PR_CONNREQUIRED) {
142 error = ENOTCONN;
143 goto bad;
144 }
145 } else {
146 if (error = soconnect(so, nmp->nm_nam))
2f08b65a 147 goto bad;
148
149 /*
150 * Wait for the connection to complete. Cribbed from the
151 * connect system call but with the wait at negative prio.
152 */
153 s = splnet();
154 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0)
170bfd05 155 (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 0);
156 splx(s);
157 if (so->so_error) {
158 error = so->so_error;
159 goto bad;
160 }
2f08b65a 161 }
f0f1cbaa 162 if (nmp->nm_sotype == SOCK_DGRAM) {
170bfd05 163 if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
164 so->so_rcv.sb_timeo = (5 * hz);
165 so->so_snd.sb_timeo = (5 * hz);
166 } else {
167 so->so_rcv.sb_timeo = 0;
168 so->so_snd.sb_timeo = 0;
169 }
170 if (error = soreserve(so,
171 min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR), NFS_MAXPACKET),
172 min(4 * (nmp->nm_rsize + NFS_MAXPKTHDR), NFS_MAXPACKET)))
173 goto bad;
174 } else {
170bfd05 175 if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
176 so->so_rcv.sb_timeo = (5 * hz);
177 so->so_snd.sb_timeo = (5 * hz);
178 } else {
179 so->so_rcv.sb_timeo = 0;
180 so->so_snd.sb_timeo = 0;
181 }
182 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
183 MGET(m, M_WAIT, MT_SOOPTS);
184 *mtod(m, int *) = 1;
185 m->m_len = sizeof(int);
186 sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
187 }
188 if (so->so_proto->pr_domain->dom_family == AF_INET &&
189 so->so_proto->pr_protocol == IPPROTO_TCP &&
190 nfs_tcpnodelay) {
191 MGET(m, M_WAIT, MT_SOOPTS);
192 *mtod(m, int *) = 1;
193 m->m_len = sizeof(int);
194 sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
195 }
196 if (error = soreserve(so,
197 min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long)),
198 NFS_MAXPACKET + sizeof(u_long)),
199 min(4 * (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof(u_long)),
200 NFS_MAXPACKET + sizeof(u_long))))
201 goto bad;
202 }
203 so->so_rcv.sb_flags |= SB_NOINTR;
204 so->so_snd.sb_flags |= SB_NOINTR;
2f08b65a 205
206 /* Initialize other non-zero congestion variables */
207 nmp->nm_rto = NFS_TIMEO;
208 nmp->nm_window = 2; /* Initial send window */
209 nmp->nm_ssthresh = NFS_MAXWINDOW; /* Slowstart threshold */
210 nmp->nm_rttvar = nmp->nm_rto << 1;
211 nmp->nm_sent = 0;
212 nmp->nm_currexmit = 0;
213 return (0);
2f08b65a 214
215bad:
216 nfs_disconnect(nmp);
217 return (error);
218}
2f08b65a 219
220/*
221 * Reconnect routine:
222 * Called when a connection is broken on a reliable protocol.
223 * - clean up the old socket
224 * - nfs_connect() again
225 * - set R_MUSTRESEND for all outstanding requests on mount point
226 * If this fails the mount point is DEAD!
227 * nb: Must be called with the nfs_solock() set on the mount point.
228 */
229nfs_reconnect(rep, nmp)
230 register struct nfsreq *rep;
231 register struct nfsmount *nmp;
232{
233 register struct nfsreq *rp;
f0f1cbaa 234 int error;
2f08b65a 235
236 nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
237 "trying reconnect");
f0f1cbaa 238 while (error = nfs_connect(nmp)) {
239#ifdef lint
240 error = error;
241#endif /* lint */
242 if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp))
243 return (EINTR);
170bfd05 244 (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
2f08b65a 245 }
246 nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
247 "reconnected");
248
249 /*
250 * Loop through outstanding request list and fix up all requests
251 * on old socket.
252 */
253 rp = nfsreqh.r_next;
254 while (rp != &nfsreqh) {
255 if (rp->r_nmp == nmp)
256 rp->r_flags |= R_MUSTRESEND;
257 rp = rp->r_next;
258 }
259 return (0);
260}
261
262/*
263 * NFS disconnect. Clean up and unlink.
264 */
f0f1cbaa 265void
266nfs_disconnect(nmp)
267 register struct nfsmount *nmp;
268{
f0f1cbaa 269 register struct socket *so;
2f08b65a 270
271 if (nmp->nm_so) {
272 so = nmp->nm_so;
273 nmp->nm_so = (struct socket *)0;
274 soshutdown(so, 2);
275 soclose(so);
276 }
277}
278
279/*
280 * This is the nfs send routine. For connection based socket types, it
281 * must be called with an nfs_solock() on the socket.
282 * "rep == NULL" indicates that it has been called from a server.
a2907882 283 */
f0f1cbaa 284nfs_send(so, nam, top, rep)
285 register struct socket *so;
286 struct mbuf *nam;
287 register struct mbuf *top;
288 struct nfsreq *rep;
a2907882 289{
290 struct mbuf *sendnam;
291 int error, soflags;
a2907882 292
293 if (rep) {
294 if (rep->r_flags & R_SOFTTERM) {
2f08b65a 295 m_freem(top);
f0f1cbaa 296 return (EINTR);
2f08b65a 297 }
5044b7a3 298 if (rep->r_nmp->nm_so == NULL &&
299 (error = nfs_reconnect(rep, rep->r_nmp)))
300 return (error);
301 rep->r_flags &= ~R_MUSTRESEND;
5044b7a3 302 so = rep->r_nmp->nm_so;
303 soflags = rep->r_nmp->nm_soflags;
304 } else
305 soflags = so->so_proto->pr_flags;
306 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
307 sendnam = (struct mbuf *)0;
308 else
309 sendnam = nam;
310
311 error = sosend(so, sendnam, (struct uio *)0, top,
312 (struct mbuf *)0, 0);
313 if (error == EWOULDBLOCK && rep) {
314 if (rep->r_flags & R_SOFTTERM)
315 error = EINTR;
316 else {
317 rep->r_flags |= R_MUSTRESEND;
318 error = 0;
2f08b65a 319 }
a2907882 320 }
321 /*
322 * Ignore socket errors??
323 */
324 if (error && error != EINTR && error != ERESTART)
325 error = 0;
326 return (error);
327}
328
329/*
330 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
331 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
332 * Mark and consolidate the data into a new mbuf list.
333 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
334 * small mbufs.
335 * For SOCK_STREAM we must be very careful to read an entire record once
336 * we have read any of it, even if the system call has been interrupted.
a2907882 337 */
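/*
 * XXX editor's note (not in the original source): the "Record Mark" used
 * on stream transports is a single 4-byte word in network byte order in
 * which the low-order 31 bits give the length of the fragment that
 * follows and the high-order bit flags the final fragment of a record.
 * This code only ever generates single-fragment records, e.g. a 120 byte
 * request is prepended with htonl(0x80000000 | 120) == htonl(0x80000078)
 * by nfs_request(), and the receive side below strips the flag with
 * "& ~0x80000000" after reading the word with MSG_WAITALL.
 */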
f0f1cbaa 338nfs_receive(so, aname, mp, rep)
339 register struct socket *so;
340 struct mbuf **aname;
341 struct mbuf **mp;
f0f1cbaa 342 register struct nfsreq *rep;
a2907882 343{
344 struct uio auio;
345 struct iovec aio;
a2907882 346 register struct mbuf *m;
958df9fb 347 struct mbuf *m2, *mnew, **mbp;
348 caddr_t fcp, tcp;
349 u_long len;
350 struct mbuf **getnam;
79993818 351 int error, siz, mlen, soflags, rcvflg;
a2907882 352
353 /*
354 * Set up arguments for soreceive()
355 */
356 *mp = (struct mbuf *)0;
357 *aname = (struct mbuf *)0;
358 if (rep)
359 soflags = rep->r_nmp->nm_soflags;
360 else
361 soflags = so->so_proto->pr_flags;
a2907882 362
363 /*
364 * For reliable protocols, lock against other senders/receivers
365 * in case a reconnect is necessary.
366 * For SOCK_STREAM, first get the Record Mark to find out how much
367 * more there is to get.
368 * We must lock the socket against other receivers
369 * until we have an entire rpc request/reply.
370 */
371 if (soflags & PR_CONNREQUIRED) {
372tryagain:
373 /*
374 * Check for fatal errors and resending request.
375 */
376 if (rep) {
377 /*
378 * Ugh: If a reconnect attempt just happened, nm_so
379 * would have changed. NULL indicates a failed
380 * attempt that has essentially shut down this
381 * mount point.
382 */
383 if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL ||
384 (rep->r_flags & R_SOFTTERM))
385 return (EINTR);
386 while (rep->r_flags & R_MUSTRESEND) {
387 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
388 nfsstats.rpcretries++;
389 if (error = nfs_send(so, rep->r_nmp->nm_nam, m,
390 rep))
391 goto errout;
2f08b65a 392 }
e8540f59 393 }
394 if ((soflags & PR_ATOMIC) == 0) {
395 aio.iov_base = (caddr_t) &len;
396 aio.iov_len = sizeof(u_long);
397 auio.uio_iov = &aio;
398 auio.uio_iovcnt = 1;
399 auio.uio_segflg = UIO_SYSSPACE;
400 auio.uio_rw = UIO_READ;
401 auio.uio_offset = 0;
402 auio.uio_resid = sizeof(u_long);
403 do {
404 rcvflg = MSG_WAITALL;
405 error = soreceive(so, (struct mbuf **)0, &auio,
f0f1cbaa 406 (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
79993818 407 if (error == EWOULDBLOCK && rep) {
408 if (rep->r_flags & R_SOFTTERM)
409 return (EINTR);
410 if (rep->r_flags & R_MUSTRESEND)
411 goto tryagain;
79993818 412 }
f0f1cbaa 413 } while (error == EWOULDBLOCK);
414 if (!error && auio.uio_resid > 0) {
415 if (rep)
416 log(LOG_INFO,
417 "short receive (%d/%d) from nfs server %s\n",
418 sizeof(u_long) - auio.uio_resid,
419 sizeof(u_long),
420 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
421 error = EPIPE;
422 }
423 if (error)
424 goto errout;
425 len = ntohl(len) & ~0x80000000;
426 /*
427 * This is SERIOUS! We are out of sync with the sender
428 * and forcing a disconnect/reconnect is all I can do.
429 */
430 if (len > NFS_MAXPACKET) {
431 if (rep)
432 log(LOG_ERR, "%s (%d) from nfs server %s\n",
433 "impossible packet length",
434 len,
435 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
436 error = EFBIG;
437 goto errout;
438 }
439 auio.uio_resid = len;
440 do {
79993818 441 rcvflg = MSG_WAITALL;
442 error = soreceive(so, (struct mbuf **)0,
443 &auio, mp, (struct mbuf **)0, &rcvflg);
444 } while (error == EWOULDBLOCK || error == EINTR ||
445 error == ERESTART);
446 if (!error && auio.uio_resid > 0) {
447 if (rep)
448 log(LOG_INFO,
449 "short receive (%d/%d) from nfs server %s\n",
450 len - auio.uio_resid, len,
451 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
452 error = EPIPE;
453 }
2f08b65a 454 } else {
455 auio.uio_resid = len = 1000000; /* Anything Big */
456 do {
79993818 457 rcvflg = 0;
458 error = soreceive(so, (struct mbuf **)0,
459 &auio, mp, (struct mbuf **)0, &rcvflg);
460 if (error == EWOULDBLOCK && rep) {
461 if (rep->r_flags & R_SOFTTERM)
462 return (EINTR);
463 if (rep->r_flags & R_MUSTRESEND)
464 goto tryagain;
465 }
466 } while (error == EWOULDBLOCK);
467 if (!error && *mp == NULL)
468 error = EPIPE;
469 len -= auio.uio_resid;
2f08b65a 470 }
471errout:
472 if (error && rep && error != EINTR && error != ERESTART) {
473 m_freem(*mp);
474 *mp = (struct mbuf *)0;
475 if (error != EPIPE && rep)
476 log(LOG_INFO,
477 "receive error %d from nfs server %s\n",
478 error,
479 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
480 nfs_disconnect(rep->r_nmp);
481 error = nfs_reconnect(rep, rep->r_nmp);
482 if (!error)
483 goto tryagain;
2f08b65a 484 }
485 } else {
486 if (so->so_state & SS_ISCONNECTED)
487 getnam = (struct mbuf **)0;
488 else
489 getnam = aname;
490 auio.uio_resid = len = 1000000;
491 do {
79993818 492 rcvflg = 0;
493 error = soreceive(so, getnam, &auio, mp,
494 (struct mbuf **)0, &rcvflg);
495 if (error == EWOULDBLOCK && rep &&
496 (rep->r_flags & R_SOFTTERM))
497 return (EINTR);
498 } while (error == EWOULDBLOCK);
499 len -= auio.uio_resid;
500 }
501 if (error) {
502 m_freem(*mp);
503 *mp = (struct mbuf *)0;
504 }
505 /*
506 * Search for any mbufs that are not a multiple of 4 bytes long.
507 * These could cause pointer alignment problems, so copy them to
508 * well aligned mbufs.
509 */
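/*
 * XXX editor's note (not in the original source): XDR data is a stream
 * of 4-byte quantities and the nfsm_disect()/fxdr_ macros read u_longs
 * directly out of mbuf data areas, so an odd-length mbuf in the middle
 * of the chain would leave every following field misaligned; hence the
 * copy into fresh, longword-aligned mbufs below.
 */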
510 m = *mp;
511 mbp = mp;
512 while (m) {
513 /*
514 * All this for something that may never happen.
515 */
958df9fb 516 if (m->m_next && (m->m_len & 0x3)) {
f0f1cbaa 517 printf("nfs_rcv odd length!\n");
d4e5799e 518 mlen = 0;
f0f1cbaa 519 while (m) {
520 fcp = mtod(m, caddr_t);
521 while (m->m_len > 0) {
522 if (mlen == 0) {
523 MGET(m2, M_WAIT, MT_DATA);
524 if (len >= MINCLSIZE)
525 MCLGET(m2, M_WAIT);
526 m2->m_len = 0;
527 mlen = M_TRAILINGSPACE(m2);
528 tcp = mtod(m2, caddr_t);
529 *mbp = m2;
530 mbp = &m2->m_next;
531 }
532 siz = MIN(mlen, m->m_len);
533 bcopy(fcp, tcp, siz);
534 m2->m_len += siz;
535 mlen -= siz;
536 len -= siz;
537 tcp += siz;
538 m->m_len -= siz;
539 fcp += siz;
f0f1cbaa 540 }
541 MFREE(m, mnew);
542 m = mnew;
f0f1cbaa 543 }
f0f1cbaa 544 break;
2f08b65a 545 }
546 len -= m->m_len;
547 mbp = &m->m_next;
548 m = m->m_next;
a2907882 549 }
550 return (error);
551}
552
a2907882 553/*
f0f1cbaa 554 * Implement receipt of reply on a socket.
555 * We must search through the list of received datagrams matching them
556 * with outstanding requests using the xid, until ours is found.
557 */
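/*
 * XXX editor's note (not in the original source): whichever process is
 * blocked in nfs_receive() delivers replies for everyone, filling in
 * r_mrep of whatever outstanding request matches the xid.  That is why
 * the loop below re-checks myrep->r_mrep each time around after taking
 * the socket lock.
 */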
558/* ARGSUSED */
559nfs_reply(nmp, myrep)
560 struct nfsmount *nmp;
ffe6f482 561 struct nfsreq *myrep;
562{
563 register struct mbuf *m;
564 register struct nfsreq *rep;
f0f1cbaa 565 register int error = 0;
958df9fb 566 u_long rxid;
567 struct mbuf *mp, *nam;
568 char *cp;
569 int cnt, xfer;
570
571 /*
f0f1cbaa 572 * Loop around until we get our own reply
a2907882 573 */
574 for (;;) {
575 /*
576 * Lock against other receivers so that I don't get stuck in
577 * sbwait() after someone else has received my reply for me.
578 * Also necessary for connection based protocols to avoid
579 * race conditions during a reconnect.
580 */
170bfd05 581 nfs_solock(&nmp->nm_flag);
582 /* Already received, bye bye */
583 if (myrep->r_mrep != NULL) {
584 nfs_sounlock(&nmp->nm_flag);
585 return (0);
586 }
587 /*
588 * Get the next Rpc reply off the socket
589 */
590 if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) {
591 nfs_sounlock(&nmp->nm_flag);
a2907882 592
593 /*
594 * Ignore routing errors on connectionless protocols??
595 */
596 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
597 nmp->nm_so->so_error = 0;
598 continue;
a2907882 599 }
600
601 /*
602 * Otherwise cleanup and return a fatal error.
603 */
604 if (myrep->r_flags & R_TIMING) {
605 myrep->r_flags &= ~R_TIMING;
606 nmp->nm_rtt = -1;
a2907882 607 }
608 if (myrep->r_flags & R_SENT) {
609 myrep->r_flags &= ~R_SENT;
610 nmp->nm_sent--;
2f08b65a 611 }
612 return (error);
613 }
614
615 /*
616 * Get the xid and check that it is an rpc reply
617 */
618 m = mp;
619 while (m && m->m_len == 0)
620 m = m->m_next;
621 if (m == NULL) {
622 nfsstats.rpcinvalid++;
623 m_freem(mp);
624 nfs_sounlock(&nmp->nm_flag);
625 continue;
626 }
958df9fb 627 bcopy(mtod(m, caddr_t), (caddr_t)&rxid, NFSX_UNSIGNED);
628 /*
629 * Loop through the request list to match up the reply
630 * Iff no match, just drop the datagram
631 */
632 m = mp;
633 rep = nfsreqh.r_next;
634 while (rep != &nfsreqh) {
958df9fb 635 if (rep->r_mrep == NULL && rxid == rep->r_xid) {
636 /* Found it.. */
637 rep->r_mrep = m;
638 /*
639 * Update timing
640 */
641 if (rep->r_flags & R_TIMING) {
642 nfs_updatetimer(rep->r_nmp);
643 rep->r_flags &= ~R_TIMING;
644 rep->r_nmp->nm_rtt = -1;
645 }
646 if (rep->r_flags & R_SENT) {
647 rep->r_flags &= ~R_SENT;
648 rep->r_nmp->nm_sent--;
649 }
650 break;
2f08b65a 651 }
f0f1cbaa 652 rep = rep->r_next;
a2907882 653 }
654 nfs_sounlock(&nmp->nm_flag);
655 if (nam)
656 m_freem(nam);
657 /*
658 * If not matched to a request, drop it.
659 * If it's mine, get out.
660 */
661 if (rep == &nfsreqh) {
662 nfsstats.rpcunexpected++;
663 m_freem(m);
664 } else if (rep == myrep)
665 return (0);
a2907882 666 }
667}
668
669/*
670 * nfs_request - goes something like this
671 * - fill in request struct
672 * - links it into list
673 * - calls nfs_send() for first transmit
674 * - calls nfs_receive() to get reply
675 * - break down rpc header and return with nfs reply pointed to
676 * by mrep or error
677 * nb: always frees up mreq mbuf list
678 */
170bfd05 679nfs_request(vp, mreq, xid, procnum, procp, tryhard, mp, mrp, mdp, dposp)
680 struct vnode *vp;
681 struct mbuf *mreq;
682 u_long xid;
683 int procnum;
684 struct proc *procp;
170bfd05 685 int tryhard;
686 struct mount *mp;
687 struct mbuf **mrp;
688 struct mbuf **mdp;
689 caddr_t *dposp;
690{
691 register struct mbuf *m, *mrep;
692 register struct nfsreq *rep;
693 register u_long *p;
694 register int len;
f0f1cbaa 695 struct nfsmount *nmp;
a2907882 696 struct mbuf *md;
ffe6f482 697 struct nfsreq *reph;
698 caddr_t dpos;
699 char *cp2;
700 int t1;
958df9fb 701 int s, compressed;
f0f1cbaa 702 int error = 0;
a2907882 703
f0f1cbaa 704 nmp = VFSTONFS(mp);
705 m = mreq;
706 MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
707 rep->r_xid = xid;
f0f1cbaa 708 rep->r_nmp = nmp;
a2907882 709 rep->r_vp = vp;
f0f1cbaa 710 rep->r_procp = procp;
711 if ((nmp->nm_flag & NFSMNT_SOFT) ||
712 ((nmp->nm_flag & NFSMNT_SPONGY) && !tryhard))
f0f1cbaa 713 rep->r_retry = nmp->nm_retry;
a2907882 714 else
715 rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
716 rep->r_flags = rep->r_rexmit = 0;
717 /*
718 * Three cases:
719 * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO
720 * - idempotent requests on SOCK_DGRAM use 0
721 * - Reliable transports, NFS_RELIABLETIMEO
722 * Timeouts are still done on reliable transports to ensure detection
170bfd05 723 * of excessive connection delay.
724 */
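/*
 * XXX editor's note (not in the original source): r_timerinit works
 * because nfs_timer() retransmits only once ++r_timer reaches nm_rto,
 * so starting the counter below zero simply grants that many extra
 * ticks of grace before the first timeout (and again after each
 * retransmission, since the timer is reset to r_timerinit there).
 */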
725 if (nmp->nm_sotype != SOCK_DGRAM)
726 rep->r_timerinit = -NFS_RELIABLETIMEO;
727 else if (nonidempotent[procnum])
728 rep->r_timerinit = -NFS_MINIDEMTIMEO;
729 else
730 rep->r_timerinit = 0;
731 rep->r_timer = rep->r_timerinit;
a2907882 732 rep->r_mrep = NULL;
733 len = 0;
734 while (m) {
735 len += m->m_len;
736 m = m->m_next;
737 }
738 mreq->m_pkthdr.len = len;
739 mreq->m_pkthdr.rcvif = (struct ifnet *)0;
740 compressed = 0;
741 m = mreq;
742 if ((nmp->nm_flag & NFSMNT_COMPRESS) && compressrequest[procnum]) {
743 mreq = nfs_compress(mreq);
744 if (mreq != m) {
745 len = mreq->m_pkthdr.len;
746 compressed++;
747 }
748 }
749 /*
750 * For non-atomic protocols, insert a Sun RPC Record Mark.
751 */
752 if ((nmp->nm_soflags & PR_ATOMIC) == 0) {
753 M_PREPEND(mreq, sizeof(u_long), M_WAIT);
754 *mtod(mreq, u_long *) = htonl(0x80000000 | len);
755 }
756 rep->r_mreq = mreq;
a2907882 757
758 /*
759 * Do the client side RPC.
760 */
761 nfsstats.rpcrequests++;
762 /*
763 * Chain request into list of outstanding requests. Be sure
764 * to put it LAST so timer finds oldest requests first.
765 */
a2907882 766 s = splnet();
2f08b65a 767 reph = &nfsreqh;
768 reph->r_prev->r_next = rep;
769 rep->r_prev = reph->r_prev;
770 reph->r_prev = rep;
771 rep->r_next = reph;
772 /*
773 * If backing off another request or avoiding congestion, don't
774 * send this one now but let timer do it. If not timing a request,
775 * do it now.
776 */
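/*
 * XXX editor's note (not in the original source): the test below sends
 * immediately when nothing is outstanding (nm_sent <= 0), when the
 * transport is not a datagram socket, or when no backoff is in progress
 * and the congestion window still has room; otherwise the request just
 * stays queued on nfsreqh and nfs_timer() transmits it later.
 */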
777 if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM ||
778 (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) {
779 nmp->nm_sent++;
780 rep->r_flags |= R_SENT;
781 if (nmp->nm_rtt == -1) {
782 nmp->nm_rtt = 0;
783 rep->r_flags |= R_TIMING;
784 }
785 splx(s);
786 m = m_copym(mreq, 0, M_COPYALL, M_WAIT);
787 if (nmp->nm_soflags & PR_CONNREQUIRED)
170bfd05 788 nfs_solock(&nmp->nm_flag);
789 error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
790 if (nmp->nm_soflags & PR_CONNREQUIRED)
791 nfs_sounlock(&nmp->nm_flag);
792 if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error))
793 nmp->nm_so->so_error = error = 0;
794 } else
2f08b65a 795 splx(s);
a2907882 796
797 /*
798 * Wait for the reply from our send or the timer's.
799 */
800 if (!error)
801 error = nfs_reply(nmp, rep);
a2907882 802
803 /*
804 * RPC done, unlink the request.
805 */
806 s = splnet();
807 rep->r_prev->r_next = rep->r_next;
ffe6f482 808 rep->r_next->r_prev = rep->r_prev;
a2907882 809 splx(s);
810
811 /*
812 * If there was a successful reply and a tprintf msg was printed,
813 * tprintf a response.
814 */
815 if (!error && (rep->r_flags & R_TPRINTFMSG))
816 nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
817 "is alive again");
a2907882 818 m_freem(rep->r_mreq);
958df9fb 819 mrep = rep->r_mrep;
820 FREE((caddr_t)rep, M_NFSREQ);
821 if (error)
822 return (error);
823
824 if (compressed)
825 mrep = nfs_uncompress(mrep);
826 md = mrep;
827 /*
828 * break down the rpc header and check if ok
829 */
830 dpos = mtod(md, caddr_t);
831 nfsm_disect(p, u_long *, 5*NFSX_UNSIGNED);
832 p += 2;
833 if (*p++ == rpc_msgdenied) {
834 if (*p == rpc_mismatch)
835 error = EOPNOTSUPP;
836 else
837 error = EACCES;
838 m_freem(mrep);
839 return (error);
840 }
841 /*
842 * skip over the auth_verf, someday we may want to cache auth_short's
843 * for nfs_reqhead(), but for now just dump it
844 */
845 if (*++p != 0) {
846 len = nfsm_rndup(fxdr_unsigned(long, *p));
847 nfsm_adv(len);
848 }
849 nfsm_disect(p, u_long *, NFSX_UNSIGNED);
850 /* 0 == ok */
851 if (*p == 0) {
852 nfsm_disect(p, u_long *, NFSX_UNSIGNED);
853 if (*p != 0) {
854 error = fxdr_unsigned(int, *p);
855 m_freem(mrep);
856 return (error);
857 }
858 *mrp = mrep;
859 *mdp = md;
860 *dposp = dpos;
861 return (0);
862 }
863 m_freem(mrep);
864 return (EPROTONOSUPPORT);
865nfsmout:
866 return (error);
867}
868
869/*
870 * Get a request for the server main loop
871 * - receive a request via nfs_receive()
872 * - verify it
873 * - fill in the cred struct.
874 */
d4e5799e 875nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr,
958df9fb 876 msk, mtch, wascomp)
877 struct socket *so;
878 u_long prog;
879 u_long vers;
880 int maxproc;
881 struct mbuf **nam;
882 struct mbuf **mrp;
883 struct mbuf **mdp;
884 caddr_t *dposp;
885 u_long *retxid;
d4e5799e 886 u_long *procnum;
a2907882 887 register struct ucred *cr;
f0f1cbaa 888 struct mbuf *msk, *mtch;
958df9fb 889 int *wascomp;
890{
891 register int i;
892 register u_long *p;
893 register long t1;
894 caddr_t dpos, cp2;
895 int error = 0;
896 struct mbuf *mrep, *md;
897 int len;
a2907882 898
f0f1cbaa 899 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
f0f1cbaa 900 error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
901 } else {
902 mrep = (struct mbuf *)0;
903 do {
904 if (mrep) {
905 m_freem(*nam);
906 m_freem(mrep);
907 }
908 error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
909 } while (!error && nfs_badnam(*nam, msk, mtch));
910 }
911 if (error)
912 return (error);
913 md = mrep;
914 mrep = nfs_uncompress(mrep);
915 if (mrep != md) {
916 *wascomp = 1;
917 md = mrep;
918 } else
919 *wascomp = 0;
920 dpos = mtod(mrep, caddr_t);
921 nfsm_disect(p, u_long *, 10*NFSX_UNSIGNED);
922 *retxid = *p++;
923 if (*p++ != rpc_call) {
924 m_freem(mrep);
925 return (ERPCMISMATCH);
926 }
927 if (*p++ != rpc_vers) {
928 m_freem(mrep);
929 return (ERPCMISMATCH);
930 }
931 if (*p++ != prog) {
932 m_freem(mrep);
933 return (EPROGUNAVAIL);
934 }
935 if (*p++ != vers) {
936 m_freem(mrep);
937 return (EPROGMISMATCH);
938 }
939 *procnum = fxdr_unsigned(u_long, *p++);
940 if (*procnum == NFSPROC_NULL) {
941 *mrp = mrep;
942 return (0);
943 }
d4e5799e 944 if (*procnum > maxproc || *p++ != rpc_auth_unix) {
945 m_freem(mrep);
946 return (EPROCUNAVAIL);
947 }
948 len = fxdr_unsigned(int, *p++);
949 if (len < 0 || len > RPCAUTH_MAXSIZ) {
950 m_freem(mrep);
951 return (EBADRPC);
952 }
0bd503ad 953 len = fxdr_unsigned(int, *++p);
954 if (len < 0 || len > NFS_MAXNAMLEN) {
955 m_freem(mrep);
956 return (EBADRPC);
957 }
0bd503ad 958 nfsm_adv(nfsm_rndup(len));
959 nfsm_disect(p, u_long *, 3*NFSX_UNSIGNED);
960 cr->cr_uid = fxdr_unsigned(uid_t, *p++);
961 cr->cr_gid = fxdr_unsigned(gid_t, *p++);
0bd503ad 962 len = fxdr_unsigned(int, *p);
f0f1cbaa 963 if (len < 0 || len > RPCAUTH_UNIXGIDS) {
964 m_freem(mrep);
965 return (EBADRPC);
966 }
967 nfsm_disect(p, u_long *, (len + 2)*NFSX_UNSIGNED);
968 for (i = 1; i <= len; i++)
969 if (i < NGROUPS)
970 cr->cr_groups[i] = fxdr_unsigned(gid_t, *p++);
971 else
972 p++;
973 cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
974 /*
975 * Do we have any use for the verifier?
976 * According to the "Remote Procedure Call Protocol Spec." it
977 * should be AUTH_NULL, but some clients make it AUTH_UNIX?
978 * For now, just skip over it
979 */
0bd503ad 980 len = fxdr_unsigned(int, *++p);
981 if (len < 0 || len > RPCAUTH_MAXSIZ) {
982 m_freem(mrep);
983 return (EBADRPC);
984 }
985 if (len > 0)
986 nfsm_adv(nfsm_rndup(len));
987 *mrp = mrep;
988 *mdp = md;
989 *dposp = dpos;
990 return (0);
991nfsmout:
992 return (error);
993}
994
995/*
996 * Generate the rpc reply header
997 * siz arg. is used to decide if adding a cluster is worthwhile
998 */
999nfs_rephead(siz, retxid, err, mrq, mbp, bposp)
1000 int siz;
1001 u_long retxid;
1002 int err;
1003 struct mbuf **mrq;
1004 struct mbuf **mbp;
1005 caddr_t *bposp;
1006{
1007 register u_long *p;
1008 register long t1;
1009 caddr_t bpos;
1010 struct mbuf *mreq, *mb, *mb2;
1011
1012 NFSMGETHDR(mreq);
1013 mb = mreq;
1014 if ((siz+RPC_REPLYSIZ) > MHLEN)
f0f1cbaa 1015 MCLGET(mreq, M_WAIT);
1016 p = mtod(mreq, u_long *);
1017 mreq->m_len = 6*NFSX_UNSIGNED;
1018 bpos = ((caddr_t)p)+mreq->m_len;
1019 *p++ = retxid;
1020 *p++ = rpc_reply;
1021 if (err == ERPCMISMATCH) {
1022 *p++ = rpc_msgdenied;
1023 *p++ = rpc_mismatch;
1024 *p++ = txdr_unsigned(2);
1025 *p = txdr_unsigned(2);
1026 } else {
1027 *p++ = rpc_msgaccepted;
1028 *p++ = 0;
1029 *p++ = 0;
1030 switch (err) {
1031 case EPROGUNAVAIL:
1032 *p = txdr_unsigned(RPC_PROGUNAVAIL);
1033 break;
1034 case EPROGMISMATCH:
1035 *p = txdr_unsigned(RPC_PROGMISMATCH);
1036 nfsm_build(p, u_long *, 2*NFSX_UNSIGNED);
1037 *p++ = txdr_unsigned(2);
1038 *p = txdr_unsigned(2); /* someday 3 */
1039 break;
1040 case EPROCUNAVAIL:
1041 *p = txdr_unsigned(RPC_PROCUNAVAIL);
1042 break;
1043 default:
1044 *p = 0;
1045 if (err != VNOVAL) {
1046 nfsm_build(p, u_long *, NFSX_UNSIGNED);
1047 *p = txdr_unsigned(err);
1048 }
1049 break;
1050 };
1051 }
1052 *mrq = mreq;
1053 *mbp = mb;
1054 *bposp = bpos;
1055 if (err != 0 && err != VNOVAL)
1056 nfsstats.srvrpc_errs++;
1057 return (0);
1058}
1059
1060/*
1061 * Nfs timer routine
1062 * Scan the nfsreq list and retransmit any requests that have timed out
1063 * To avoid retransmission attempts on STREAM sockets (in the future) make
2f08b65a 1064 * sure to set the r_retry field to 0 (implies nm_retry == 0).
1065 */
1066nfs_timer()
1067{
1068 register struct nfsreq *rep;
1069 register struct mbuf *m;
1070 register struct socket *so;
f0f1cbaa 1071 register struct nfsmount *nmp;
2f08b65a 1072 int s, error;
1073
1074 s = splnet();
1075 for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
1076 nmp = rep->r_nmp;
1077 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) ||
1078 (so = nmp->nm_so) == NULL)
1079 continue;
1080 if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) {
1081 rep->r_flags |= R_SOFTTERM;
1082 continue;
1083 }
2f08b65a 1084 if (rep->r_flags & R_TIMING) /* update rtt in mount */
f0f1cbaa 1085 nmp->nm_rtt++;
1086 /* If not timed out */
1087 if (++rep->r_timer < nmp->nm_rto)
1088 continue;
1089 /* Do backoff and save new timeout in mount */
1090 if (rep->r_flags & R_TIMING) {
f0f1cbaa 1091 nfs_backofftimer(nmp);
2f08b65a 1092 rep->r_flags &= ~R_TIMING;
f0f1cbaa 1093 nmp->nm_rtt = -1;
1094 }
1095 if (rep->r_flags & R_SENT) {
1096 rep->r_flags &= ~R_SENT;
f0f1cbaa 1097 nmp->nm_sent--;
2f08b65a 1098 }
1099
1100 /*
1101 * Check for too many retries on soft mount.
1102 * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1
1103 */
1104 if (++rep->r_rexmit > NFS_MAXREXMIT)
2f08b65a 1105 rep->r_rexmit = NFS_MAXREXMIT;
2f08b65a 1106
1107 /*
1108 * Check for server not responding
1109 */
1110 if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
170bfd05 1111 rep->r_rexmit > NFS_FISHY) {
1112 nfs_msg(rep->r_procp,
1113 nmp->nm_mountp->mnt_stat.f_mntfromname,
1114 "not responding");
1115 rep->r_flags |= R_TPRINTFMSG;
1116 }
170bfd05 1117 if (rep->r_rexmit >= rep->r_retry) { /* too many */
1118 nfsstats.rpctimeouts++;
1119 rep->r_flags |= R_SOFTTERM;
1120 continue;
1121 }
1122 if (nmp->nm_sotype != SOCK_DGRAM)
1123 continue;
1124
1125 /*
1126 * If there is enough space and the window allows..
1127 * Resend it
1128 */
1129 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
1130 nmp->nm_sent < nmp->nm_window &&
1131 (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
1132 nfsstats.rpcretries++;
1133 if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
1134 error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
1135 (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0);
1136 else
1137 error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
1138 nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0);
1139 if (error) {
1140 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
1141 so->so_error = 0;
1142 } else {
1143 /*
1144 * We need to time the request even though we
1145 * are retransmitting.
1146 */
1147 nmp->nm_rtt = 0;
1148 nmp->nm_sent++;
1149 rep->r_flags |= (R_SENT|R_TIMING);
1150 rep->r_timer = rep->r_timerinit;
1151 }
1152 }
1153 }
1154 splx(s);
1155 timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ);
1156}
1157
1158/*
1159 * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is
1160 * used here. The timer state is held in the nfsmount structure and
1161 * a single request is used to clock the response. When successful
1162 * the rtt smoothing in nfs_updatetimer is used, when failed the backoff
1163 * is done by nfs_backofftimer. We also log failure messages in these
1164 * routines.
1165 *
1166 * Congestion variables are held in the nfshost structure which
1167 * is referenced by nfsmounts and shared per-server. This separation
1168 * makes it possible to do per-mount timing which allows varying disk
1169 * access times to be dealt with, while preserving a network oriented
1170 * congestion control scheme.
1171 *
1172 * The windowing implements the Jacobson/Karels slowstart algorithm
1173 * with adjusted scaling factors. We start with one request, then send
1174 * 4 more after each success until the ssthresh limit is reached, then
1175 * we increment at a rate proportional to the window. On failure, we
1176 * remember 3/4 the current window and clamp the send limit to 1. Note
1177 * ICMP source quench is not reflected in so->so_error so we ignore that
1178 * for now.
1179 *
1180 * NFS behaves much more like a transport protocol with these changes,
1181 * shedding the teenage pedal-to-the-metal tendencies of "other"
1182 * implementations.
1183 *
1184 * Timers and congestion avoidance by Tom Talpey, Open Software Foundation.
1185 */
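/*
 * XXX editor's note (not in the original source): concretely, with the
 * code below and the initial values set in nfs_connect(), the window
 * starts at 2 and ssthresh at NFS_MAXWINDOW; each timed reply adds 4 to
 * the window until ssthresh is reached and roughly 1 per window of
 * replies after that, and a retransmission timeout records 3/4 of the
 * current window in ssthresh and drops the window back to 1.
 */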
1186
1187/*
1188 * The TCP algorithm was not forgiving enough. Because the NFS server
1189 * responds only after performing lookups/diskio/etc, we have to be
1190 * more prepared to accept a spiky variance. The TCP algorithm is:
f0f1cbaa 1191 * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1)
2f08b65a 1192 */
f0f1cbaa 1193#define NFS_RTO(nmp) (((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar)
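/*
 * XXX editor's note (not in the original source): nfs_updatetimer()
 * below keeps nm_srtt scaled by 8 and nm_rttvar scaled by 4, so
 * NFS_RTO works out to roughly srtt + 4 * (mean deviation), where the
 * TCP formula quoted above yields srtt + 2 * (mean deviation).  A
 * sketch of the arithmetic, under that scaling assumption:
 */
#ifdef notdef
static int
nfs_rto_example()
{
	int srtt = 2 << 3;	/* smoothed rtt of 2 ticks, scaled by 8 */
	int rttvar = 1 << 2;	/* mean deviation of 1 tick, scaled by 4 */

	/* NFS_RTO: (srtt >> 3) + rttvar = 2 + 4 = 6 ticks */
	/* TCP_RTO: ((srtt >> 2) + rttvar) >> 1 = (4 + 4) >> 1 = 4 ticks */
	return ((srtt >> 3) + rttvar);
}
#endif /* notdef */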
2f08b65a 1194
1195nfs_updatetimer(nmp)
1196 register struct nfsmount *nmp;
2f08b65a 1197{
1198
1199 /* If retransmitted, clear and return */
1200 if (nmp->nm_rexmit || nmp->nm_currexmit) {
1201 nmp->nm_rexmit = nmp->nm_currexmit = 0;
1202 return;
1203 }
1204 /* If have a measurement, do smoothing */
f0f1cbaa 1205 if (nmp->nm_srtt) {
2f08b65a 1206 register short delta;
1207 delta = nmp->nm_rtt - (nmp->nm_srtt >> 3);
1208 if ((nmp->nm_srtt += delta) <= 0)
1209 nmp->nm_srtt = 1;
1210 if (delta < 0)
1211 delta = -delta;
1212 delta -= (nmp->nm_rttvar >> 2);
1213 if ((nmp->nm_rttvar += delta) <= 0)
1214 nmp->nm_rttvar = 1;
1215 /* Else initialize */
1216 } else {
1217 nmp->nm_rttvar = nmp->nm_rtt << 1;
1218 if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2;
1219 nmp->nm_srtt = nmp->nm_rttvar << 2;
1220 }
1221 /* Compute new Retransmission TimeOut and clip */
1222 nmp->nm_rto = NFS_RTO(nmp);
1223 if (nmp->nm_rto < NFS_MINTIMEO)
1224 nmp->nm_rto = NFS_MINTIMEO;
1225 else if (nmp->nm_rto > NFS_MAXTIMEO)
1226 nmp->nm_rto = NFS_MAXTIMEO;
1227
1228 /* Update window estimate */
1229 if (nmp->nm_window < nmp->nm_ssthresh) /* quickly */
1230 nmp->nm_window += 4;
2f08b65a 1231 else { /* slowly */
1232 register long incr = ++nmp->nm_winext;
1233 incr = (incr * incr) / nmp->nm_window;
2f08b65a 1234 if (incr > 0) {
1235 nmp->nm_winext = 0;
1236 ++nmp->nm_window;
1237 }
1238 }
1239 if (nmp->nm_window > NFS_MAXWINDOW)
1240 nmp->nm_window = NFS_MAXWINDOW;
1241}
1242
1243nfs_backofftimer(nmp)
1244 register struct nfsmount *nmp;
2f08b65a 1245{
1246 register unsigned long newrto;
1247
1248 /* Clip shift count */
1249 if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto)
1250 nmp->nm_rexmit = 8 * sizeof nmp->nm_rto;
2f08b65a 1251 /* Back off RTO exponentially */
1252 newrto = NFS_RTO(nmp);
1253 newrto <<= (nmp->nm_rexmit - 1);
1254 if (newrto == 0 || newrto > NFS_MAXTIMEO)
1255 newrto = NFS_MAXTIMEO;
f0f1cbaa 1256 nmp->nm_rto = newrto;
1257
1258 /* If too many retries, message, assume a bogus RTT and re-measure */
1259 if (nmp->nm_currexmit < nmp->nm_rexmit) {
1260 nmp->nm_currexmit = nmp->nm_rexmit;
1261 if (nmp->nm_currexmit >= nfsrexmtthresh) {
1262 if (nmp->nm_currexmit == nfsrexmtthresh) {
1263 nmp->nm_rttvar += (nmp->nm_srtt >> 2);
1264 nmp->nm_srtt = 0;
1265 }
1266 }
a2907882 1267 }
2f08b65a 1268 /* Close down window but remember this point (3/4 current) for later */
1269 nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2;
1270 nmp->nm_window = 1;
1271 nmp->nm_winext = 0;
1272}
1273
1274/*
1275 * Test for a termination signal pending on procp.
1276 * This is used for NFSMNT_INT mounts.
a2907882 1277 */
1278nfs_sigintr(p)
1279 register struct proc *p;
1280{
1281 if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) &
1282 NFSINT_SIGMASK))
1283 return (1);
1284 else
1285 return (0);
1286}
2f08b65a 1287
1288nfs_msg(p, server, msg)
1289 struct proc *p;
1290 char *server, *msg;
1291{
1292 tpr_t tpr;
1293
1294 if (p)
1295 tpr = tprintf_open(p);
1296 else
1297 tpr = NULL;
1298 tprintf(tpr, "nfs server %s: %s\n", server, msg);
1299 tprintf_close(tpr);
1300}
1301
1302/*
1303 * Lock a socket against others.
1304 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
1305 * and also to avoid race conditions between the processes with nfs requests
1306 * in progress when a reconnect is necessary.
1307 */
1308nfs_solock(flagp)
1309 register int *flagp;
a2907882 1310{
2f08b65a 1311
1312 while (*flagp & NFSMNT_SCKLOCK) {
1313 *flagp |= NFSMNT_WANTSCK;
170bfd05 1314 (void) tsleep((caddr_t)flagp, PZERO-1, "nfsolck", 0);
2f08b65a 1315 }
1316 *flagp |= NFSMNT_SCKLOCK;
1317}
2f08b65a 1318
1319/*
1320 * Unlock the stream socket for others.
1321 */
1322nfs_sounlock(flagp)
170bfd05 1323 register int *flagp;
1324{
1325
1326 if ((*flagp & NFSMNT_SCKLOCK) == 0)
1327 panic("nfs sounlock");
1328 *flagp &= ~NFSMNT_SCKLOCK;
1329 if (*flagp & NFSMNT_WANTSCK) {
1330 *flagp &= ~NFSMNT_WANTSCK;
1331 wakeup((caddr_t)flagp);
2f08b65a 1332 }
1333}
1334
1335/*
1336 * This function compares two net addresses by family and returns TRUE
1337 * if they are the same.
1338 * If there is any doubt, return FALSE.
1339 */
1340nfs_netaddr_match(nam1, nam2)
1341 struct mbuf *nam1, *nam2;
1342{
1343 register struct sockaddr *saddr1, *saddr2;
1344
1345 saddr1 = mtod(nam1, struct sockaddr *);
1346 saddr2 = mtod(nam2, struct sockaddr *);
1347 if (saddr1->sa_family != saddr2->sa_family)
1348 return (0);
1349
1350 /*
1351 * Must do each address family separately since unused fields
1352 * are undefined values and not always zeroed.
1353 */
1354 switch (saddr1->sa_family) {
1355 case AF_INET:
1356 if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr ==
1357 ((struct sockaddr_in *)saddr2)->sin_addr.s_addr)
1358 return (1);
1359 break;
1360 default:
1361 break;
1362 };
1363 return (0);
1364}
1365
1366/*
1367 * Check the hostname fields for nfsd's mask and match fields.
1368 * By address family:
1369 * - Bitwise AND the mask with the host address field
1370 * - Compare for == with match
1371 * return TRUE if not equal
1372 */
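/*
 * XXX editor's note (not in the original source): for example, with an
 * AF_INET mask of 255.255.255.0 and a match address of 192.0.2.0, only
 * datagrams whose source lies in 192.0.2.0/24 compare equal and are
 * accepted; anything else makes nfs_badnam() return TRUE and the
 * request is dropped by nfs_getreq().
 */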
1373nfs_badnam(nam, msk, mtch)
1374 register struct mbuf *nam, *msk, *mtch;
1375{
1376 switch (mtod(nam, struct sockaddr *)->sa_family) {
1377 case AF_INET:
1378 return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr &
1379 mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) !=
1380 mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr);
1381 default:
1382 printf("nfs_badnam, unknown sa_family\n");
1383 return (0);
1384 };
a2907882 1385}