breada generalizes to breadn
[unix-history] / usr / src / sys / nfs / nfs_socket.c
CommitLineData
a2907882 1/*
f777974b 2 * Copyright (c) 1989, 1991 The Regents of the University of California.
a2907882
KM
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
dbf0c423 8 * %sccs.include.redist.c%
a2907882 9 *
3e005f29 10 * @(#)nfs_socket.c 7.23 (Berkeley) %G%
a2907882
KM
11 */
12
13/*
f0f1cbaa 14 * Socket operations for use by nfs
a2907882
KM
15 */
16
a2907882 17#include "param.h"
2f08b65a 18#include "proc.h"
a2907882
KM
19#include "mount.h"
20#include "kernel.h"
21#include "malloc.h"
22#include "mbuf.h"
206e686b 23#include "namei.h"
a2907882
KM
24#include "vnode.h"
25#include "domain.h"
26#include "protosw.h"
27#include "socket.h"
28#include "socketvar.h"
f777974b 29#include "syslog.h"
79993818 30#include "tprintf.h"
37ced908
KM
31#include "../netinet/in.h"
32#include "../netinet/tcp.h"
f777974b 33
a2907882
KM
34#include "rpcv2.h"
35#include "nfsv2.h"
36#include "nfs.h"
37#include "xdr_subs.h"
38#include "nfsm_subs.h"
39#include "nfsmount.h"
40
41#define TRUE 1
170bfd05 42#define FALSE 0
a2907882 43
a2907882
KM
44/*
45 * External data, mostly RPC constants in XDR form
46 */
47extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
48 rpc_msgaccepted, rpc_call;
49extern u_long nfs_prog, nfs_vers;
170bfd05 50/* Maybe these should be bits in a u_long ?? */
f0f1cbaa 51extern int nonidempotent[NFS_NPROCS];
958df9fb
KM
52static int compressrequest[NFS_NPROCS] = {
53 FALSE,
54 TRUE,
55 TRUE,
56 FALSE,
57 TRUE,
58 TRUE,
59 TRUE,
60 FALSE,
61 FALSE,
62 TRUE,
63 TRUE,
64 TRUE,
65 TRUE,
66 TRUE,
67 TRUE,
68 TRUE,
69 TRUE,
70 TRUE,
71};
f0f1cbaa
KM
72int nfs_sbwait();
73void nfs_disconnect();
958df9fb 74struct mbuf *nfs_compress(), *nfs_uncompress();
f0f1cbaa 75
a2907882
KM
76int nfsrv_null(),
77 nfsrv_getattr(),
78 nfsrv_setattr(),
79 nfsrv_lookup(),
80 nfsrv_readlink(),
81 nfsrv_read(),
82 nfsrv_write(),
83 nfsrv_create(),
84 nfsrv_remove(),
85 nfsrv_rename(),
86 nfsrv_link(),
87 nfsrv_symlink(),
88 nfsrv_mkdir(),
89 nfsrv_rmdir(),
90 nfsrv_readdir(),
91 nfsrv_statfs(),
92 nfsrv_noop();
93
94int (*nfsrv_procs[NFS_NPROCS])() = {
95 nfsrv_null,
96 nfsrv_getattr,
97 nfsrv_setattr,
98 nfsrv_noop,
99 nfsrv_lookup,
100 nfsrv_readlink,
101 nfsrv_read,
102 nfsrv_noop,
103 nfsrv_write,
104 nfsrv_create,
105 nfsrv_remove,
106 nfsrv_rename,
107 nfsrv_link,
108 nfsrv_symlink,
109 nfsrv_mkdir,
110 nfsrv_rmdir,
111 nfsrv_readdir,
112 nfsrv_statfs,
113};
114
2f08b65a
KM
115struct nfsreq nfsreqh;
116int nfsrexmtthresh = NFS_FISHY;
f0f1cbaa 117int nfs_tcpnodelay = 1;
2f08b65a
KM
118
/*
 * Initialize sockets and congestion for a new NFS connection.
 * We do not free the sockaddr if error.
 *
 * Creates the socket named by the mount point (nm_sotype/nm_soproto),
 * reserves send/receive buffer space sized from nm_wsize, optionally
 * connects it, sets per-protocol timeouts and options, and seeds the
 * congestion-control state in the nfsmount.  On any failure, falls
 * through to "bad" which tears the socket down via nfs_disconnect().
 */
nfs_connect(nmp)
	register struct nfsmount *nmp;
{
	register struct socket *so;
	int s, error, bufsize;
	struct mbuf *m;

	nmp->nm_so = (struct socket *)0;
	if (error = socreate(mtod(nmp->nm_nam, struct sockaddr *)->sa_family,
	    &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
		goto bad;
	so = nmp->nm_so;
	nmp->nm_soflags = so->so_proto->pr_flags;

	/*
	 * Reserve room for 4 outstanding write requests; stream sockets
	 * additionally carry a u_long RPC record mark per request.
	 */
	if (nmp->nm_sotype == SOCK_DGRAM)
		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR),
		    NFS_MAXPACKET);
	else
		bufsize = min(4 * (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long)),
		    NFS_MAXPACKET + sizeof(u_long));
	if (error = soreserve(so, bufsize, bufsize))
		goto bad;

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a port other than NFS_PORT.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_soflags & PR_CONNREQUIRED) {
			error = ENOTCONN;
			goto bad;
		}
	} else {
		if (error = soconnect(so, nmp->nm_nam))
			goto bad;

		/*
		 * Wait for the connection to complete. Cribbed from the
		 * connect system call but with the wait at negative prio.
		 */
		s = splnet();
		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0)
			(void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 0);
		splx(s);
		if (so->so_error) {
			error = so->so_error;
			goto bad;
		}
	}
	/*
	 * Interruptible/soft mounts get a 5 second socket-buffer timeout
	 * so sleeping senders/receivers wake up to notice termination;
	 * hard mounts block indefinitely.
	 */
	if (nmp->nm_sotype == SOCK_DGRAM) {
		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
			so->so_rcv.sb_timeo = (5 * hz);
			so->so_snd.sb_timeo = (5 * hz);
		} else {
			so->so_rcv.sb_timeo = 0;
			so->so_snd.sb_timeo = 0;
		}
		nmp->nm_rto = NFS_TIMEO;
	} else {
		if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
			so->so_rcv.sb_timeo = (5 * hz);
			so->so_snd.sb_timeo = (5 * hz);
		} else {
			so->so_rcv.sb_timeo = 0;
			so->so_snd.sb_timeo = 0;
		}
		/* Keepalive detects a dead peer on connection-based sockets */
		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int *) = 1;
			m->m_len = sizeof(int);
			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
		}
		/* Disable Nagle for TCP mounts when nfs_tcpnodelay is set */
		if (so->so_proto->pr_domain->dom_family == AF_INET &&
		    so->so_proto->pr_protocol == IPPROTO_TCP &&
		    nfs_tcpnodelay) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int *) = 1;
			m->m_len = sizeof(int);
			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
		}
		nmp->nm_rto = 10 * NFS_TIMEO;		/* XXX */
	}
	so->so_rcv.sb_flags |= SB_NOINTR;
	so->so_snd.sb_flags |= SB_NOINTR;

	/* Initialize other non-zero congestion variables */
	nmp->nm_window = 2;			/* Initial send window */
	nmp->nm_ssthresh = NFS_MAXWINDOW;	/* Slowstart threshold */
	nmp->nm_rttvar = nmp->nm_rto << 1;
	nmp->nm_sent = 0;
	nmp->nm_currexmit = 0;
	return (0);

bad:
	nfs_disconnect(nmp);
	return (error);
}
2f08b65a 220
f0f1cbaa
KM
/*
 * Reconnect routine:
 * Called when a connection is broken on a reliable protocol.
 * - clean up the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 * nb: Must be called with the nfs_solock() set on the mount point.
 *
 * Returns 0 on success, or EINTR if an interruptible mount is
 * signalled while retrying the connect.
 */
nfs_reconnect(rep, nmp)
	register struct nfsreq *rep;
	register struct nfsmount *nmp;
{
	register struct nfsreq *rp;
	int error;

	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
	    "trying reconnect");
	/* Retry forever (once per lbolt tick) unless interrupted */
	while (error = nfs_connect(nmp)) {
#ifdef lint
		error = error;
#endif /* lint */
		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp))
			return (EINTR);
		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
	}
	nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
	    "reconnected");

	/*
	 * Loop through outstanding request list and fix up all requests
	 * on old socket.
	 */
	rp = nfsreqh.r_next;
	while (rp != &nfsreqh) {
		if (rp->r_nmp == nmp)
			rp->r_flags |= R_MUSTRESEND;
		rp = rp->r_next;
	}
	return (0);
}
262
263/*
264 * NFS disconnect. Clean up and unlink.
265 */
f0f1cbaa 266void
2f08b65a
KM
267nfs_disconnect(nmp)
268 register struct nfsmount *nmp;
269{
f0f1cbaa 270 register struct socket *so;
2f08b65a 271
f0f1cbaa
KM
272 if (nmp->nm_so) {
273 so = nmp->nm_so;
274 nmp->nm_so = (struct socket *)0;
275 soshutdown(so, 2);
276 soclose(so);
2f08b65a
KM
277 }
278}
a2907882
KM
279
/*
 * This is the nfs send routine. For connection based socket types, it
 * must be called with an nfs_solock() on the socket.
 * "rep == NULL" indicates that it has been called from a server.
 *
 * Consumes "top" (it is handed to sosend(), or freed on the
 * R_SOFTTERM early-out).  Transient socket errors are deliberately
 * mapped to 0 so the retransmit timer can recover; only EINTR and
 * ERESTART are propagated to the caller.
 */
nfs_send(so, nam, top, rep)
	register struct socket *so;
	struct mbuf *nam;
	register struct mbuf *top;
	struct nfsreq *rep;
{
	struct mbuf *sendnam;
	int error, soflags;

	if (rep) {
		/* Request already terminated: drop the data, bail out */
		if (rep->r_flags & R_SOFTTERM) {
			m_freem(top);
			return (EINTR);
		}
		/* No socket means a failed connection; try to rebuild it */
		if (rep->r_nmp->nm_so == NULL &&
		    (error = nfs_reconnect(rep, rep->r_nmp)))
			return (error);
		rep->r_flags &= ~R_MUSTRESEND;
		/* nm_so may have changed across nfs_reconnect() */
		so = rep->r_nmp->nm_so;
		soflags = rep->r_nmp->nm_soflags;
	} else
		soflags = so->so_proto->pr_flags;
	/* Only pass an address for unconnected, connectionless sockets */
	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
		sendnam = (struct mbuf *)0;
	else
		sendnam = nam;

	error = sosend(so, sendnam, (struct uio *)0, top,
	    (struct mbuf *)0, 0);
	if (error == EWOULDBLOCK && rep) {
		if (rep->r_flags & R_SOFTTERM)
			error = EINTR;
		else {
			/* Buffer full: mark for resend by timer/receive path */
			rep->r_flags |= R_MUSTRESEND;
			error = 0;
		}
	}
	/*
	 * Ignore socket errors??
	 */
	if (error && error != EINTR && error != ERESTART)
		error = 0;
	return (error);
}
329
/*
 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
 * Mark and consolidate the data into a new mbuf list.
 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
 * small mbufs.
 * For SOCK_STREAM we must be very careful to read an entire record once
 * we have read any of it, even if the system call has been interrupted.
 *
 * On success *mp holds the record and *aname the peer address (if any);
 * both are NULL on entry and on failure.  As a final pass, any interior
 * mbuf whose length is not a multiple of 4 is copied into fresh mbufs
 * so later XDR pointer walks stay aligned.
 */
nfs_receive(so, aname, mp, rep)
	register struct socket *so;
	struct mbuf **aname;
	struct mbuf **mp;
	register struct nfsreq *rep;
{
	struct uio auio;
	struct iovec aio;
	register struct mbuf *m;
	struct mbuf *m2, *mnew, **mbp;
	caddr_t fcp, tcp;
	u_long len;
	struct mbuf **getnam;
	int error, siz, mlen, soflags, rcvflg;

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = (struct mbuf *)0;
	*aname = (struct mbuf *)0;
	if (rep)
		soflags = rep->r_nmp->nm_soflags;
	else
		soflags = so->so_proto->pr_flags;

	/*
	 * For reliable protocols, lock against other senders/receivers
	 * in case a reconnect is necessary.
	 * For SOCK_STREAM, first get the Record Mark to find out how much
	 * more there is to get.
	 * We must lock the socket against other receivers
	 * until we have an entire rpc request/reply.
	 */
	if (soflags & PR_CONNREQUIRED) {
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		if (rep) {
			/*
			 * Ugh: If a reconnect attempt just happened, nm_so
			 * would have changed. NULL indicates a failed
			 * attempt that has essentially shut down this
			 * mount point.
			 */
			if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL ||
			    (rep->r_flags & R_SOFTTERM))
				return (EINTR);
			while (rep->r_flags & R_MUSTRESEND) {
				m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
				nfsstats.rpcretries++;
				if (error = nfs_send(so, rep->r_nmp->nm_nam, m,
				    rep))
					goto errout;
			}
		}
		if ((soflags & PR_ATOMIC) == 0) {
			/* Read the 4-byte RPC record mark first */
			aio.iov_base = (caddr_t) &len;
			aio.iov_len = sizeof(u_long);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_procp = (struct proc *)0;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_long);
			do {
				rcvflg = MSG_WAITALL;
				error = soreceive(so, (struct mbuf **)0, &auio,
				    (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
					if (rep->r_flags & R_MUSTRESEND)
						goto tryagain;
				}
			} while (error == EWOULDBLOCK);
			if (!error && auio.uio_resid > 0) {
				if (rep)
					log(LOG_INFO,
					    "short receive (%d/%d) from nfs server %s\n",
					    sizeof(u_long) - auio.uio_resid,
					    sizeof(u_long),
					    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
			if (error)
				goto errout;
			/* Strip the "last fragment" bit from the record mark */
			len = ntohl(len) & ~0x80000000;
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			if (len > NFS_MAXPACKET) {
				if (rep)
					log(LOG_ERR, "%s (%d) from nfs server %s\n",
					    "impossible packet length",
					    len,
					    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EFBIG;
				goto errout;
			}
			/*
			 * Now read the record body; once started we must
			 * finish, so EINTR/ERESTART are retried here.
			 */
			auio.uio_resid = len;
			do {
				rcvflg = MSG_WAITALL;
				error = soreceive(so, (struct mbuf **)0,
				    &auio, mp, (struct mbuf **)0, &rcvflg);
			} while (error == EWOULDBLOCK || error == EINTR ||
			    error == ERESTART);
			if (!error && auio.uio_resid > 0) {
				if (rep)
					log(LOG_INFO,
					    "short receive (%d/%d) from nfs server %s\n",
					    len - auio.uio_resid, len,
					    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
		} else {
			/* Connection-based but atomic: one record per recv */
			auio.uio_resid = len = 1000000;	/* Anything Big */
			do {
				rcvflg = 0;
				error = soreceive(so, (struct mbuf **)0,
				    &auio, mp, (struct mbuf **)0, &rcvflg);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
					if (rep->r_flags & R_MUSTRESEND)
						goto tryagain;
				}
			} while (error == EWOULDBLOCK);
			if (!error && *mp == NULL)
				error = EPIPE;
			len -= auio.uio_resid;
		}
errout:
		if (error && rep && error != EINTR && error != ERESTART) {
			m_freem(*mp);
			*mp = (struct mbuf *)0;
			if (error != EPIPE && rep)
				log(LOG_INFO,
				    "receive error %d from nfs server %s\n",
				    error,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			/* Force a fresh connection and retry from the top */
			nfs_disconnect(rep->r_nmp);
			error = nfs_reconnect(rep, rep->r_nmp);
			if (!error)
				goto tryagain;
		}
	} else {
		/* Datagram case: a single soreceive() gets the whole RPC */
		if (so->so_state & SS_ISCONNECTED)
			getnam = (struct mbuf **)0;
		else
			getnam = aname;
		auio.uio_resid = len = 1000000;
		do {
			rcvflg = 0;
			error = soreceive(so, getnam, &auio, mp,
			    (struct mbuf **)0, &rcvflg);
			if (error == EWOULDBLOCK && rep &&
			    (rep->r_flags & R_SOFTTERM))
				return (EINTR);
		} while (error == EWOULDBLOCK);
		len -= auio.uio_resid;
	}
	if (error) {
		m_freem(*mp);
		*mp = (struct mbuf *)0;
	}
	/*
	 * Search for any mbufs that are not a multiple of 4 bytes long.
	 * These could cause pointer alignment problems, so copy them to
	 * well aligned mbufs.
	 */
	m = *mp;
	mbp = mp;
	while (m) {
		/*
		 * All this for something that may never happen.
		 */
		if (m->m_next && (m->m_len & 0x3)) {
			printf("nfs_rcv odd length!\n");
			mlen = 0;
			/* Recopy the remainder of the chain into fresh mbufs */
			while (m) {
				fcp = mtod(m, caddr_t);
				while (m->m_len > 0) {
					if (mlen == 0) {
						MGET(m2, M_WAIT, MT_DATA);
						if (len >= MINCLSIZE)
							MCLGET(m2, M_WAIT);
						m2->m_len = 0;
						mlen = M_TRAILINGSPACE(m2);
						tcp = mtod(m2, caddr_t);
						*mbp = m2;
						mbp = &m2->m_next;
					}
					siz = MIN(mlen, m->m_len);
					bcopy(fcp, tcp, siz);
					m2->m_len += siz;
					mlen -= siz;
					len -= siz;
					tcp += siz;
					m->m_len -= siz;
					fcp += siz;
				}
				MFREE(m, mnew);
				m = mnew;
			}
			break;
		}
		len -= m->m_len;
		mbp = &m->m_next;
		m = m->m_next;
	}
	return (error);
}
554
/*
 * Implement receipt of reply on a socket.
 * We must search through the list of received datagrams matching them
 * with outstanding requests using the xid, until ours is found.
 *
 * Any reply pulled off the socket is matched against the whole
 * nfsreqh list (not just myrep), so one receiver can complete other
 * processes' requests while waiting for its own.
 */
/* ARGSUSED */
nfs_reply(nmp, myrep)
	struct nfsmount *nmp;
	struct nfsreq *myrep;
{
	register struct mbuf *m;
	register struct nfsreq *rep;
	register int error = 0;
	u_long rxid;
	struct mbuf *mp, *nam;
	char *cp;
	int cnt, xfer;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 */
		nfs_solock(&nmp->nm_flag);
		/* Already received, bye bye */
		if (myrep->r_mrep != NULL) {
			nfs_sounlock(&nmp->nm_flag);
			return (0);
		}
		/*
		 * Get the next Rpc reply off the socket
		 */
		if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) {
			nfs_sounlock(&nmp->nm_flag);

			/*
			 * Ignore routing errors on connectionless protocols??
			 */
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
				nmp->nm_so->so_error = 0;
				continue;
			}

			/*
			 * Otherwise cleanup and return a fatal error.
			 */
			if (myrep->r_flags & R_TIMING) {
				myrep->r_flags &= ~R_TIMING;
				nmp->nm_rtt = -1;
			}
			if (myrep->r_flags & R_SENT) {
				myrep->r_flags &= ~R_SENT;
				nmp->nm_sent--;
			}
			return (error);
		}

		/*
		 * Get the xid and check that it is an rpc reply
		 */
		m = mp;
		/* Skip any leading empty mbufs before reading the xid */
		while (m && m->m_len == 0)
			m = m->m_next;
		if (m == NULL) {
			nfsstats.rpcinvalid++;
			m_freem(mp);
			nfs_sounlock(&nmp->nm_flag);
			continue;
		}
		bcopy(mtod(m, caddr_t), (caddr_t)&rxid, NFSX_UNSIGNED);
		/*
		 * Loop through the request list to match up the reply
		 * Iff no match, just drop the datagram
		 */
		m = mp;
		rep = nfsreqh.r_next;
		while (rep != &nfsreqh) {
			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
				/* Found it.. */
				rep->r_mrep = m;
				/*
				 * Update timing
				 */
				if (rep->r_flags & R_TIMING) {
					nfs_updatetimer(rep->r_nmp);
					rep->r_flags &= ~R_TIMING;
					rep->r_nmp->nm_rtt = -1;
				}
				if (rep->r_flags & R_SENT) {
					rep->r_flags &= ~R_SENT;
					rep->r_nmp->nm_sent--;
				}
				break;
			}
			rep = rep->r_next;
		}
		nfs_sounlock(&nmp->nm_flag);
		if (nam)
			m_freem(nam);
		/*
		 * If not matched to a request, drop it.
		 * If it's mine, get out.
		 */
		if (rep == &nfsreqh) {
			nfsstats.rpcunexpected++;
			m_freem(m);
		} else if (rep == myrep)
			return (0);
	}
}
670
/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_send() for first transmit
 *	- calls nfs_receive() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 *
 * On success *mrp/*mdp/*dposp describe the reply past the RPC header
 * so callers can continue XDR-decoding the NFS payload.  Errors from
 * the RPC layer are mapped: msg denied -> EOPNOTSUPP/EACCES, a
 * non-zero accept status -> the server's errno, anything else ->
 * EPROTONOSUPPORT.
 */
nfs_request(vp, mreq, xid, procnum, procp, tryhard, mp, mrp, mdp, dposp)
	struct vnode *vp;
	struct mbuf *mreq;
	u_long xid;
	int procnum;
	struct proc *procp;
	int tryhard;
	struct mount *mp;
	struct mbuf **mrp;
	struct mbuf **mdp;
	caddr_t *dposp;
{
	register struct mbuf *m, *mrep;
	register struct nfsreq *rep;
	register u_long *tl;
	register int len;
	struct nfsmount *nmp;
	struct mbuf *md;
	struct nfsreq *reph;
	caddr_t dpos;
	char *cp2;
	int t1;
	int s, compressed;
	int error = 0;

	nmp = VFSTONFS(mp);
	m = mreq;
	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
	rep->r_xid = xid;
	rep->r_nmp = nmp;
	rep->r_vp = vp;
	rep->r_procp = procp;
	/* Soft (or spongy-without-tryhard) mounts give up after nm_retry */
	if ((nmp->nm_flag & NFSMNT_SOFT) ||
	    ((nmp->nm_flag & NFSMNT_SPONGY) && !tryhard))
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_flags = rep->r_rexmit = 0;
	/*
	 * Three cases:
	 * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO
	 * - idempotent requests on SOCK_DGRAM use 0
	 * - Reliable transports, NFS_RELIABLETIMEO
	 * Timeouts are still done on reliable transports to ensure detection
	 * of excessive connection delay.
	 */
	if (nmp->nm_sotype != SOCK_DGRAM)
		rep->r_timerinit = -NFS_RELIABLETIMEO;
	else if (nonidempotent[procnum])
		rep->r_timerinit = -NFS_MINIDEMTIMEO;
	else
		rep->r_timerinit = 0;
	rep->r_timer = rep->r_timerinit;
	rep->r_mrep = NULL;
	/* Total the request length for the packet header */
	len = 0;
	while (m) {
		len += m->m_len;
		m = m->m_next;
	}
	mreq->m_pkthdr.len = len;
	mreq->m_pkthdr.rcvif = (struct ifnet *)0;
	/* Optionally compress the request; remember so the reply is expanded */
	compressed = 0;
	m = mreq;
	if ((nmp->nm_flag & NFSMNT_COMPRESS) && compressrequest[procnum]) {
		mreq = nfs_compress(mreq);
		if (mreq != m) {
			len = mreq->m_pkthdr.len;
			compressed++;
		}
	}
	/*
	 * For non-atomic protocols, insert a Sun RPC Record Mark.
	 */
	if ((nmp->nm_soflags & PR_ATOMIC) == 0) {
		M_PREPEND(mreq, sizeof(u_long), M_WAIT);
		*mtod(mreq, u_long *) = htonl(0x80000000 | len);
	}
	rep->r_mreq = mreq;

	/*
	 * Do the client side RPC.
	 */
	nfsstats.rpcrequests++;
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	s = splnet();
	reph = &nfsreqh;
	reph->r_prev->r_next = rep;
	rep->r_prev = reph->r_prev;
	reph->r_prev = rep;
	rep->r_next = reph;
	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM ||
	    (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) {
		nmp->nm_sent++;
		rep->r_flags |= R_SENT;
		if (nmp->nm_rtt == -1) {
			nmp->nm_rtt = 0;
			rep->r_flags |= R_TIMING;
		}
		splx(s);
		m = m_copym(mreq, 0, M_COPYALL, M_WAIT);
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			nfs_solock(&nmp->nm_flag);
		error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			nfs_sounlock(&nmp->nm_flag);
		if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error))
			nmp->nm_so->so_error = error = 0;
	} else
		splx(s);

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error)
		error = nfs_reply(nmp, rep);

	/*
	 * RPC done, unlink the request.
	 */
	s = splnet();
	rep->r_prev->r_next = rep->r_next;
	rep->r_next->r_prev = rep->r_prev;
	splx(s);

	/*
	 * If there was a successful reply and a tprintf msg.
	 * tprintf a response.
	 */
	if (!error && (rep->r_flags & R_TPRINTFMSG))
		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
		    "is alive again");
	m_freem(rep->r_mreq);
	mrep = rep->r_mrep;
	FREE((caddr_t)rep, M_NFSREQ);
	if (error)
		return (error);

	if (compressed)
		mrep = nfs_uncompress(mrep);
	md = mrep;
	/*
	 * break down the rpc header and check if ok
	 */
	dpos = mtod(md, caddr_t);
	nfsm_disect(tl, u_long *, 5*NFSX_UNSIGNED);
	tl += 2;
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else
			error = EACCES;
		m_freem(mrep);
		return (error);
	}
	/*
	 * skip over the auth_verf, someday we may want to cache auth_short's
	 * for nfs_reqhead(), but for now just dump it
	 */
	if (*++tl != 0) {
		len = nfsm_rndup(fxdr_unsigned(long, *tl));
		nfsm_adv(len);
	}
	nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_disect(tl, u_long *, NFSX_UNSIGNED);
		if (*tl != 0) {
			error = fxdr_unsigned(int, *tl);
			m_freem(mrep);
			return (error);
		}
		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;
		return (0);
	}
	m_freem(mrep);
	return (EPROTONOSUPPORT);
nfsmout:
	return (error);
}
870
/*
 * Get a request for the server main loop
 * - receive a request via. nfs_soreceive()
 * - verify it
 * - fill in the cred struct.
 *
 * Validates the RPC call header (direction, RPC version, program,
 * program version) and decodes the AUTH_UNIX credential into *cr.
 * Returns an RPC-layer errno (ERPCMISMATCH, EPROGUNAVAIL,
 * EPROGMISMATCH, EPROCUNAVAIL, EBADRPC) that nfs_rephead() can map
 * back into a reply, or 0 with *mrp/*mdp/*dposp set for the payload.
 */
nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr,
	msk, mtch, wascomp)
	struct socket *so;
	u_long prog;
	u_long vers;
	int maxproc;
	struct mbuf **nam;
	struct mbuf **mrp;
	struct mbuf **mdp;
	caddr_t *dposp;
	u_long *retxid;
	u_long *procnum;
	register struct ucred *cr;
	struct mbuf *msk, *mtch;
	int *wascomp;
{
	register int i;
	register u_long *tl;
	register long t1;
	caddr_t dpos, cp2;
	int error = 0;
	struct mbuf *mrep, *md;
	int len;

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
	} else {
		/* Discard datagrams whose source address fails nfs_badnam() */
		mrep = (struct mbuf *)0;
		do {
			if (mrep) {
				m_freem(*nam);
				m_freem(mrep);
			}
			error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
		} while (!error && nfs_badnam(*nam, msk, mtch));
	}
	if (error)
		return (error);
	md = mrep;
	/* nfs_uncompress() returns a new chain iff the request was compressed */
	mrep = nfs_uncompress(mrep);
	if (mrep != md) {
		*wascomp = 1;
		md = mrep;
	} else
		*wascomp = 0;
	dpos = mtod(mrep, caddr_t);
	/* xid, direction, rpc vers, prog, vers, proc + start of cred */
	nfsm_disect(tl, u_long *, 10*NFSX_UNSIGNED);
	*retxid = *tl++;
	if (*tl++ != rpc_call) {
		m_freem(mrep);
		return (ERPCMISMATCH);
	}
	if (*tl++ != rpc_vers) {
		m_freem(mrep);
		return (ERPCMISMATCH);
	}
	if (*tl++ != prog) {
		m_freem(mrep);
		return (EPROGUNAVAIL);
	}
	if (*tl++ != vers) {
		m_freem(mrep);
		return (EPROGMISMATCH);
	}
	*procnum = fxdr_unsigned(u_long, *tl++);
	/* NULL proc carries no credential; hand the request straight back */
	if (*procnum == NFSPROC_NULL) {
		*mrp = mrep;
		return (0);
	}
	if (*procnum > maxproc || *tl++ != rpc_auth_unix) {
		m_freem(mrep);
		return (EPROCUNAVAIL);
	}
	len = fxdr_unsigned(int, *tl++);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		m_freem(mrep);
		return (EBADRPC);
	}
	/* Skip the machine-name string inside the credential */
	len = fxdr_unsigned(int, *++tl);
	if (len < 0 || len > NFS_MAXNAMLEN) {
		m_freem(mrep);
		return (EBADRPC);
	}
	nfsm_adv(nfsm_rndup(len));
	nfsm_disect(tl, u_long *, 3*NFSX_UNSIGNED);
	cr->cr_uid = fxdr_unsigned(uid_t, *tl++);
	cr->cr_gid = fxdr_unsigned(gid_t, *tl++);
	len = fxdr_unsigned(int, *tl);
	if (len < 0 || len > RPCAUTH_UNIXGIDS) {
		m_freem(mrep);
		return (EBADRPC);
	}
	nfsm_disect(tl, u_long *, (len + 2)*NFSX_UNSIGNED);
	/* cr_groups[0] is the gid above; extras beyond NGROUPS are dropped */
	for (i = 1; i <= len; i++)
		if (i < NGROUPS)
			cr->cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
		else
			tl++;
	cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
	/*
	 * Do we have any use for the verifier.
	 * According to the "Remote Procedure Call Protocol Spec." it
	 * should be AUTH_NULL, but some clients make it AUTH_UNIX?
	 * For now, just skip over it
	 */
	len = fxdr_unsigned(int, *++tl);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		m_freem(mrep);
		return (EBADRPC);
	}
	if (len > 0)
		nfsm_adv(nfsm_rndup(len));
	*mrp = mrep;
	*mdp = md;
	*dposp = dpos;
	return (0);
nfsmout:
	return (error);
}
996
/*
 * Generate the rpc reply header
 * siz arg. is used to decide if adding a cluster is worthwhile
 *
 * Builds the xid/reply/accept-or-deny preamble in a fresh mbuf and
 * returns it via *mrq, with *mbp/*bposp positioned for the caller to
 * append reply data.  Always returns 0; "err" selects the RPC status
 * encoded in the header.
 */
nfs_rephead(siz, retxid, err, mrq, mbp, bposp)
	int siz;
	u_long retxid;
	int err;
	struct mbuf **mrq;
	struct mbuf **mbp;
	caddr_t *bposp;
{
	register u_long *tl;
	register long t1;
	caddr_t bpos;
	struct mbuf *mreq, *mb, *mb2;

	NFSMGETHDR(mreq);
	mb = mreq;
	/* Use a cluster when the reply won't fit a plain header mbuf */
	if ((siz+RPC_REPLYSIZ) > MHLEN)
		MCLGET(mreq, M_WAIT);
	tl = mtod(mreq, u_long *);
	mreq->m_len = 6*NFSX_UNSIGNED;
	bpos = ((caddr_t)tl)+mreq->m_len;
	*tl++ = retxid;
	*tl++ = rpc_reply;
	if (err == ERPCMISMATCH) {
		/* Denied: report the RPC version range we accept (2..2) */
		*tl++ = rpc_msgdenied;
		*tl++ = rpc_mismatch;
		*tl++ = txdr_unsigned(2);
		*tl = txdr_unsigned(2);
	} else {
		*tl++ = rpc_msgaccepted;
		*tl++ = 0;
		*tl++ = 0;
		switch (err) {
		case EPROGUNAVAIL:
			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			*tl = txdr_unsigned(RPC_PROGMISMATCH);
			/* nfsm_build advances tl/bpos into the mbuf */
			nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(2);
			*tl = txdr_unsigned(2);	/* someday 3 */
			break;
		case EPROCUNAVAIL:
			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
			break;
		default:
			*tl = 0;
			/* VNOVAL means "no status word"; otherwise append errno */
			if (err != VNOVAL) {
				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
				*tl = txdr_unsigned(err);
			}
			break;
		};
	}
	*mrq = mreq;
	*mbp = mb;
	*bposp = bpos;
	if (err != 0 && err != VNOVAL)
		nfsstats.srvrpc_errs++;
	return (0);
}
1061
/*
 * Nfs timer routine
 * Scan the nfsreq list and retranmit any requests that have timed out
 * To avoid retransmission attempts on STREAM sockets (in the future) make
 * sure to set the r_retry field to 0 (implies nm_retry == 0).
 *
 * Self-rescheduling via timeout(); runs the whole scan at splnet.
 * Handles per-request interrupt checks, RTT bookkeeping, exponential
 * backoff, "server not responding" console messages, soft-mount
 * termination, and datagram retransmission within the send window.
 */
nfs_timer()
{
	register struct nfsreq *rep;
	register struct mbuf *m;
	register struct socket *so;
	register struct nfsmount *nmp;
	int s, error;

	s = splnet();
	for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
		nmp = rep->r_nmp;
		/* Skip answered, terminated, or socketless requests */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) ||
		    (so = nmp->nm_so) == NULL)
			continue;
		if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) {
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		if (rep->r_flags & R_TIMING)	/* update rtt in mount */
			nmp->nm_rtt++;
		/* If not timed out */
		if (++rep->r_timer < nmp->nm_rto)
			continue;
		/* Do backoff and save new timeout in mount */
		if (rep->r_flags & R_TIMING) {
			nfs_backofftimer(nmp);
			rep->r_flags &= ~R_TIMING;
			nmp->nm_rtt = -1;
		}
		if (rep->r_flags & R_SENT) {
			rep->r_flags &= ~R_SENT;
			nmp->nm_sent--;
		}

		/*
		 * Check for too many retries on soft mount.
		 * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1
		 */
		if (++rep->r_rexmit > NFS_MAXREXMIT)
			rep->r_rexmit = NFS_MAXREXMIT;

		/*
		 * Check for server not responding
		 */
		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
		    rep->r_rexmit > NFS_FISHY) {
			nfs_msg(rep->r_procp,
			    nmp->nm_mountp->mnt_stat.f_mntfromname,
			    "not responding");
			rep->r_flags |= R_TPRINTFMSG;
		}
		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
			nfsstats.rpctimeouts++;
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		/* Only datagram sockets are retransmitted here */
		if (nmp->nm_sotype != SOCK_DGRAM)
			continue;

		/*
		 * If there is enough space and the window allows..
		 *	Resend it
		 */
		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
		    nmp->nm_sent < nmp->nm_window &&
		    (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
			nfsstats.rpcretries++;
			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
				error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
				    (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0);
			else
				error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
				    nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0);
			if (error) {
				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
					so->so_error = 0;
			} else {
				/*
				 * We need to time the request even though we
				 * are retransmitting.
				 */
				nmp->nm_rtt = 0;
				nmp->nm_sent++;
				rep->r_flags |= (R_SENT|R_TIMING);
				rep->r_timer = rep->r_timerinit;
			}
		}
	}
	splx(s);
	timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ);
}
1159
1160/*
1161 * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is
1162 * used here. The timer state is held in the nfsmount structure and
1163 * a single request is used to clock the response. When successful
1164 * the rtt smoothing in nfs_updatetimer is used, when failed the backoff
1165 * is done by nfs_backofftimer. We also log failure messages in these
1166 * routines.
1167 *
1168 * Congestion variables are held in the nfshost structure which
1169 * is referenced by nfsmounts and shared per-server. This separation
1170 * makes it possible to do per-mount timing which allows varying disk
1171 * access times to be dealt with, while preserving a network oriented
1172 * congestion control scheme.
1173 *
1174 * The windowing implements the Jacobson/Karels slowstart algorithm
1175 * with adjusted scaling factors. We start with one request, then send
1176 * 4 more after each success until the ssthresh limit is reached, then
1177 * we increment at a rate proportional to the window. On failure, we
1178 * remember 3/4 the current window and clamp the send limit to 1. Note
1179 * ICMP source quench is not reflected in so->so_error so we ignore that
1180 * for now.
1181 *
1182 * NFS behaves much more like a transport protocol with these changes,
1183 * shedding the teenage pedal-to-the-metal tendencies of "other"
1184 * implementations.
1185 *
1186 * Timers and congestion avoidance by Tom Talpey, Open Software Foundation.
1187 */
1188
1189/*
1190 * The TCP algorithm was not forgiving enough. Because the NFS server
1191 * responds only after performing lookups/diskio/etc, we have to be
1192 * more prepared to accept a spiky variance. The TCP algorithm is:
f0f1cbaa 1193 * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1)
2f08b65a 1194 */
f0f1cbaa 1195#define NFS_RTO(nmp) (((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar)
2f08b65a 1196
f0f1cbaa
KM
1197nfs_updatetimer(nmp)
1198 register struct nfsmount *nmp;
2f08b65a 1199{
2f08b65a
KM
1200
1201 /* If retransmitted, clear and return */
f0f1cbaa
KM
1202 if (nmp->nm_rexmit || nmp->nm_currexmit) {
1203 nmp->nm_rexmit = nmp->nm_currexmit = 0;
2f08b65a
KM
1204 return;
1205 }
1206 /* If have a measurement, do smoothing */
f0f1cbaa 1207 if (nmp->nm_srtt) {
2f08b65a 1208 register short delta;
f0f1cbaa
KM
1209 delta = nmp->nm_rtt - (nmp->nm_srtt >> 3);
1210 if ((nmp->nm_srtt += delta) <= 0)
1211 nmp->nm_srtt = 1;
2f08b65a
KM
1212 if (delta < 0)
1213 delta = -delta;
f0f1cbaa
KM
1214 delta -= (nmp->nm_rttvar >> 2);
1215 if ((nmp->nm_rttvar += delta) <= 0)
1216 nmp->nm_rttvar = 1;
2f08b65a
KM
1217 /* Else initialize */
1218 } else {
f0f1cbaa
KM
1219 nmp->nm_rttvar = nmp->nm_rtt << 1;
1220 if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2;
1221 nmp->nm_srtt = nmp->nm_rttvar << 2;
2f08b65a
KM
1222 }
1223 /* Compute new Retransmission TimeOut and clip */
f0f1cbaa
KM
1224 nmp->nm_rto = NFS_RTO(nmp);
1225 if (nmp->nm_rto < NFS_MINTIMEO)
1226 nmp->nm_rto = NFS_MINTIMEO;
1227 else if (nmp->nm_rto > NFS_MAXTIMEO)
1228 nmp->nm_rto = NFS_MAXTIMEO;
2f08b65a
KM
1229
1230 /* Update window estimate */
f0f1cbaa
KM
1231 if (nmp->nm_window < nmp->nm_ssthresh) /* quickly */
1232 nmp->nm_window += 4;
2f08b65a 1233 else { /* slowly */
f0f1cbaa
KM
1234 register long incr = ++nmp->nm_winext;
1235 incr = (incr * incr) / nmp->nm_window;
2f08b65a 1236 if (incr > 0) {
f0f1cbaa
KM
1237 nmp->nm_winext = 0;
1238 ++nmp->nm_window;
2f08b65a
KM
1239 }
1240 }
f0f1cbaa
KM
1241 if (nmp->nm_window > NFS_MAXWINDOW)
1242 nmp->nm_window = NFS_MAXWINDOW;
2f08b65a
KM
1243}
1244
f0f1cbaa
KM
1245nfs_backofftimer(nmp)
1246 register struct nfsmount *nmp;
2f08b65a 1247{
2f08b65a
KM
1248 register unsigned long newrto;
1249
1250 /* Clip shift count */
f0f1cbaa
KM
1251 if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto)
1252 nmp->nm_rexmit = 8 * sizeof nmp->nm_rto;
2f08b65a 1253 /* Back off RTO exponentially */
f0f1cbaa
KM
1254 newrto = NFS_RTO(nmp);
1255 newrto <<= (nmp->nm_rexmit - 1);
2f08b65a
KM
1256 if (newrto == 0 || newrto > NFS_MAXTIMEO)
1257 newrto = NFS_MAXTIMEO;
f0f1cbaa 1258 nmp->nm_rto = newrto;
2f08b65a
KM
1259
1260 /* If too many retries, message, assume a bogus RTT and re-measure */
f0f1cbaa
KM
1261 if (nmp->nm_currexmit < nmp->nm_rexmit) {
1262 nmp->nm_currexmit = nmp->nm_rexmit;
1263 if (nmp->nm_currexmit >= nfsrexmtthresh) {
1264 if (nmp->nm_currexmit == nfsrexmtthresh) {
1265 nmp->nm_rttvar += (nmp->nm_srtt >> 2);
1266 nmp->nm_srtt = 0;
a2907882
KM
1267 }
1268 }
a2907882 1269 }
2f08b65a 1270 /* Close down window but remember this point (3/4 current) for later */
f0f1cbaa
KM
1271 nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2;
1272 nmp->nm_window = 1;
1273 nmp->nm_winext = 0;
a2907882
KM
1274}
1275
1276/*
f0f1cbaa
KM
1277 * Test for a termination signal pending on procp.
1278 * This is used for NFSMNT_INT mounts.
a2907882 1279 */
f0f1cbaa
KM
1280nfs_sigintr(p)
1281 register struct proc *p;
1282{
1283 if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) &
1284 NFSINT_SIGMASK))
1285 return (1);
1286 else
1287 return (0);
1288}
2f08b65a 1289
79993818
MK
1290nfs_msg(p, server, msg)
1291 struct proc *p;
1292 char *server, *msg;
1293{
1294 tpr_t tpr;
1295
1296 if (p)
1297 tpr = tprintf_open(p);
1298 else
1299 tpr = NULL;
1300 tprintf(tpr, "nfs server %s: %s\n", server, msg);
1301 tprintf_close(tpr);
1302}
1303
f0f1cbaa
KM
1304/*
1305 * Lock a socket against others.
1306 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
1307 * and also to avoid race conditions between the processes with nfs requests
1308 * in progress when a reconnect is necessary.
1309 */
170bfd05
KM
1310nfs_solock(flagp)
1311 register int *flagp;
a2907882 1312{
2f08b65a 1313
f0f1cbaa
KM
1314 while (*flagp & NFSMNT_SCKLOCK) {
1315 *flagp |= NFSMNT_WANTSCK;
170bfd05 1316 (void) tsleep((caddr_t)flagp, PZERO-1, "nfsolck", 0);
2f08b65a 1317 }
f0f1cbaa
KM
1318 *flagp |= NFSMNT_SCKLOCK;
1319}
2f08b65a 1320
f0f1cbaa
KM
1321/*
1322 * Unlock the stream socket for others.
1323 */
1324nfs_sounlock(flagp)
170bfd05 1325 register int *flagp;
f0f1cbaa
KM
1326{
1327
1328 if ((*flagp & NFSMNT_SCKLOCK) == 0)
1329 panic("nfs sounlock");
1330 *flagp &= ~NFSMNT_SCKLOCK;
1331 if (*flagp & NFSMNT_WANTSCK) {
1332 *flagp &= ~NFSMNT_WANTSCK;
1333 wakeup((caddr_t)flagp);
2f08b65a 1334 }
f0f1cbaa
KM
1335}
1336
1337/*
1338 * This function compares two net addresses by family and returns TRUE
1339 * if they are the same.
1340 * If there is any doubt, return FALSE.
1341 */
1342nfs_netaddr_match(nam1, nam2)
1343 struct mbuf *nam1, *nam2;
1344{
1345 register struct sockaddr *saddr1, *saddr2;
1346
1347 saddr1 = mtod(nam1, struct sockaddr *);
1348 saddr2 = mtod(nam2, struct sockaddr *);
1349 if (saddr1->sa_family != saddr2->sa_family)
1350 return (0);
1351
1352 /*
1353 * Must do each address family separately since unused fields
1354 * are undefined values and not always zeroed.
1355 */
1356 switch (saddr1->sa_family) {
1357 case AF_INET:
1358 if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr ==
1359 ((struct sockaddr_in *)saddr2)->sin_addr.s_addr)
1360 return (1);
1361 break;
1362 default:
1363 break;
1364 };
1365 return (0);
1366}
1367
1368/*
1369 * Check the hostname fields for nfsd's mask and match fields.
1370 * By address family:
1371 * - Bitwise AND the mask with the host address field
1372 * - Compare for == with match
1373 * return TRUE if not equal
1374 */
1375nfs_badnam(nam, msk, mtch)
1376 register struct mbuf *nam, *msk, *mtch;
1377{
1378 switch (mtod(nam, struct sockaddr *)->sa_family) {
1379 case AF_INET:
1380 return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr &
1381 mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) !=
1382 mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr);
1383 default:
1384 printf("nfs_badmatch, unknown sa_family\n");
1385 return (0);
1386 };
a2907882 1387}