reset optind before getopt() -- fixes 'make -k clean' failure
[unix-history] / usr / src / sys / nfs / nfs_socket.c
CommitLineData
a2907882
KM
1/*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms are permitted
9 * provided that the above copyright notice and this paragraph are
10 * duplicated in all such forms and that any documentation,
11 * advertising materials, and other materials related to such
12 * distribution and use acknowledge that the software was developed
13 * by the University of California, Berkeley. The name of the
14 * University may not be used to endorse or promote products derived
15 * from this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
19 *
170bfd05 20 * @(#)nfs_socket.c 7.17 (Berkeley) %G%
a2907882
KM
21 */
22
23/*
f0f1cbaa 24 * Socket operations for use by nfs
a2907882
KM
25 */
26
27#include "types.h"
28#include "param.h"
29#include "uio.h"
30#include "user.h"
2f08b65a
KM
31#include "proc.h"
32#include "signal.h"
a2907882
KM
33#include "mount.h"
34#include "kernel.h"
35#include "malloc.h"
36#include "mbuf.h"
37#include "vnode.h"
38#include "domain.h"
39#include "protosw.h"
40#include "socket.h"
41#include "socketvar.h"
37ced908
KM
42#include "../netinet/in.h"
43#include "../netinet/tcp.h"
a2907882
KM
44#include "rpcv2.h"
45#include "nfsv2.h"
46#include "nfs.h"
47#include "xdr_subs.h"
48#include "nfsm_subs.h"
49#include "nfsmount.h"
50
2f08b65a 51#include "syslog.h"
2f08b65a 52
a2907882 53#define TRUE 1
170bfd05 54#define FALSE 0
a2907882 55
a2907882
KM
56/*
57 * External data, mostly RPC constants in XDR form
58 */
59extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
60 rpc_msgaccepted, rpc_call;
61extern u_long nfs_prog, nfs_vers;
170bfd05 62/* Maybe these should be bits in a u_long ?? */
f0f1cbaa
KM
63extern int nonidempotent[NFS_NPROCS];
64int nfs_sbwait();
65void nfs_disconnect();
66
a2907882
KM
67int nfsrv_null(),
68 nfsrv_getattr(),
69 nfsrv_setattr(),
70 nfsrv_lookup(),
71 nfsrv_readlink(),
72 nfsrv_read(),
73 nfsrv_write(),
74 nfsrv_create(),
75 nfsrv_remove(),
76 nfsrv_rename(),
77 nfsrv_link(),
78 nfsrv_symlink(),
79 nfsrv_mkdir(),
80 nfsrv_rmdir(),
81 nfsrv_readdir(),
82 nfsrv_statfs(),
83 nfsrv_noop();
84
85int (*nfsrv_procs[NFS_NPROCS])() = {
86 nfsrv_null,
87 nfsrv_getattr,
88 nfsrv_setattr,
89 nfsrv_noop,
90 nfsrv_lookup,
91 nfsrv_readlink,
92 nfsrv_read,
93 nfsrv_noop,
94 nfsrv_write,
95 nfsrv_create,
96 nfsrv_remove,
97 nfsrv_rename,
98 nfsrv_link,
99 nfsrv_symlink,
100 nfsrv_mkdir,
101 nfsrv_rmdir,
102 nfsrv_readdir,
103 nfsrv_statfs,
104};
105
2f08b65a
KM
106struct nfsreq nfsreqh;
107int nfsrexmtthresh = NFS_FISHY;
f0f1cbaa 108int nfs_tcpnodelay = 1;
2f08b65a
KM
109
110/*
f0f1cbaa 111 * Initialize sockets and congestion for a new NFS connection.
2f08b65a
KM
112 * We do not free the sockaddr if error.
113 */
f0f1cbaa 114nfs_connect(nmp)
2f08b65a 115 register struct nfsmount *nmp;
2f08b65a 116{
f0f1cbaa
KM
117 register struct socket *so;
118 int s, error;
2f08b65a 119 struct mbuf *m;
2f08b65a 120
f0f1cbaa
KM
121 nmp->nm_so = (struct socket *)0;
122 if (error = socreate(mtod(nmp->nm_nam, struct sockaddr *)->sa_family,
123 &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
2f08b65a 124 goto bad;
f0f1cbaa
KM
125 so = nmp->nm_so;
126 nmp->nm_soflags = so->so_proto->pr_flags;
2f08b65a 127
f0f1cbaa
KM
128 /*
129 * Protocols that do not require connections may be optionally left
130 * unconnected for servers that reply from a port other than NFS_PORT.
131 */
132 if (nmp->nm_flag & NFSMNT_NOCONN) {
133 if (nmp->nm_soflags & PR_CONNREQUIRED) {
134 error = ENOTCONN;
2f08b65a
KM
135 goto bad;
136 }
f0f1cbaa
KM
137 } else {
138 if (error = soconnect(so, nmp->nm_nam))
2f08b65a 139 goto bad;
f0f1cbaa
KM
140
141 /*
142 * Wait for the connection to complete. Cribbed from the
143 * connect system call but with the wait at negative prio.
144 */
145 s = splnet();
146 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0)
170bfd05 147 (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "nfscon", 0);
f0f1cbaa
KM
148 splx(s);
149 if (so->so_error) {
150 error = so->so_error;
151 goto bad;
152 }
2f08b65a 153 }
f0f1cbaa 154 if (nmp->nm_sotype == SOCK_DGRAM) {
170bfd05 155 if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
f0f1cbaa
KM
156 so->so_rcv.sb_timeo = (5 * hz);
157 so->so_snd.sb_timeo = (5 * hz);
158 } else {
159 so->so_rcv.sb_timeo = 0;
160 so->so_snd.sb_timeo = 0;
161 }
162 if (error = soreserve(so, nmp->nm_wsize + NFS_MAXPKTHDR,
170bfd05 163 nmp->nm_rsize + NFS_MAXPKTHDR))
f0f1cbaa
KM
164 goto bad;
165 } else {
170bfd05 166 if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_SPONGY | NFSMNT_INT)) {
f0f1cbaa
KM
167 so->so_rcv.sb_timeo = (5 * hz);
168 so->so_snd.sb_timeo = (5 * hz);
169 } else {
170 so->so_rcv.sb_timeo = 0;
171 so->so_snd.sb_timeo = 0;
172 }
173 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
174 MGET(m, M_WAIT, MT_SOOPTS);
175 *mtod(m, int *) = 1;
176 m->m_len = sizeof(int);
177 sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
178 }
179 if (so->so_proto->pr_domain->dom_family == AF_INET &&
180 so->so_proto->pr_protocol == IPPROTO_TCP &&
181 nfs_tcpnodelay) {
182 MGET(m, M_WAIT, MT_SOOPTS);
183 *mtod(m, int *) = 1;
184 m->m_len = sizeof(int);
185 sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
186 }
187 if (error = soreserve(so,
170bfd05 188 nmp->nm_wsize + NFS_MAXPKTHDR + sizeof(u_long),
f0f1cbaa
KM
189 nmp->nm_rsize + NFS_MAXPKTHDR + sizeof(u_long)))
190 goto bad;
191 }
192 so->so_rcv.sb_flags |= SB_NOINTR;
193 so->so_snd.sb_flags |= SB_NOINTR;
2f08b65a 194
f0f1cbaa
KM
195 /* Initialize other non-zero congestion variables */
196 nmp->nm_rto = NFS_TIMEO;
197 nmp->nm_window = 2; /* Initial send window */
198 nmp->nm_ssthresh = NFS_MAXWINDOW; /* Slowstart threshold */
199 nmp->nm_rttvar = nmp->nm_rto << 1;
200 nmp->nm_sent = 0;
201 nmp->nm_currexmit = 0;
202 return (0);
2f08b65a 203
f0f1cbaa
KM
204bad:
205 nfs_disconnect(nmp);
206 return (error);
207}
2f08b65a 208
f0f1cbaa
KM
209/*
210 * Reconnect routine:
211 * Called when a connection is broken on a reliable protocol.
212 * - clean up the old socket
213 * - nfs_connect() again
214 * - set R_MUSTRESEND for all outstanding requests on mount point
215 * If this fails the mount point is DEAD!
216 * nb: Must be called with the nfs_solock() set on the mount point.
217 */
218nfs_reconnect(rep, nmp)
219 register struct nfsreq *rep;
220 register struct nfsmount *nmp;
221{
222 register struct nfsreq *rp;
f0f1cbaa 223 int error;
2f08b65a 224
f0f1cbaa 225 if (rep->r_procp)
5580a343 226 tprintf(rep->r_procp->p_session,
f0f1cbaa
KM
227 "Nfs server %s, trying reconnect\n",
228 nmp->nm_mountp->mnt_stat.f_mntfromname);
229 else
5580a343 230 tprintf(NULL, "Nfs server %s, trying a reconnect\n",
f0f1cbaa
KM
231 nmp->nm_mountp->mnt_stat.f_mntfromname);
232 while (error = nfs_connect(nmp)) {
d4e5799e
KM
233#ifdef lint
234 error = error;
235#endif /* lint */
f0f1cbaa
KM
236 if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp))
237 return (EINTR);
170bfd05 238 (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
2f08b65a 239 }
f0f1cbaa 240 if (rep->r_procp)
5580a343 241 tprintf(rep->r_procp->p_session,
f0f1cbaa
KM
242 "Nfs server %s, reconnected\n",
243 nmp->nm_mountp->mnt_stat.f_mntfromname);
244 else
5580a343 245 tprintf(NULL, "Nfs server %s, reconnected\n",
f0f1cbaa
KM
246 nmp->nm_mountp->mnt_stat.f_mntfromname);
247
248 /*
249 * Loop through outstanding request list and fix up all requests
250 * on old socket.
251 */
252 rp = nfsreqh.r_next;
253 while (rp != &nfsreqh) {
254 if (rp->r_nmp == nmp)
255 rp->r_flags |= R_MUSTRESEND;
256 rp = rp->r_next;
2f08b65a
KM
257 }
258 return (0);
2f08b65a
KM
259}
260
261/*
262 * NFS disconnect. Clean up and unlink.
263 */
f0f1cbaa 264void
2f08b65a
KM
265nfs_disconnect(nmp)
266 register struct nfsmount *nmp;
267{
f0f1cbaa 268 register struct socket *so;
2f08b65a 269
f0f1cbaa
KM
270 if (nmp->nm_so) {
271 so = nmp->nm_so;
272 nmp->nm_so = (struct socket *)0;
273 soshutdown(so, 2);
274 soclose(so);
2f08b65a
KM
275 }
276}
a2907882
KM
277
278/*
f0f1cbaa
KM
279 * This is the nfs send routine. For connection based socket types, it
280 * must be called with an nfs_solock() on the socket.
281 * "rep == NULL" indicates that it has been called from a server.
a2907882 282 */
f0f1cbaa 283nfs_send(so, nam, top, rep)
a2907882
KM
284 register struct socket *so;
285 struct mbuf *nam;
f0f1cbaa
KM
286 register struct mbuf *top;
287 struct nfsreq *rep;
a2907882 288{
f0f1cbaa
KM
289 struct mbuf *sendnam;
290 int error, soflags;
a2907882 291
f0f1cbaa
KM
292 if (rep) {
293 if (rep->r_flags & R_SOFTTERM) {
2f08b65a 294 m_freem(top);
f0f1cbaa 295 return (EINTR);
2f08b65a 296 }
5044b7a3 297 if (rep->r_nmp->nm_so == NULL &&
f0f1cbaa
KM
298 (error = nfs_reconnect(rep, rep->r_nmp)))
299 return (error);
300 rep->r_flags &= ~R_MUSTRESEND;
5044b7a3 301 so = rep->r_nmp->nm_so;
f0f1cbaa
KM
302 soflags = rep->r_nmp->nm_soflags;
303 } else
304 soflags = so->so_proto->pr_flags;
305 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
306 sendnam = (struct mbuf *)0;
307 else
308 sendnam = nam;
309
310 error = sosend(so, sendnam, (struct uio *)0, top,
311 (struct mbuf *)0, 0);
312 if (error == EWOULDBLOCK && rep) {
313 if (rep->r_flags & R_SOFTTERM)
314 error = EINTR;
315 else {
316 rep->r_flags |= R_MUSTRESEND;
317 error = 0;
2f08b65a 318 }
a2907882 319 }
f0f1cbaa
KM
320 /*
321 * Ignore socket errors??
322 */
323 if (error && error != EINTR && error != ERESTART)
324 error = 0;
a2907882
KM
325 return (error);
326}
327
328/*
f0f1cbaa
KM
329 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
330 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
331 * Mark and consolidate the data into a new mbuf list.
332 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
333 * small mbufs.
334 * For SOCK_STREAM we must be very careful to read an entire record once
335 * we have read any of it, even if the system call has been interrupted.
a2907882 336 */
f0f1cbaa 337nfs_receive(so, aname, mp, rep)
a2907882
KM
338 register struct socket *so;
339 struct mbuf **aname;
340 struct mbuf **mp;
f0f1cbaa 341 register struct nfsreq *rep;
a2907882 342{
f0f1cbaa
KM
343 struct uio auio;
344 struct iovec aio;
a2907882 345 register struct mbuf *m;
f0f1cbaa
KM
346 struct mbuf *m2, *m3, *mnew, **mbp;
347 caddr_t fcp, tcp;
348 u_long len;
349 struct mbuf **getnam;
350 int error, siz, mlen, soflags, rcvflg = MSG_WAITALL;
a2907882 351
f0f1cbaa
KM
352 /*
353 * Set up arguments for soreceive()
354 */
355 *mp = (struct mbuf *)0;
356 *aname = (struct mbuf *)0;
357 if (rep)
358 soflags = rep->r_nmp->nm_soflags;
359 else
360 soflags = so->so_proto->pr_flags;
a2907882 361
f0f1cbaa
KM
362 /*
363 * For reliable protocols, lock against other senders/receivers
364 * in case a reconnect is necessary.
365 * For SOCK_STREAM, first get the Record Mark to find out how much
366 * more there is to get.
367 * We must lock the socket against other receivers
368 * until we have an entire rpc request/reply.
369 */
370 if (soflags & PR_CONNREQUIRED) {
371tryagain:
372 /*
373 * Check for fatal errors and resending request.
374 */
375 if (rep) {
376 /*
377 * Ugh: If a reconnect attempt just happened, nm_so
378 * would have changed. NULL indicates a failed
379 * attempt that has essentially shut down this
380 * mount point.
381 */
382 if (rep->r_mrep || (so = rep->r_nmp->nm_so) == NULL ||
383 (rep->r_flags & R_SOFTTERM))
384 return (EINTR);
385 while (rep->r_flags & R_MUSTRESEND) {
386 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
387 nfsstats.rpcretries++;
388 if (error = nfs_send(so, rep->r_nmp->nm_nam, m,
389 rep))
390 goto errout;
2f08b65a 391 }
e8540f59 392 }
f0f1cbaa
KM
393 if ((soflags & PR_ATOMIC) == 0) {
394 aio.iov_base = (caddr_t) &len;
395 aio.iov_len = sizeof(u_long);
396 auio.uio_iov = &aio;
397 auio.uio_iovcnt = 1;
398 auio.uio_segflg = UIO_SYSSPACE;
399 auio.uio_rw = UIO_READ;
400 auio.uio_offset = 0;
401 auio.uio_resid = sizeof(u_long);
402 do {
403 error = soreceive(so, (struct mbuf **)0, &auio,
404 (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
405 if (error == EWOULDBLOCK && rep) {
406 if (rep->r_flags & R_SOFTTERM)
407 return (EINTR);
408 if (rep->r_flags & R_MUSTRESEND)
409 goto tryagain;
410 }
411 } while (error == EWOULDBLOCK);
412 if (!error && auio.uio_resid > 0)
413 error = EPIPE;
414 if (error)
415 goto errout;
416 len = ntohl(len) & ~0x80000000;
417 /*
418 * This is SERIOUS! We are out of sync with the sender
419 * and forcing a disconnect/reconnect is all I can do.
420 */
421 if (len > NFS_MAXPACKET) {
422 error = EFBIG;
423 goto errout;
424 }
425 auio.uio_resid = len;
426 do {
427 error = soreceive(so, (struct mbuf **)0,
428 &auio, mp, (struct mbuf **)0, &rcvflg);
429 } while (error == EWOULDBLOCK || error == EINTR ||
430 error == ERESTART);
431 if (!error && auio.uio_resid > 0)
432 error = EPIPE;
2f08b65a 433 } else {
f0f1cbaa
KM
434 auio.uio_resid = len = 1000000; /* Anything Big */
435 do {
436 error = soreceive(so, (struct mbuf **)0,
437 &auio, mp, (struct mbuf **)0, &rcvflg);
438 if (error == EWOULDBLOCK && rep) {
439 if (rep->r_flags & R_SOFTTERM)
440 return (EINTR);
441 if (rep->r_flags & R_MUSTRESEND)
442 goto tryagain;
443 }
444 } while (error == EWOULDBLOCK);
445 if (!error && *mp == NULL)
446 error = EPIPE;
447 len -= auio.uio_resid;
2f08b65a 448 }
f0f1cbaa
KM
449errout:
450 if (error && rep && error != EINTR && error != ERESTART) {
451 m_freem(*mp);
452 *mp = (struct mbuf *)0;
453 nfs_disconnect(rep->r_nmp);
454 error = nfs_reconnect(rep, rep->r_nmp);
455 if (!error)
456 goto tryagain;
2f08b65a 457 }
f0f1cbaa
KM
458 } else {
459 if (so->so_state & SS_ISCONNECTED)
460 getnam = (struct mbuf **)0;
461 else
462 getnam = aname;
463 auio.uio_resid = len = 1000000;
464 do {
465 error = soreceive(so, getnam, &auio, mp,
466 (struct mbuf **)0, &rcvflg);
467 if (error == EWOULDBLOCK && rep &&
468 (rep->r_flags & R_SOFTTERM))
469 return (EINTR);
470 } while (error == EWOULDBLOCK);
471 len -= auio.uio_resid;
472 }
473 if (error) {
474 m_freem(*mp);
475 *mp = (struct mbuf *)0;
476 }
477 /*
478 * Search for any mbufs that are not a multiple of 4 bytes long.
479 * These could cause pointer alignment problems, so copy them to
480 * well aligned mbufs.
481 */
482 m = *mp;
483 mbp = mp;
484 while (m) {
485 /*
486 * All this for something that may never happen.
487 */
488 if (m->m_len & 0x3) {
489 printf("nfs_rcv odd length!\n");
490 fcp = mtod(m, caddr_t);
491 mnew = m2 = (struct mbuf *)0;
d4e5799e
KM
492#ifdef lint
493 m3 = (struct mbuf *)0;
494 mlen = 0;
495#endif /* lint */
f0f1cbaa
KM
496 while (m) {
497 if (m2 == NULL || mlen == 0) {
498 MGET(m2, M_WAIT, MT_DATA);
499 if (len > MINCLSIZE)
500 MCLGET(m2, M_WAIT);
501 m2->m_len = 0;
502 mlen = M_TRAILINGSPACE(m2);
503 tcp = mtod(m2, caddr_t);
504 if (mnew) {
505 m3->m_next = m2;
506 m3 = m2;
507 } else
508 mnew = m3 = m2;
509 }
510 siz = (mlen > m->m_len) ? m->m_len : mlen;
511 bcopy(fcp, tcp, siz);
512 m2->m_len += siz;
513 mlen -= siz;
514 len -= siz;
515 tcp += siz;
516 m->m_len -= siz;
517 fcp += siz;
518 if (m->m_len == 0) {
519 do {
520 m = m->m_next;
521 } while (m && m->m_len == 0);
522 if (m)
523 fcp = mtod(m, caddr_t);
524 }
525 }
526 m = *mbp;
527 *mbp = mnew;
528 m_freem(m);
529 break;
2f08b65a 530 }
f0f1cbaa
KM
531 len -= m->m_len;
532 mbp = &m->m_next;
533 m = m->m_next;
a2907882 534 }
a2907882
KM
535 return (error);
536}
537
a2907882
KM
538struct rpc_replyhead {
539 u_long r_xid;
540 u_long r_rep;
541};
542
543/*
f0f1cbaa 544 * Implement receipt of reply on a socket.
a2907882
KM
545 * We must search through the list of received datagrams matching them
546 * with outstanding requests using the xid, until ours is found.
547 */
f0f1cbaa
KM
548/* ARGSUSED */
549nfs_reply(nmp, myrep)
550 struct nfsmount *nmp;
ffe6f482 551 struct nfsreq *myrep;
a2907882
KM
552{
553 register struct mbuf *m;
554 register struct nfsreq *rep;
f0f1cbaa 555 register int error = 0;
a2907882 556 struct rpc_replyhead replyh;
f0f1cbaa
KM
557 struct mbuf *mp, *nam;
558 char *cp;
559 int cnt, xfer;
a2907882
KM
560
561 /*
f0f1cbaa 562 * Loop around until we get our own reply
a2907882 563 */
f0f1cbaa
KM
564 for (;;) {
565 /*
566 * Lock against other receivers so that I don't get stuck in
567 * sbwait() after someone else has received my reply for me.
568 * Also necessary for connection based protocols to avoid
569 * race conditions during a reconnect.
570 */
170bfd05 571 nfs_solock(&nmp->nm_flag);
f0f1cbaa
KM
572 /* Already received, bye bye */
573 if (myrep->r_mrep != NULL) {
574 nfs_sounlock(&nmp->nm_flag);
575 return (0);
576 }
577 /*
578 * Get the next Rpc reply off the socket
579 */
580 if (error = nfs_receive(nmp->nm_so, &nam, &mp, myrep)) {
581 nfs_sounlock(&nmp->nm_flag);
a2907882 582
f0f1cbaa
KM
583 /*
584 * Ignore routing errors on connectionless protocols??
585 */
586 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
587 nmp->nm_so->so_error = 0;
588 continue;
a2907882 589 }
f0f1cbaa
KM
590
591 /*
592 * Otherwise cleanup and return a fatal error.
593 */
594 if (myrep->r_flags & R_TIMING) {
595 myrep->r_flags &= ~R_TIMING;
596 nmp->nm_rtt = -1;
a2907882 597 }
f0f1cbaa
KM
598 if (myrep->r_flags & R_SENT) {
599 myrep->r_flags &= ~R_SENT;
600 nmp->nm_sent--;
2f08b65a 601 }
f0f1cbaa
KM
602 return (error);
603 }
604
605 /*
606 * Get the xid and check that it is an rpc reply
607 */
608 m = mp;
609 if (m->m_len >= 2*NFSX_UNSIGNED)
610 bcopy(mtod(m, caddr_t), (caddr_t)&replyh,
611 2*NFSX_UNSIGNED);
612 else {
613 cnt = 2*NFSX_UNSIGNED;
614 cp = (caddr_t)&replyh;
615 while (m && cnt > 0) {
616 if (m->m_len > 0) {
617 xfer = (m->m_len >= cnt) ? cnt :
618 m->m_len;
619 bcopy(mtod(m, caddr_t), cp, xfer);
620 cnt -= xfer;
621 cp += xfer;
622 }
623 if (cnt > 0)
624 m = m->m_next;
2f08b65a 625 }
f0f1cbaa
KM
626 }
627 if (replyh.r_rep != rpc_reply || m == NULL) {
628 nfsstats.rpcinvalid++;
629 m_freem(mp);
630 nfs_sounlock(&nmp->nm_flag);
631 continue;
632 }
633 /*
634 * Loop through the request list to match up the reply
635 * Iff no match, just drop the datagram
636 */
637 m = mp;
638 rep = nfsreqh.r_next;
639 while (rep != &nfsreqh) {
640 if (rep->r_mrep == NULL && replyh.r_xid == rep->r_xid) {
641 /* Found it.. */
642 rep->r_mrep = m;
643 /*
644 * Update timing
645 */
646 if (rep->r_flags & R_TIMING) {
647 nfs_updatetimer(rep->r_nmp);
648 rep->r_flags &= ~R_TIMING;
649 rep->r_nmp->nm_rtt = -1;
650 }
651 if (rep->r_flags & R_SENT) {
652 rep->r_flags &= ~R_SENT;
653 rep->r_nmp->nm_sent--;
654 }
655 break;
2f08b65a 656 }
f0f1cbaa 657 rep = rep->r_next;
a2907882 658 }
f0f1cbaa
KM
659 nfs_sounlock(&nmp->nm_flag);
660 if (nam)
661 m_freem(nam);
662 /*
663 * If not matched to a request, drop it.
664 * If it's mine, get out.
665 */
666 if (rep == &nfsreqh) {
667 nfsstats.rpcunexpected++;
668 m_freem(m);
669 } else if (rep == myrep)
670 return (0);
a2907882 671 }
a2907882
KM
672}
673
674/*
675 * nfs_request - goes something like this
676 * - fill in request struct
677 * - links it into list
f0f1cbaa
KM
678 * - calls nfs_send() for first transmit
679 * - calls nfs_receive() to get reply
a2907882
KM
680 * - break down rpc header and return with nfs reply pointed to
681 * by mrep or error
682 * nb: always frees up mreq mbuf list
683 */
170bfd05 684nfs_request(vp, mreq, xid, procnum, procp, tryhard, mp, mrp, mdp, dposp)
a2907882
KM
685 struct vnode *vp;
686 struct mbuf *mreq;
687 u_long xid;
f0f1cbaa
KM
688 int procnum;
689 struct proc *procp;
170bfd05 690 int tryhard;
a2907882
KM
691 struct mount *mp;
692 struct mbuf **mrp;
693 struct mbuf **mdp;
694 caddr_t *dposp;
695{
696 register struct mbuf *m, *mrep;
697 register struct nfsreq *rep;
698 register u_long *p;
699 register int len;
f0f1cbaa 700 struct nfsmount *nmp;
a2907882 701 struct mbuf *md;
ffe6f482 702 struct nfsreq *reph;
a2907882
KM
703 caddr_t dpos;
704 char *cp2;
705 int t1;
706 int s;
f0f1cbaa 707 int error = 0;
a2907882 708
f0f1cbaa 709 nmp = VFSTONFS(mp);
a2907882
KM
710 m = mreq;
711 MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
712 rep->r_xid = xid;
f0f1cbaa 713 rep->r_nmp = nmp;
a2907882 714 rep->r_vp = vp;
f0f1cbaa 715 rep->r_procp = procp;
170bfd05
KM
716 if ((nmp->nm_flag & NFSMNT_SOFT) ||
717 ((nmp->nm_flag & NFSMNT_SPONGY) && !tryhard))
f0f1cbaa 718 rep->r_retry = nmp->nm_retry;
a2907882 719 else
2f08b65a
KM
720 rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
721 rep->r_flags = rep->r_rexmit = 0;
f0f1cbaa
KM
722 /*
723 * Three cases:
724 * - non-idempotent requests on SOCK_DGRAM use NFS_MINIDEMTIMEO
725 * - idempotent requests on SOCK_DGRAM use 0
726 * - Reliable transports, NFS_RELIABLETIMEO
727 * Timeouts are still done on reliable transports to ensure detection
170bfd05 728 * of excessive connection delay.
f0f1cbaa
KM
729 */
730 if (nmp->nm_sotype != SOCK_DGRAM)
731 rep->r_timerinit = -NFS_RELIABLETIMEO;
732 else if (nonidempotent[procnum])
733 rep->r_timerinit = -NFS_MINIDEMTIMEO;
734 else
735 rep->r_timerinit = 0;
736 rep->r_timer = rep->r_timerinit;
a2907882 737 rep->r_mrep = NULL;
a2907882
KM
738 len = 0;
739 while (m) {
740 len += m->m_len;
741 m = m->m_next;
742 }
f0f1cbaa
KM
743 mreq->m_pkthdr.len = len;
744 mreq->m_pkthdr.rcvif = (struct ifnet *)0;
745 /*
746 * For non-atomic protocols, insert a Sun RPC Record Mark.
747 */
748 if ((nmp->nm_soflags & PR_ATOMIC) == 0) {
749 M_PREPEND(mreq, sizeof(u_long), M_WAIT);
750 *mtod(mreq, u_long *) = htonl(0x80000000 | len);
751 }
752 rep->r_mreq = mreq;
a2907882 753
2f08b65a
KM
754 /*
755 * Do the client side RPC.
756 */
757 nfsstats.rpcrequests++;
f0f1cbaa
KM
758 /*
759 * Chain request into list of outstanding requests. Be sure
760 * to put it LAST so timer finds oldest requests first.
761 */
a2907882 762 s = splnet();
2f08b65a 763 reph = &nfsreqh;
f0f1cbaa
KM
764 reph->r_prev->r_next = rep;
765 rep->r_prev = reph->r_prev;
ffe6f482
KM
766 reph->r_prev = rep;
767 rep->r_next = reph;
2f08b65a
KM
768 /*
769 * If backing off another request or avoiding congestion, don't
770 * send this one now but let timer do it. If not timing a request,
771 * do it now.
772 */
f0f1cbaa
KM
773 if (nmp->nm_sent <= 0 || nmp->nm_sotype != SOCK_DGRAM ||
774 (nmp->nm_currexmit == 0 && nmp->nm_sent < nmp->nm_window)) {
775 nmp->nm_sent++;
776 rep->r_flags |= R_SENT;
777 if (nmp->nm_rtt == -1) {
778 nmp->nm_rtt = 0;
779 rep->r_flags |= R_TIMING;
780 }
781 splx(s);
782 m = m_copym(mreq, 0, M_COPYALL, M_WAIT);
783 if (nmp->nm_soflags & PR_CONNREQUIRED)
170bfd05 784 nfs_solock(&nmp->nm_flag);
f0f1cbaa
KM
785 error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
786 if (nmp->nm_soflags & PR_CONNREQUIRED)
787 nfs_sounlock(&nmp->nm_flag);
788 if (error && NFSIGNORE_SOERROR(nmp->nm_soflags, error))
789 nmp->nm_so->so_error = error = 0;
790 } else
2f08b65a 791 splx(s);
a2907882 792
2f08b65a
KM
793 /*
794 * Wait for the reply from our send or the timer's.
795 */
f0f1cbaa
KM
796 if (!error)
797 error = nfs_reply(nmp, rep);
a2907882 798
2f08b65a
KM
799 /*
800 * RPC done, unlink the request.
801 */
a2907882
KM
802 s = splnet();
803 rep->r_prev->r_next = rep->r_next;
ffe6f482 804 rep->r_next->r_prev = rep->r_prev;
a2907882 805 splx(s);
f0f1cbaa
KM
806
807 /*
808 * If there was a successful reply and a tprintf msg.
809 * tprintf a response.
810 */
811 if (!error && (rep->r_flags & R_TPRINTFMSG)) {
812 if (rep->r_procp)
5580a343 813 tprintf(rep->r_procp->p_session,
f0f1cbaa
KM
814 "Nfs server %s, is alive again\n",
815 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
816 else
5580a343 817 tprintf(NULL, "Nfs server %s, is alive again\n",
f0f1cbaa
KM
818 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
819 }
a2907882
KM
820 m_freem(rep->r_mreq);
821 mrep = md = rep->r_mrep;
822 FREE((caddr_t)rep, M_NFSREQ);
823 if (error)
824 return (error);
825
826 /*
827 * break down the rpc header and check if ok
828 */
829 dpos = mtod(md, caddr_t);
830 nfsm_disect(p, u_long *, 5*NFSX_UNSIGNED);
831 p += 2;
832 if (*p++ == rpc_msgdenied) {
833 if (*p == rpc_mismatch)
834 error = EOPNOTSUPP;
835 else
836 error = EACCES;
837 m_freem(mrep);
838 return (error);
839 }
840 /*
841 * skip over the auth_verf, someday we may want to cache auth_short's
842 * for nfs_reqhead(), but for now just dump it
843 */
844 if (*++p != 0) {
845 len = nfsm_rndup(fxdr_unsigned(long, *p));
846 nfsm_adv(len);
847 }
848 nfsm_disect(p, u_long *, NFSX_UNSIGNED);
849 /* 0 == ok */
850 if (*p == 0) {
851 nfsm_disect(p, u_long *, NFSX_UNSIGNED);
852 if (*p != 0) {
853 error = fxdr_unsigned(int, *p);
854 m_freem(mrep);
855 return (error);
856 }
857 *mrp = mrep;
858 *mdp = md;
859 *dposp = dpos;
860 return (0);
861 }
862 m_freem(mrep);
863 return (EPROTONOSUPPORT);
864nfsmout:
865 return (error);
866}
867
868/*
869 * Get a request for the server main loop
870 * - receive a request via. nfs_soreceive()
871 * - verify it
872 * - fill in the cred struct.
873 */
d4e5799e 874nfs_getreq(so, prog, vers, maxproc, nam, mrp, mdp, dposp, retxid, procnum, cr,
170bfd05 875 msk, mtch)
a2907882
KM
876 struct socket *so;
877 u_long prog;
878 u_long vers;
879 int maxproc;
880 struct mbuf **nam;
881 struct mbuf **mrp;
882 struct mbuf **mdp;
883 caddr_t *dposp;
884 u_long *retxid;
d4e5799e 885 u_long *procnum;
a2907882 886 register struct ucred *cr;
f0f1cbaa 887 struct mbuf *msk, *mtch;
a2907882
KM
888{
889 register int i;
0bd503ad
KM
890 register u_long *p;
891 register long t1;
892 caddr_t dpos, cp2;
893 int error = 0;
894 struct mbuf *mrep, *md;
895 int len;
a2907882 896
f0f1cbaa 897 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
f0f1cbaa 898 error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
f0f1cbaa
KM
899 } else {
900 mrep = (struct mbuf *)0;
901 do {
902 if (mrep) {
903 m_freem(*nam);
904 m_freem(mrep);
905 }
906 error = nfs_receive(so, nam, &mrep, (struct nfsreq *)0);
907 } while (!error && nfs_badnam(*nam, msk, mtch));
908 }
909 if (error)
a2907882
KM
910 return (error);
911 md = mrep;
912 dpos = mtod(mrep, caddr_t);
913 nfsm_disect(p, u_long *, 10*NFSX_UNSIGNED);
914 *retxid = *p++;
915 if (*p++ != rpc_call) {
916 m_freem(mrep);
917 return (ERPCMISMATCH);
918 }
919 if (*p++ != rpc_vers) {
920 m_freem(mrep);
921 return (ERPCMISMATCH);
922 }
923 if (*p++ != prog) {
924 m_freem(mrep);
925 return (EPROGUNAVAIL);
926 }
927 if (*p++ != vers) {
928 m_freem(mrep);
929 return (EPROGMISMATCH);
930 }
d4e5799e
KM
931 *procnum = fxdr_unsigned(u_long, *p++);
932 if (*procnum == NFSPROC_NULL) {
a2907882
KM
933 *mrp = mrep;
934 return (0);
935 }
d4e5799e 936 if (*procnum > maxproc || *p++ != rpc_auth_unix) {
a2907882
KM
937 m_freem(mrep);
938 return (EPROCUNAVAIL);
939 }
f0f1cbaa
KM
940 len = fxdr_unsigned(int, *p++);
941 if (len < 0 || len > RPCAUTH_MAXSIZ) {
942 m_freem(mrep);
943 return (EBADRPC);
944 }
0bd503ad 945 len = fxdr_unsigned(int, *++p);
f0f1cbaa
KM
946 if (len < 0 || len > NFS_MAXNAMLEN) {
947 m_freem(mrep);
948 return (EBADRPC);
949 }
0bd503ad 950 nfsm_adv(nfsm_rndup(len));
a2907882
KM
951 nfsm_disect(p, u_long *, 3*NFSX_UNSIGNED);
952 cr->cr_uid = fxdr_unsigned(uid_t, *p++);
953 cr->cr_gid = fxdr_unsigned(gid_t, *p++);
0bd503ad 954 len = fxdr_unsigned(int, *p);
f0f1cbaa 955 if (len < 0 || len > RPCAUTH_UNIXGIDS) {
a2907882
KM
956 m_freem(mrep);
957 return (EBADRPC);
958 }
0bd503ad
KM
959 nfsm_disect(p, u_long *, (len + 2)*NFSX_UNSIGNED);
960 for (i = 1; i <= len; i++)
f0f1cbaa
KM
961 if (i < NGROUPS)
962 cr->cr_groups[i] = fxdr_unsigned(gid_t, *p++);
963 else
964 p++;
965 cr->cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
a2907882
KM
966 /*
967 * Do we have any use for the verifier.
968 * According to the "Remote Procedure Call Protocol Spec." it
969 * should be AUTH_NULL, but some clients make it AUTH_UNIX?
970 * For now, just skip over it
971 */
0bd503ad 972 len = fxdr_unsigned(int, *++p);
f0f1cbaa
KM
973 if (len < 0 || len > RPCAUTH_MAXSIZ) {
974 m_freem(mrep);
975 return (EBADRPC);
976 }
0bd503ad
KM
977 if (len > 0)
978 nfsm_adv(nfsm_rndup(len));
a2907882
KM
979 *mrp = mrep;
980 *mdp = md;
981 *dposp = dpos;
982 return (0);
983nfsmout:
984 return (error);
985}
986
987/*
988 * Generate the rpc reply header
989 * siz arg. is used to decide if adding a cluster is worthwhile
990 */
991nfs_rephead(siz, retxid, err, mrq, mbp, bposp)
992 int siz;
993 u_long retxid;
994 int err;
995 struct mbuf **mrq;
996 struct mbuf **mbp;
997 caddr_t *bposp;
998{
0bd503ad
KM
999 register u_long *p;
1000 register long t1;
1001 caddr_t bpos;
1002 struct mbuf *mreq, *mb, *mb2;
a2907882
KM
1003
1004 NFSMGETHDR(mreq);
1005 mb = mreq;
1006 if ((siz+RPC_REPLYSIZ) > MHLEN)
f0f1cbaa 1007 MCLGET(mreq, M_WAIT);
a2907882
KM
1008 p = mtod(mreq, u_long *);
1009 mreq->m_len = 6*NFSX_UNSIGNED;
1010 bpos = ((caddr_t)p)+mreq->m_len;
1011 *p++ = retxid;
1012 *p++ = rpc_reply;
1013 if (err == ERPCMISMATCH) {
1014 *p++ = rpc_msgdenied;
1015 *p++ = rpc_mismatch;
1016 *p++ = txdr_unsigned(2);
1017 *p = txdr_unsigned(2);
1018 } else {
1019 *p++ = rpc_msgaccepted;
1020 *p++ = 0;
1021 *p++ = 0;
1022 switch (err) {
1023 case EPROGUNAVAIL:
1024 *p = txdr_unsigned(RPC_PROGUNAVAIL);
1025 break;
1026 case EPROGMISMATCH:
1027 *p = txdr_unsigned(RPC_PROGMISMATCH);
1028 nfsm_build(p, u_long *, 2*NFSX_UNSIGNED);
1029 *p++ = txdr_unsigned(2);
1030 *p = txdr_unsigned(2); /* someday 3 */
1031 break;
1032 case EPROCUNAVAIL:
1033 *p = txdr_unsigned(RPC_PROCUNAVAIL);
1034 break;
1035 default:
1036 *p = 0;
1037 if (err != VNOVAL) {
1038 nfsm_build(p, u_long *, NFSX_UNSIGNED);
1039 *p = txdr_unsigned(err);
1040 }
1041 break;
1042 };
1043 }
1044 *mrq = mreq;
1045 *mbp = mb;
1046 *bposp = bpos;
1047 if (err != 0 && err != VNOVAL)
1048 nfsstats.srvrpc_errs++;
1049 return (0);
1050}
1051
1052/*
1053 * Nfs timer routine
1054 * Scan the nfsreq list and retranmit any requests that have timed out
1055 * To avoid retransmission attempts on STREAM sockets (in the future) make
2f08b65a 1056 * sure to set the r_retry field to 0 (implies nm_retry == 0).
a2907882
KM
1057 */
1058nfs_timer()
1059{
1060 register struct nfsreq *rep;
1061 register struct mbuf *m;
1062 register struct socket *so;
f0f1cbaa 1063 register struct nfsmount *nmp;
2f08b65a 1064 int s, error;
a2907882
KM
1065
1066 s = splnet();
f0f1cbaa
KM
1067 for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
1068 nmp = rep->r_nmp;
1069 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM) ||
1070 (so = nmp->nm_so) == NULL)
1071 continue;
1072 if ((nmp->nm_flag & NFSMNT_INT) && nfs_sigintr(rep->r_procp)) {
1073 rep->r_flags |= R_SOFTTERM;
1074 continue;
1075 }
2f08b65a 1076 if (rep->r_flags & R_TIMING) /* update rtt in mount */
f0f1cbaa 1077 nmp->nm_rtt++;
f0f1cbaa
KM
1078 /* If not timed out */
1079 if (++rep->r_timer < nmp->nm_rto)
2f08b65a
KM
1080 continue;
1081 /* Do backoff and save new timeout in mount */
1082 if (rep->r_flags & R_TIMING) {
f0f1cbaa 1083 nfs_backofftimer(nmp);
2f08b65a 1084 rep->r_flags &= ~R_TIMING;
f0f1cbaa 1085 nmp->nm_rtt = -1;
2f08b65a
KM
1086 }
1087 if (rep->r_flags & R_SENT) {
1088 rep->r_flags &= ~R_SENT;
f0f1cbaa 1089 nmp->nm_sent--;
2f08b65a 1090 }
f0f1cbaa
KM
1091
1092 /*
1093 * Check for too many retries on soft mount.
1094 * nb: For hard mounts, r_retry == NFS_MAXREXMIT+1
1095 */
1096 if (++rep->r_rexmit > NFS_MAXREXMIT)
2f08b65a 1097 rep->r_rexmit = NFS_MAXREXMIT;
2f08b65a 1098
f0f1cbaa
KM
1099 /*
1100 * Check for server not responding
1101 */
1102 if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
170bfd05 1103 rep->r_rexmit > NFS_FISHY) {
f0f1cbaa 1104 if (rep->r_procp && rep->r_procp->p_session)
5580a343 1105 tprintf(rep->r_procp->p_session,
f0f1cbaa
KM
1106 "Nfs server %s, not responding\n",
1107 nmp->nm_mountp->mnt_stat.f_mntfromname);
1108 else
5580a343 1109 tprintf(NULL,
f0f1cbaa
KM
1110 "Nfs server %s, not responding\n",
1111 nmp->nm_mountp->mnt_stat.f_mntfromname);
1112 rep->r_flags |= R_TPRINTFMSG;
1113 }
170bfd05 1114 if (rep->r_rexmit >= rep->r_retry) { /* too many */
f0f1cbaa
KM
1115 nfsstats.rpctimeouts++;
1116 rep->r_flags |= R_SOFTTERM;
1117 continue;
1118 }
170bfd05
KM
1119 if (nmp->nm_sotype != SOCK_DGRAM)
1120 continue;
f0f1cbaa
KM
1121
1122 /*
1123 * If there is enough space and the window allows..
1124 * Resend it
1125 */
1126 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
1127 nmp->nm_sent < nmp->nm_window &&
1128 (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
1129 nfsstats.rpcretries++;
1130 if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
1131 error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
1132 (caddr_t)0, (struct mbuf *)0, (struct mbuf *)0);
1133 else
1134 error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
1135 nmp->nm_nam, (struct mbuf *)0, (struct mbuf *)0);
1136 if (error) {
1137 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
1138 so->so_error = 0;
1139 } else {
1140 /*
1141 * We need to time the request even though we
1142 * are retransmitting.
1143 */
1144 nmp->nm_rtt = 0;
1145 nmp->nm_sent++;
1146 rep->r_flags |= (R_SENT|R_TIMING);
1147 rep->r_timer = rep->r_timerinit;
1148 }
1149 }
2f08b65a
KM
1150 }
1151 splx(s);
1152 timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ);
1153}
1154
1155/*
1156 * NFS timer update and backoff. The "Jacobson/Karels/Karn" scheme is
1157 * used here. The timer state is held in the nfsmount structure and
1158 * a single request is used to clock the response. When successful
1159 * the rtt smoothing in nfs_updatetimer is used, when failed the backoff
1160 * is done by nfs_backofftimer. We also log failure messages in these
1161 * routines.
1162 *
1163 * Congestion variables are held in the nfshost structure which
1164 * is referenced by nfsmounts and shared per-server. This separation
1165 * makes it possible to do per-mount timing which allows varying disk
1166 * access times to be dealt with, while preserving a network oriented
1167 * congestion control scheme.
1168 *
1169 * The windowing implements the Jacobson/Karels slowstart algorithm
1170 * with adjusted scaling factors. We start with one request, then send
1171 * 4 more after each success until the ssthresh limit is reached, then
1172 * we increment at a rate proportional to the window. On failure, we
1173 * remember 3/4 the current window and clamp the send limit to 1. Note
1174 * ICMP source quench is not reflected in so->so_error so we ignore that
1175 * for now.
1176 *
1177 * NFS behaves much more like a transport protocol with these changes,
1178 * shedding the teenage pedal-to-the-metal tendencies of "other"
1179 * implementations.
1180 *
1181 * Timers and congestion avoidance by Tom Talpey, Open Software Foundation.
1182 */
1183
1184/*
1185 * The TCP algorithm was not forgiving enough. Because the NFS server
1186 * responds only after performing lookups/diskio/etc, we have to be
1187 * more prepared to accept a spiky variance. The TCP algorithm is:
f0f1cbaa 1188 * TCP_RTO(nmp) ((((nmp)->nm_srtt >> 2) + (nmp)->nm_rttvar) >> 1)
2f08b65a 1189 */
f0f1cbaa 1190#define NFS_RTO(nmp) (((nmp)->nm_srtt >> 3) + (nmp)->nm_rttvar)
2f08b65a 1191
f0f1cbaa
KM
1192nfs_updatetimer(nmp)
1193 register struct nfsmount *nmp;
2f08b65a 1194{
2f08b65a
KM
1195
1196 /* If retransmitted, clear and return */
f0f1cbaa
KM
1197 if (nmp->nm_rexmit || nmp->nm_currexmit) {
1198 nmp->nm_rexmit = nmp->nm_currexmit = 0;
2f08b65a
KM
1199 return;
1200 }
1201 /* If have a measurement, do smoothing */
f0f1cbaa 1202 if (nmp->nm_srtt) {
2f08b65a 1203 register short delta;
f0f1cbaa
KM
1204 delta = nmp->nm_rtt - (nmp->nm_srtt >> 3);
1205 if ((nmp->nm_srtt += delta) <= 0)
1206 nmp->nm_srtt = 1;
2f08b65a
KM
1207 if (delta < 0)
1208 delta = -delta;
f0f1cbaa
KM
1209 delta -= (nmp->nm_rttvar >> 2);
1210 if ((nmp->nm_rttvar += delta) <= 0)
1211 nmp->nm_rttvar = 1;
2f08b65a
KM
1212 /* Else initialize */
1213 } else {
f0f1cbaa
KM
1214 nmp->nm_rttvar = nmp->nm_rtt << 1;
1215 if (nmp->nm_rttvar == 0) nmp->nm_rttvar = 2;
1216 nmp->nm_srtt = nmp->nm_rttvar << 2;
2f08b65a
KM
1217 }
1218 /* Compute new Retransmission TimeOut and clip */
f0f1cbaa
KM
1219 nmp->nm_rto = NFS_RTO(nmp);
1220 if (nmp->nm_rto < NFS_MINTIMEO)
1221 nmp->nm_rto = NFS_MINTIMEO;
1222 else if (nmp->nm_rto > NFS_MAXTIMEO)
1223 nmp->nm_rto = NFS_MAXTIMEO;
2f08b65a
KM
1224
1225 /* Update window estimate */
f0f1cbaa
KM
1226 if (nmp->nm_window < nmp->nm_ssthresh) /* quickly */
1227 nmp->nm_window += 4;
2f08b65a 1228 else { /* slowly */
f0f1cbaa
KM
1229 register long incr = ++nmp->nm_winext;
1230 incr = (incr * incr) / nmp->nm_window;
2f08b65a 1231 if (incr > 0) {
f0f1cbaa
KM
1232 nmp->nm_winext = 0;
1233 ++nmp->nm_window;
2f08b65a
KM
1234 }
1235 }
f0f1cbaa
KM
1236 if (nmp->nm_window > NFS_MAXWINDOW)
1237 nmp->nm_window = NFS_MAXWINDOW;
2f08b65a
KM
1238}
1239
f0f1cbaa
KM
1240nfs_backofftimer(nmp)
1241 register struct nfsmount *nmp;
2f08b65a 1242{
2f08b65a
KM
1243 register unsigned long newrto;
1244
1245 /* Clip shift count */
f0f1cbaa
KM
1246 if (++nmp->nm_rexmit > 8 * sizeof nmp->nm_rto)
1247 nmp->nm_rexmit = 8 * sizeof nmp->nm_rto;
2f08b65a 1248 /* Back off RTO exponentially */
f0f1cbaa
KM
1249 newrto = NFS_RTO(nmp);
1250 newrto <<= (nmp->nm_rexmit - 1);
2f08b65a
KM
1251 if (newrto == 0 || newrto > NFS_MAXTIMEO)
1252 newrto = NFS_MAXTIMEO;
f0f1cbaa 1253 nmp->nm_rto = newrto;
2f08b65a
KM
1254
1255 /* If too many retries, message, assume a bogus RTT and re-measure */
f0f1cbaa
KM
1256 if (nmp->nm_currexmit < nmp->nm_rexmit) {
1257 nmp->nm_currexmit = nmp->nm_rexmit;
1258 if (nmp->nm_currexmit >= nfsrexmtthresh) {
1259 if (nmp->nm_currexmit == nfsrexmtthresh) {
1260 nmp->nm_rttvar += (nmp->nm_srtt >> 2);
1261 nmp->nm_srtt = 0;
a2907882
KM
1262 }
1263 }
a2907882 1264 }
2f08b65a 1265 /* Close down window but remember this point (3/4 current) for later */
f0f1cbaa
KM
1266 nmp->nm_ssthresh = ((nmp->nm_window << 1) + nmp->nm_window) >> 2;
1267 nmp->nm_window = 1;
1268 nmp->nm_winext = 0;
a2907882
KM
1269}
1270
1271/*
f0f1cbaa
KM
1272 * Test for a termination signal pending on procp.
1273 * This is used for NFSMNT_INT mounts.
a2907882 1274 */
f0f1cbaa
KM
1275nfs_sigintr(p)
1276 register struct proc *p;
1277{
1278 if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) &
1279 NFSINT_SIGMASK))
1280 return (1);
1281 else
1282 return (0);
1283}
2f08b65a 1284
f0f1cbaa
KM
1285/*
1286 * Lock a socket against others.
1287 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
1288 * and also to avoid race conditions between the processes with nfs requests
1289 * in progress when a reconnect is necessary.
1290 */
170bfd05
KM
1291nfs_solock(flagp)
1292 register int *flagp;
a2907882 1293{
2f08b65a 1294
f0f1cbaa
KM
1295 while (*flagp & NFSMNT_SCKLOCK) {
1296 *flagp |= NFSMNT_WANTSCK;
170bfd05 1297 (void) tsleep((caddr_t)flagp, PZERO-1, "nfsolck", 0);
2f08b65a 1298 }
f0f1cbaa
KM
1299 *flagp |= NFSMNT_SCKLOCK;
1300}
2f08b65a 1301
f0f1cbaa
KM
1302/*
1303 * Unlock the stream socket for others.
1304 */
1305nfs_sounlock(flagp)
170bfd05 1306 register int *flagp;
f0f1cbaa
KM
1307{
1308
1309 if ((*flagp & NFSMNT_SCKLOCK) == 0)
1310 panic("nfs sounlock");
1311 *flagp &= ~NFSMNT_SCKLOCK;
1312 if (*flagp & NFSMNT_WANTSCK) {
1313 *flagp &= ~NFSMNT_WANTSCK;
1314 wakeup((caddr_t)flagp);
2f08b65a 1315 }
f0f1cbaa
KM
1316}
1317
1318/*
1319 * This function compares two net addresses by family and returns TRUE
1320 * if they are the same.
1321 * If there is any doubt, return FALSE.
1322 */
1323nfs_netaddr_match(nam1, nam2)
1324 struct mbuf *nam1, *nam2;
1325{
1326 register struct sockaddr *saddr1, *saddr2;
1327
1328 saddr1 = mtod(nam1, struct sockaddr *);
1329 saddr2 = mtod(nam2, struct sockaddr *);
1330 if (saddr1->sa_family != saddr2->sa_family)
1331 return (0);
1332
1333 /*
1334 * Must do each address family separately since unused fields
1335 * are undefined values and not always zeroed.
1336 */
1337 switch (saddr1->sa_family) {
1338 case AF_INET:
1339 if (((struct sockaddr_in *)saddr1)->sin_addr.s_addr ==
1340 ((struct sockaddr_in *)saddr2)->sin_addr.s_addr)
1341 return (1);
1342 break;
1343 default:
1344 break;
1345 };
1346 return (0);
1347}
1348
1349/*
1350 * Check the hostname fields for nfsd's mask and match fields.
1351 * By address family:
1352 * - Bitwise AND the mask with the host address field
1353 * - Compare for == with match
1354 * return TRUE if not equal
1355 */
1356nfs_badnam(nam, msk, mtch)
1357 register struct mbuf *nam, *msk, *mtch;
1358{
1359 switch (mtod(nam, struct sockaddr *)->sa_family) {
1360 case AF_INET:
1361 return ((mtod(nam, struct sockaddr_in *)->sin_addr.s_addr &
1362 mtod(msk, struct sockaddr_in *)->sin_addr.s_addr) !=
1363 mtod(mtch, struct sockaddr_in *)->sin_addr.s_addr);
1364 default:
1365 printf("nfs_badmatch, unknown sa_family\n");
1366 return (0);
1367 };
a2907882 1368}