BSD 4_4 release
[unix-history] / usr / src / sys / netinet / tcp_input.c
index 65ca5c2..f447ffb 100644 (file)
@@ -1,49 +1,63 @@
 /*
 /*
- * Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California.
- * All rights reserved.
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ *     The Regents of the University of California.  All rights reserved.
  *
  *
- * Redistribution is only permitted until one year after the first shipment
- * of 4.4BSD by the Regents.  Otherwise, redistribution and use in source and
- * binary forms are permitted provided that: (1) source distributions retain
- * this entire copyright notice and comment, and (2) distributions including
- * binaries display the following acknowledgement:  This product includes
- * software developed by the University of California, Berkeley and its
- * contributors'' in the documentation or other materials provided with the
- * distribution and in all advertising materials mentioning features or use
- * of this software.  Neither the name of the University nor the names of
- * its contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- * THIS SOFTWARE IS PROVIDED AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
  *
  *
- *     @(#)tcp_input.c 7.25 (Berkeley) 6/30/90
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)tcp_input.c 8.2 (Berkeley) 8/10/93
  */
 
  */
 
-#include "param.h"
-#include "systm.h"
-#include "malloc.h"
-#include "mbuf.h"
-#include "protosw.h"
-#include "socket.h"
-#include "socketvar.h"
-#include "errno.h"
-
-#include "../net/if.h"
-#include "../net/route.h"
-
-#include "in.h"
-#include "in_systm.h"
-#include "ip.h"
-#include "in_pcb.h"
-#include "ip_var.h"
-#include "tcp.h"
-#include "tcp_fsm.h"
-#include "tcp_seq.h"
-#include "tcp_timer.h"
-#include "tcp_var.h"
-#include "tcpip.h"
-#include "tcp_debug.h"
+#ifndef TUBA_INCLUDE
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
 
 int    tcprexmtthresh = 3;
 int    tcppredack;     /* XXX debugging: times hdr predict ok for acks */
 
 int    tcprexmtthresh = 3;
 int    tcppredack;     /* XXX debugging: times hdr predict ok for acks */
@@ -52,7 +66,15 @@ int  tcppcbcachemiss;
 struct tcpiphdr tcp_saveti;
 struct inpcb *tcp_last_inpcb = &tcb;
 
 struct tcpiphdr tcp_saveti;
 struct inpcb *tcp_last_inpcb = &tcb;
 
-struct tcpcb *tcp_newtcpcb();
+extern u_long sb_max;
+
+#endif /* TUBA_INCLUDE */
+#define TCP_PAWS_IDLE  (24 * 24 * 60 * 60 * PR_SLOWHZ)
+
+/* for modulo comparisons of timestamps */
+#define TSTMP_LT(a,b)  ((int)((a)-(b)) < 0)
+#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0)
+
 
 /*
  * Insert segment ti into reassembly queue of tcp with
 
 /*
  * Insert segment ti into reassembly queue of tcp with
@@ -80,7 +102,9 @@ struct       tcpcb *tcp_newtcpcb();
                tp->t_flags |= TF_ACKNOW; \
        } \
 }
                tp->t_flags |= TF_ACKNOW; \
        } \
 }
+#ifndef TUBA_INCLUDE
 
 
+int
 tcp_reass(tp, ti, m)
        register struct tcpcb *tp;
        register struct tcpiphdr *ti;
 tcp_reass(tp, ti, m)
        register struct tcpcb *tp;
        register struct tcpiphdr *ti;
@@ -188,13 +212,15 @@ present:
  * TCP input routine, follows pages 65-76 of the
  * protocol specification dated September, 1981 very closely.
  */
  * TCP input routine, follows pages 65-76 of the
  * protocol specification dated September, 1981 very closely.
  */
+void
 tcp_input(m, iphlen)
        register struct mbuf *m;
        int iphlen;
 {
        register struct tcpiphdr *ti;
        register struct inpcb *inp;
 tcp_input(m, iphlen)
        register struct mbuf *m;
        int iphlen;
 {
        register struct tcpiphdr *ti;
        register struct inpcb *inp;
-       struct mbuf *om = 0;
+       caddr_t optp = NULL;
+       int optlen;
        int len, tlen, off;
        register struct tcpcb *tp = 0;
        register int tiflags;
        int len, tlen, off;
        register struct tcpcb *tp = 0;
        register int tiflags;
@@ -204,6 +230,8 @@ tcp_input(m, iphlen)
        struct in_addr laddr;
        int dropsocket = 0;
        int iss = 0;
        struct in_addr laddr;
        int dropsocket = 0;
        int iss = 0;
+       u_long tiwin, ts_val, ts_ecr;
+       int ts_present = 0;
 
        tcpstat.tcps_rcvtotal++;
        /*
 
        tcpstat.tcps_rcvtotal++;
        /*
@@ -234,6 +262,7 @@ tcp_input(m, iphlen)
                tcpstat.tcps_rcvbadsum++;
                goto drop;
        }
                tcpstat.tcps_rcvbadsum++;
                goto drop;
        }
+#endif /* TUBA_INCLUDE */
 
        /*
         * Check that TCP offset makes sense,
 
        /*
         * Check that TCP offset makes sense,
@@ -254,16 +283,24 @@ tcp_input(m, iphlen)
                        }
                        ti = mtod(m, struct tcpiphdr *);
                }
                        }
                        ti = mtod(m, struct tcpiphdr *);
                }
-               om = m_get(M_DONTWAIT, MT_DATA);
-               if (om == 0)
-                       goto drop;
-               om->m_len = off - sizeof (struct tcphdr);
-               { caddr_t op = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
-                 bcopy(op, mtod(om, caddr_t), (unsigned)om->m_len);
-                 m->m_len -= om->m_len;
-                 m->m_pkthdr.len -= om->m_len;
-                 bcopy(op+om->m_len, op,
-                  (unsigned)(m->m_len-sizeof (struct tcpiphdr)));
+               optlen = off - sizeof (struct tcphdr);
+               optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
+               /* 
+                * Do quick retrieval of timestamp options ("options
+                * prediction?").  If timestamp is the only option and it's
+                * formatted as recommended in RFC 1323 appendix A, we
+                * quickly get the values now and don't bother calling
+                * tcp_dooptions(), etc.
+                */
+               if ((optlen == TCPOLEN_TSTAMP_APPA ||
+                    (optlen > TCPOLEN_TSTAMP_APPA &&
+                       optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
+                    *(u_long *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
+                    (ti->ti_flags & TH_SYN) == 0) {
+                       ts_present = 1;
+                       ts_val = ntohl(*(u_long *)(optp + 4));
+                       ts_ecr = ntohl(*(u_long *)(optp + 8));
+                       optp = NULL;    /* we've parsed the options */
                }
        }
        tiflags = ti->ti_flags;
                }
        }
        tiflags = ti->ti_flags;
@@ -305,6 +342,13 @@ findpcb:
                goto dropwithreset;
        if (tp->t_state == TCPS_CLOSED)
                goto drop;
                goto dropwithreset;
        if (tp->t_state == TCPS_CLOSED)
                goto drop;
+       
+       /* Unscale the window into a 32-bit value. */
+       if ((tiflags & TH_SYN) == 0)
+               tiwin = ti->ti_win << tp->snd_scale;
+       else
+               tiwin = ti->ti_win;
+
        so = inp->inp_socket;
        if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
                if (so->so_options & SO_DEBUG) {
        so = inp->inp_socket;
        if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
                if (so->so_options & SO_DEBUG) {
@@ -335,6 +379,12 @@ findpcb:
 #endif
                        tp = intotcpcb(inp);
                        tp->t_state = TCPS_LISTEN;
 #endif
                        tp = intotcpcb(inp);
                        tp->t_state = TCPS_LISTEN;
+
+                       /* Compute proper scaling value from buffer space
+                        */
+                       while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+                          TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat)
+                               tp->request_r_scale++;
                }
        }
 
                }
        }
 
@@ -349,10 +399,10 @@ findpcb:
         * Process options if not in LISTEN state,
         * else do it below (after getting remote address).
         */
         * Process options if not in LISTEN state,
         * else do it below (after getting remote address).
         */
-       if (om && tp->t_state != TCPS_LISTEN) {
-               tcp_dooptions(tp, om, ti);
-               om = 0;
-       }
+       if (optp && tp->t_state != TCPS_LISTEN)
+               tcp_dooptions(tp, optp, optlen, ti,
+                       &ts_present, &ts_val, &ts_ecr);
+
        /* 
         * Header prediction: check for the two common cases
         * of a uni-directional data xfer.  If the packet has
        /* 
         * Header prediction: check for the two common cases
         * of a uni-directional data xfer.  If the packet has
@@ -369,9 +419,21 @@ findpcb:
         */
        if (tp->t_state == TCPS_ESTABLISHED &&
            (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
         */
        if (tp->t_state == TCPS_ESTABLISHED &&
            (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
+           (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) &&
            ti->ti_seq == tp->rcv_nxt &&
            ti->ti_seq == tp->rcv_nxt &&
-           ti->ti_win && ti->ti_win == tp->snd_wnd &&
+           tiwin && tiwin == tp->snd_wnd &&
            tp->snd_nxt == tp->snd_max) {
            tp->snd_nxt == tp->snd_max) {
+
+               /* 
+                * If last ACK falls within this segment's sequence numbers,
+                *  record the timestamp.
+                */
+               if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
+                  SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len)) {
+                       tp->ts_recent_age = tcp_now;
+                       tp->ts_recent = ts_val;
+               }
+
                if (ti->ti_len == 0) {
                        if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
                            SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
                if (ti->ti_len == 0) {
                        if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
                            SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
@@ -380,8 +442,11 @@ findpcb:
                                 * this is a pure ack for outstanding data.
                                 */
                                ++tcppredack;
                                 * this is a pure ack for outstanding data.
                                 */
                                ++tcppredack;
-                               if (tp->t_rtt && SEQ_GT(ti->ti_ack,tp->t_rtseq))
-                                       tcp_xmit_timer(tp);
+                               if (ts_present)
+                                       tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
+                               else if (tp->t_rtt &&
+                                           SEQ_GT(ti->ti_ack, tp->t_rtseq))
+                                       tcp_xmit_timer(tp, tp->t_rtt);
                                acked = ti->ti_ack - tp->snd_una;
                                tcpstat.tcps_rcvackpack++;
                                tcpstat.tcps_rcvackbyte += acked;
                                acked = ti->ti_ack - tp->snd_una;
                                tcpstat.tcps_rcvackpack++;
                                tcpstat.tcps_rcvackbyte += acked;
@@ -422,11 +487,11 @@ findpcb:
                        tcpstat.tcps_rcvpack++;
                        tcpstat.tcps_rcvbyte += ti->ti_len;
                        /*
                        tcpstat.tcps_rcvpack++;
                        tcpstat.tcps_rcvbyte += ti->ti_len;
                        /*
-                        * Drop TCP and IP headers then add data
-                        * to socket buffer
+                        * Drop TCP, IP headers and TCP options then add data
+                        * to socket buffer.
                         */
                         */
-                       m->m_data += sizeof(struct tcpiphdr);
-                       m->m_len -= sizeof(struct tcpiphdr);
+                       m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+                       m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
                        sbappend(&so->so_rcv, m);
                        sorwakeup(so);
                        tp->t_flags |= TF_DELACK;
                        sbappend(&so->so_rcv, m);
                        sorwakeup(so);
                        tp->t_flags |= TF_DELACK;
@@ -435,10 +500,10 @@ findpcb:
        }
 
        /*
        }
 
        /*
-        * Drop TCP and IP headers; TCP options were dropped above.
+        * Drop TCP, IP headers and TCP options.
         */
         */
-       m->m_data += sizeof(struct tcpiphdr);
-       m->m_len -= sizeof(struct tcpiphdr);
+       m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+       m->m_len  -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
 
        /*
         * Calculate amount of space in receive window,
 
        /*
         * Calculate amount of space in receive window,
@@ -479,7 +544,13 @@ findpcb:
                        goto dropwithreset;
                if ((tiflags & TH_SYN) == 0)
                        goto drop;
                        goto dropwithreset;
                if ((tiflags & TH_SYN) == 0)
                        goto drop;
-               if (m->m_flags & M_BCAST)
+               /*
+                * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN.
+                * in_broadcast() should never return true on a received
+                * packet with M_BCAST not set.
+                */
+               if (m->m_flags & (M_BCAST|M_MCAST) ||
+                   IN_MULTICAST(ti->ti_dst.s_addr))
                        goto drop;
                am = m_get(M_DONTWAIT, MT_SONAME);      /* XXX */
                if (am == NULL)
                        goto drop;
                am = m_get(M_DONTWAIT, MT_SONAME);      /* XXX */
                if (am == NULL)
@@ -490,6 +561,7 @@ findpcb:
                sin->sin_len = sizeof(*sin);
                sin->sin_addr = ti->ti_src;
                sin->sin_port = ti->ti_sport;
                sin->sin_len = sizeof(*sin);
                sin->sin_addr = ti->ti_src;
                sin->sin_port = ti->ti_sport;
+               bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));
                laddr = inp->inp_laddr;
                if (inp->inp_laddr.s_addr == INADDR_ANY)
                        inp->inp_laddr = ti->ti_dst;
                laddr = inp->inp_laddr;
                if (inp->inp_laddr.s_addr == INADDR_ANY)
                        inp->inp_laddr = ti->ti_dst;
@@ -505,10 +577,9 @@ findpcb:
                        dropsocket = 0;         /* socket is already gone */
                        goto drop;
                }
                        dropsocket = 0;         /* socket is already gone */
                        goto drop;
                }
-               if (om) {
-                       tcp_dooptions(tp, om, ti);
-                       om = 0;
-               }
+               if (optp)
+                       tcp_dooptions(tp, optp, optlen, ti,
+                               &ts_present, &ts_val, &ts_ecr);
                if (iss)
                        tp->iss = iss;
                else
                if (iss)
                        tp->iss = iss;
                else
@@ -562,6 +633,12 @@ findpcb:
                        tcpstat.tcps_connects++;
                        soisconnected(so);
                        tp->t_state = TCPS_ESTABLISHED;
                        tcpstat.tcps_connects++;
                        soisconnected(so);
                        tp->t_state = TCPS_ESTABLISHED;
+                       /* Do window scaling on this connection? */
+                       if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+                               (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+                               tp->snd_scale = tp->requested_s_scale;
+                               tp->rcv_scale = tp->request_r_scale;
+                       }
                        (void) tcp_reass(tp, (struct tcpiphdr *)0,
                                (struct mbuf *)0);
                        /*
                        (void) tcp_reass(tp, (struct tcpiphdr *)0,
                                (struct mbuf *)0);
                        /*
@@ -569,7 +646,7 @@ findpcb:
                         * use its rtt as our initial srtt & rtt var.
                         */
                        if (tp->t_rtt)
                         * use its rtt as our initial srtt & rtt var.
                         */
                        if (tp->t_rtt)
-                               tcp_xmit_timer(tp);
+                               tcp_xmit_timer(tp, tp->t_rtt);
                } else
                        tp->t_state = TCPS_SYN_RECEIVED;
 
                } else
                        tp->t_state = TCPS_SYN_RECEIVED;
 
@@ -595,10 +672,39 @@ trimthenstep6:
 
        /*
         * States other than LISTEN or SYN_SENT.
 
        /*
         * States other than LISTEN or SYN_SENT.
-        * First check that at least some bytes of segment are within 
+        * First check timestamp, if present.
+        * Then check that at least some bytes of segment are within 
         * receive window.  If segment begins before rcv_nxt,
         * drop leading data (and SYN); if nothing left, just ack.
         * receive window.  If segment begins before rcv_nxt,
         * drop leading data (and SYN); if nothing left, just ack.
+        * 
+        * RFC 1323 PAWS: If this segment carries a timestamp whose value
+        * (ts_val) is less than ts_recent, drop it.
         */
         */
+       if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent &&
+           TSTMP_LT(ts_val, tp->ts_recent)) {
+
+               /* Check to see if ts_recent is over 24 days old.  */
+               if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
+                       /*
+                        * Invalidate ts_recent.  If this segment updates
+                        * ts_recent, the age will be reset later and ts_recent
+                        * will get a valid value.  If it does not, setting
+                        * ts_recent to zero will at least satisfy the
+                        * requirement that zero be placed in the timestamp
+                        * echo reply when ts_recent isn't valid.  The
+                        * age isn't reset until we get a valid ts_recent
+                        * because we don't want out-of-order segments to be
+                        * dropped when ts_recent is old.
+                        */
+                       tp->ts_recent = 0;
+               } else {
+                       tcpstat.tcps_rcvduppack++;
+                       tcpstat.tcps_rcvdupbyte += ti->ti_len;
+                       tcpstat.tcps_pawsdrop++;
+                       goto dropafterack;
+               }
+       }
+
        todrop = tp->rcv_nxt - ti->ti_seq;
        if (todrop > 0) {
                if (tiflags & TH_SYN) {
        todrop = tp->rcv_nxt - ti->ti_seq;
        if (todrop > 0) {
                if (tiflags & TH_SYN) {
@@ -610,8 +716,7 @@ trimthenstep6:
                                tiflags &= ~TH_URG;
                        todrop--;
                }
                                tiflags &= ~TH_URG;
                        todrop--;
                }
-               if (todrop > ti->ti_len ||
-                   todrop == ti->ti_len && (tiflags&TH_FIN) == 0) {
+               if (todrop >= ti->ti_len) {
                        tcpstat.tcps_rcvduppack++;
                        tcpstat.tcps_rcvdupbyte += ti->ti_len;
                        /*
                        tcpstat.tcps_rcvduppack++;
                        tcpstat.tcps_rcvdupbyte += ti->ti_len;
                        /*
@@ -632,8 +737,15 @@ trimthenstep6:
                                todrop = ti->ti_len;
                                tiflags &= ~TH_FIN;
                                tp->t_flags |= TF_ACKNOW;
                                todrop = ti->ti_len;
                                tiflags &= ~TH_FIN;
                                tp->t_flags |= TF_ACKNOW;
-                       } else
-                               goto dropafterack;
+                       } else {
+                               /*
+                                * Handle the case when a bound socket connects
+                                * to itself. Allow packets with a SYN and
+                                * an ACK to continue with the processing.
+                                */
+                               if (todrop != 0 || (tiflags & TH_ACK) == 0)
+                                       goto dropafterack;
+                       }
                } else {
                        tcpstat.tcps_rcvpartduppack++;
                        tcpstat.tcps_rcvpartdupbyte += todrop;
                } else {
                        tcpstat.tcps_rcvpartduppack++;
                        tcpstat.tcps_rcvpartdupbyte += todrop;
@@ -701,6 +813,17 @@ trimthenstep6:
                tiflags &= ~(TH_PUSH|TH_FIN);
        }
 
                tiflags &= ~(TH_PUSH|TH_FIN);
        }
 
+       /*
+        * If last ACK falls within this segment's sequence numbers,
+        * record its timestamp.
+        */
+       if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
+           SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len +
+                  ((tiflags & (TH_SYN|TH_FIN)) != 0))) {
+               tp->ts_recent_age = tcp_now;
+               tp->ts_recent = ts_val;
+       }
+
        /*
         * If the RST bit is set examine the state:
         *    SYN_RECEIVED STATE:
        /*
         * If the RST bit is set examine the state:
         *    SYN_RECEIVED STATE:
@@ -767,6 +890,12 @@ trimthenstep6:
                tcpstat.tcps_connects++;
                soisconnected(so);
                tp->t_state = TCPS_ESTABLISHED;
                tcpstat.tcps_connects++;
                soisconnected(so);
                tp->t_state = TCPS_ESTABLISHED;
+               /* Do window scaling? */
+               if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+                       (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+                       tp->snd_scale = tp->requested_s_scale;
+                       tp->rcv_scale = tp->request_r_scale;
+               }
                (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
                tp->snd_wl1 = ti->ti_seq - 1;
                /* fall into ... */
                (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
                tp->snd_wl1 = ti->ti_seq - 1;
                /* fall into ... */
@@ -788,7 +917,7 @@ trimthenstep6:
        case TCPS_TIME_WAIT:
 
                if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
        case TCPS_TIME_WAIT:
 
                if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
-                       if (ti->ti_len == 0 && ti->ti_win == tp->snd_wnd) {
+                       if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
                                tcpstat.tcps_rcvdupack++;
                                /*
                                 * If we have outstanding data (other than
                                tcpstat.tcps_rcvdupack++;
                                /*
                                 * If we have outstanding data (other than
@@ -862,14 +991,18 @@ trimthenstep6:
                tcpstat.tcps_rcvackbyte += acked;
 
                /*
                tcpstat.tcps_rcvackbyte += acked;
 
                /*
-                * If transmit timer is running and timed sequence
+                * If we have a timestamp reply, update smoothed
+                * round trip time.  If no timestamp is present but
+                * transmit timer is running and timed sequence
                 * number was acked, update smoothed round trip time.
                 * Since we now have an rtt measurement, cancel the
                 * timer backoff (cf., Phil Karn's retransmit alg.).
                 * Recompute the initial retransmit timer.
                 */
                 * number was acked, update smoothed round trip time.
                 * Since we now have an rtt measurement, cancel the
                 * timer backoff (cf., Phil Karn's retransmit alg.).
                 * Recompute the initial retransmit timer.
                 */
-               if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
-                       tcp_xmit_timer(tp);
+               if (ts_present)
+                       tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
+               else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
+                       tcp_xmit_timer(tp,tp->t_rtt);
 
                /*
                 * If all outstanding data is acked, stop retransmit
 
                /*
                 * If all outstanding data is acked, stop retransmit
@@ -897,7 +1030,7 @@ trimthenstep6:
 
                if (cw > tp->snd_ssthresh)
                        incr = incr * incr / cw + incr / 8;
 
                if (cw > tp->snd_ssthresh)
                        incr = incr * incr / cw + incr / 8;
-               tp->snd_cwnd = min(cw + incr, TCP_MAXWIN);
+               tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
                }
                if (acked > so->so_snd.sb_cc) {
                        tp->snd_wnd -= so->so_snd.sb_cc;
                }
                if (acked > so->so_snd.sb_cc) {
                        tp->snd_wnd -= so->so_snd.sb_cc;
@@ -985,12 +1118,12 @@ step6:
        if ((tiflags & TH_ACK) &&
            (SEQ_LT(tp->snd_wl1, ti->ti_seq) || tp->snd_wl1 == ti->ti_seq &&
            (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
        if ((tiflags & TH_ACK) &&
            (SEQ_LT(tp->snd_wl1, ti->ti_seq) || tp->snd_wl1 == ti->ti_seq &&
            (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
-            tp->snd_wl2 == ti->ti_ack && ti->ti_win > tp->snd_wnd))) {
+            tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))) {
                /* keep track of pure window updates */
                if (ti->ti_len == 0 &&
                /* keep track of pure window updates */
                if (ti->ti_len == 0 &&
-                   tp->snd_wl2 == ti->ti_ack && ti->ti_win > tp->snd_wnd)
+                   tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd)
                        tcpstat.tcps_rcvwinupd++;
                        tcpstat.tcps_rcvwinupd++;
-               tp->snd_wnd = ti->ti_win;
+               tp->snd_wnd = tiwin;
                tp->snd_wl1 = ti->ti_seq;
                tp->snd_wl2 = ti->ti_ack;
                if (tp->snd_wnd > tp->max_sndwnd)
                tp->snd_wl1 = ti->ti_seq;
                tp->snd_wl2 = ti->ti_ack;
                if (tp->snd_wnd > tp->max_sndwnd)
@@ -1009,7 +1142,7 @@ step6:
                 * soreceive.  It's hard to imagine someone
                 * actually wanting to send this much urgent data.
                 */
                 * soreceive.  It's hard to imagine someone
                 * actually wanting to send this much urgent data.
                 */
-               if (ti->ti_urp + so->so_rcv.sb_cc > SB_MAX) {
+               if (ti->ti_urp + so->so_rcv.sb_cc > sb_max) {
                        ti->ti_urp = 0;                 /* XXX */
                        tiflags &= ~TH_URG;             /* XXX */
                        goto dodata;                    /* XXX */
                        ti->ti_urp = 0;                 /* XXX */
                        tiflags &= ~TH_URG;             /* XXX */
                        goto dodata;                    /* XXX */
@@ -1153,16 +1286,13 @@ dropafterack:
        return;
 
 dropwithreset:
        return;
 
 dropwithreset:
-       if (om) {
-               (void) m_free(om);
-               om = 0;
-       }
        /*
         * Generate a RST, dropping incoming segment.
         * Make ACK acceptable to originator of segment.
        /*
         * Generate a RST, dropping incoming segment.
         * Make ACK acceptable to originator of segment.
-        * Don't bother to respond if destination was broadcast.
+        * Don't bother to respond if destination was broadcast/multicast.
         */
         */
-       if ((tiflags & TH_RST) || m->m_flags & M_BCAST)
+       if ((tiflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST) ||
+           IN_MULTICAST(ti->ti_dst.s_addr))
                goto drop;
        if (tiflags & TH_ACK)
                tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
                goto drop;
        if (tiflags & TH_ACK)
                tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
@@ -1178,8 +1308,6 @@ dropwithreset:
        return;
 
 drop:
        return;
 
 drop:
-       if (om)
-               (void) m_free(om);
        /*
         * Drop space held by incoming segment and return.
         */
        /*
         * Drop space held by incoming segment and return.
         */
@@ -1190,19 +1318,21 @@ drop:
        if (dropsocket)
                (void) soabort(so);
        return;
        if (dropsocket)
                (void) soabort(so);
        return;
+#ifndef TUBA_INCLUDE
 }
 
 }
 
-tcp_dooptions(tp, om, ti)
+void
+tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr)
        struct tcpcb *tp;
        struct tcpcb *tp;
-       struct mbuf *om;
+       u_char *cp;
+       int cnt;
        struct tcpiphdr *ti;
        struct tcpiphdr *ti;
+       int *ts_present;
+       u_long *ts_val, *ts_ecr;
 {
 {
-       register u_char *cp;
        u_short mss;
        u_short mss;
-       int opt, optlen, cnt;
+       int opt, optlen;
 
 
-       cp = mtod(om, u_char *);
-       cnt = om->m_len;
        for (; cnt > 0; cnt -= optlen, cp += optlen) {
                opt = cp[0];
                if (opt == TCPOPT_EOL)
        for (; cnt > 0; cnt -= optlen, cp += optlen) {
                opt = cp[0];
                if (opt == TCPOPT_EOL)
@@ -1220,7 +1350,7 @@ tcp_dooptions(tp, om, ti)
                        continue;
 
                case TCPOPT_MAXSEG:
                        continue;
 
                case TCPOPT_MAXSEG:
-                       if (optlen != 4)
+                       if (optlen != TCPOLEN_MAXSEG)
                                continue;
                        if (!(ti->ti_flags & TH_SYN))
                                continue;
                                continue;
                        if (!(ti->ti_flags & TH_SYN))
                                continue;
@@ -1228,9 +1358,37 @@ tcp_dooptions(tp, om, ti)
                        NTOHS(mss);
                        (void) tcp_mss(tp, mss);        /* sets t_maxseg */
                        break;
                        NTOHS(mss);
                        (void) tcp_mss(tp, mss);        /* sets t_maxseg */
                        break;
+
+               case TCPOPT_WINDOW:
+                       if (optlen != TCPOLEN_WINDOW)
+                               continue;
+                       if (!(ti->ti_flags & TH_SYN))
+                               continue;
+                       tp->t_flags |= TF_RCVD_SCALE;
+                       tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
+                       break;
+
+               case TCPOPT_TIMESTAMP:
+                       if (optlen != TCPOLEN_TIMESTAMP)
+                               continue;
+                       *ts_present = 1;
+                       bcopy((char *)cp + 2, (char *) ts_val, sizeof(*ts_val));
+                       NTOHL(*ts_val);
+                       bcopy((char *)cp + 6, (char *) ts_ecr, sizeof(*ts_ecr));
+                       NTOHL(*ts_ecr);
+
+                       /* 
+                        * A timestamp received in a SYN makes
+                        * it ok to send timestamp requests and replies.
+                        */
+                       if (ti->ti_flags & TH_SYN) {
+                               tp->t_flags |= TF_RCVD_TSTMP;
+                               tp->ts_recent = *ts_val;
+                               tp->ts_recent_age = tcp_now;
+                       }
+                       break;
                }
        }
                }
        }
-       (void) m_free(om);
 }
 
 /*
 }
 
 /*
@@ -1239,6 +1397,7 @@ tcp_dooptions(tp, om, ti)
  * It is still reflected in the segment length for
  * sequencing purposes.
  */
  * It is still reflected in the segment length for
  * sequencing purposes.
  */
+void
 tcp_pulloutofband(so, ti, m)
        struct socket *so;
        struct tcpiphdr *ti;
 tcp_pulloutofband(so, ti, m)
        struct socket *so;
        struct tcpiphdr *ti;
@@ -1269,8 +1428,10 @@ tcp_pulloutofband(so, ti, m)
  * Collect new round-trip time estimate
  * and update averages and current timeout.
  */
  * Collect new round-trip time estimate
  * and update averages and current timeout.
  */
-tcp_xmit_timer(tp)
+void
+tcp_xmit_timer(tp, rtt)
        register struct tcpcb *tp;
        register struct tcpcb *tp;
+       short rtt;
 {
        register short delta;
 
 {
        register short delta;
 
@@ -1281,9 +1442,9 @@ tcp_xmit_timer(tp)
                 * binary point (i.e., scaled by 8).  The following magic
                 * is equivalent to the smoothing algorithm in rfc793 with
                 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
                 * binary point (i.e., scaled by 8).  The following magic
                 * is equivalent to the smoothing algorithm in rfc793 with
                 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
-                * point).  Adjust t_rtt to origin 0.
+                * point).  Adjust rtt to origin 0.
                 */
                 */
-               delta = tp->t_rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT);
+               delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT);
                if ((tp->t_srtt += delta) <= 0)
                        tp->t_srtt = 1;
                /*
                if ((tp->t_srtt += delta) <= 0)
                        tp->t_srtt = 1;
                /*
@@ -1305,10 +1466,10 @@ tcp_xmit_timer(tp)
                /* 
                 * No rtt measurement yet - use the unsmoothed rtt.
                 * Set the variance to half the rtt (so our first
                /* 
                 * No rtt measurement yet - use the unsmoothed rtt.
                 * Set the variance to half the rtt (so our first
-                * retransmit happens at 2*rtt)
+                * retransmit happens at 3*rtt).
                 */
                 */
-               tp->t_srtt = tp->t_rtt << TCP_RTT_SHIFT;
-               tp->t_rttvar = tp->t_rtt << (TCP_RTTVAR_SHIFT - 1);
+               tp->t_srtt = rtt << TCP_RTT_SHIFT;
+               tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
        }
        tp->t_rtt = 0;
        tp->t_rxtshift = 0;
        }
        tp->t_rtt = 0;
        tp->t_rxtshift = 0;
@@ -1352,7 +1513,7 @@ tcp_xmit_timer(tp)
  * While looking at the routing entry, we also initialize other path-dependent
  * parameters from pre-set or cached values in the routing entry.
  */
  * While looking at the routing entry, we also initialize other path-dependent
  * parameters from pre-set or cached values in the routing entry.
  */
-
+int
 tcp_mss(tp, offer)
        register struct tcpcb *tp;
        u_short offer;
 tcp_mss(tp, offer)
        register struct tcpcb *tp;
        u_short offer;
@@ -1391,7 +1552,11 @@ tcp_mss(tp, offer)
         * to scaled multiples of the slow timeout timer.
         */
        if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
         * to scaled multiples of the slow timeout timer.
         */
        if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
-               if (rt->rt_rmx.rmx_locks & RTV_MTU)
+               /*
+                * XXX the lock bit for RTT indicates that the value
+                * is also a minimum value; this is subject to time.
+                */
+               if (rt->rt_rmx.rmx_locks & RTV_RTT)
                        tp->t_rttmin = rtt / (RTM_RTTUNIT / PR_SLOWHZ);
                tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
                if (rt->rt_rmx.rmx_rttvar)
                        tp->t_rttmin = rtt / (RTM_RTTUNIT / PR_SLOWHZ);
                tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
                if (rt->rt_rmx.rmx_rttvar)
@@ -1449,8 +1614,10 @@ tcp_mss(tp, offer)
                if (bufsize < mss)
                        mss = bufsize;
                else {
                if (bufsize < mss)
                        mss = bufsize;
                else {
-                       bufsize = min(bufsize, SB_MAX) / mss * mss;
-                       (void) sbreserve(&so->so_snd, bufsize);
+                       bufsize = roundup(bufsize, mss);
+                       if (bufsize > sb_max)
+                               bufsize = sb_max;
+                       (void)sbreserve(&so->so_snd, bufsize);
                }
                tp->t_maxseg = mss;
 
                }
                tp->t_maxseg = mss;
 
@@ -1459,8 +1626,10 @@ tcp_mss(tp, offer)
 #endif
                        bufsize = so->so_rcv.sb_hiwat;
                if (bufsize > mss) {
 #endif
                        bufsize = so->so_rcv.sb_hiwat;
                if (bufsize > mss) {
-                       bufsize = min(bufsize, SB_MAX) / mss * mss;
-                       (void) sbreserve(&so->so_rcv, bufsize);
+                       bufsize = roundup(bufsize, mss);
+                       if (bufsize > sb_max)
+                               bufsize = sb_max;
+                       (void)sbreserve(&so->so_rcv, bufsize);
                }
        }
        tp->snd_cwnd = mss;
                }
        }
        tp->snd_cwnd = mss;
@@ -1478,3 +1647,4 @@ tcp_mss(tp, offer)
 #endif /* RTV_MTU */
        return (mss);
 }
 #endif /* RTV_MTU */
        return (mss);
 }
+#endif /* TUBA_INCLUDE */