BSD 4_4_Lite1 release
[unix-history] / usr / src / sys / netinet / tcp_usrreq.c
index 43722f0..38a08d6 100644 (file)
-/* tcp_usrreq.c 1.31 81/11/20 */
-
-#include "../h/param.h"
-#include "../h/systm.h"
-#include "../h/mbuf.h"
-#include "../h/socket.h"
-#include "../h/socketvar.h"
-#include "../h/protosw.h"
-#include "../net/inet.h"
-#include "../net/inet_pcb.h"
-#include "../net/inet_systm.h"
-#include "../net/if.h"
-#include "../net/imp.h"
-#include "../net/ip.h"
-#include "../net/ip_var.h"
-#include "../net/tcp.h"
-#define TCPFSTAB
-#ifdef TCPDEBUG
-#define TCPSTATES
-#endif
-#include "../net/tcp_fsm.h"
-#include "../net/tcp_var.h"
-#include "/usr/include/errno.h"
-
 /*
 /*
- * Tcp initialization
+ * Copyright (c) 1982, 1986, 1988, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)tcp_usrreq.c        8.2 (Berkeley) 1/3/94
  */
  */
-tcp_init()
-{
 
 
-       tcp_iss = 1;            /* wrong */
-       tcb.inp_next = tcb.inp_prev = &tcb;
-}
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
 
 /*
 
 /*
- * Tcp finite state machine entries for timer and user generated
- * requests.  These routines raise the ipl to that of the network
- * to prevent reentry.  In particluar, this requires that the software
- * clock interrupt have lower priority than the network so that
- * we can enter the network from timeout routines without improperly
- * nesting the interrupt stack.
+ * TCP protocol interface to socket abstraction.
  */
  */
+extern char *tcpstates[];
 
 /*
 
 /*
- * Tcp protocol timeout routine called every 500 ms.
- * Updates the timers in all active tcb's and
- * causes finite state machine actions if timers expire.
- */
-tcp_slowtimo()
-{
-       register struct inpcb *ip;
-       register struct tcpcb *tp;
-       int s = splnet();
-       register short *tmp;
-       register int i;
-COUNT(TCP_TIMEO);
-
-       /*
-        * Search through tcb's and update active timers.
-        */
-       for (ip = tcb.inp_next; ip != &tcb; ip = ip->inp_next) {
-               tp = intotcpcb(ip);
-               tmp = &tp->t_init;
-               for (i = 0; i < TNTIMERS; i++) {
-                       if (*tmp && --*tmp == 0)
-                               (void) tcp_usrreq(tp->t_inpcb->inp_socket,
-                                   PRU_SLOWTIMO, (struct mbuf *)0,
-                                   (caddr_t)i);
-                       tmp++;
-               }
-               tp->t_xmt++;
-       }
-       tcp_iss += ISSINCR/2;           /* increment iss */
-       splx(s);
-}
-
-/*
- * Cancel all timers for tcp tp.
- */
-tcp_tcancel(tp)
-       struct tcpcb *tp;
-{
-       register short *tmp = &tp->t_init;
-       register int i;
-
-       for (i = 0; i < TNTIMERS; i++)
-               *tmp++ = 0;
-}
-
-struct tcpcb *tcp_newtcpcb();
-/*
- * Process a TCP user request for tcp tb.  If this is a send request
+ * Process a TCP user request for TCP tb.  If this is a send request
  * then m is the mbuf chain of send data.  If this is a timer expiration
  * (called from the software clock routine), then timertype tells which timer.
  */
  * then m is the mbuf chain of send data.  If this is a timer expiration
  * (called from the software clock routine), then timertype tells which timer.
  */
-tcp_usrreq(so, req, m, addr)
+/*ARGSUSED*/
+int
+tcp_usrreq(so, req, m, nam, control)
        struct socket *so;
        int req;
        struct socket *so;
        int req;
-       struct mbuf *m;
-       caddr_t addr;
+       struct mbuf *m, *nam, *control;
 {
 {
-       register struct inpcb *inp = sotoinpcb(so);
+       register struct inpcb *inp;
        register struct tcpcb *tp;
        register struct tcpcb *tp;
-       int s = splnet();
-       register int nstate;
-#ifdef TCPDEBUG
-       struct tcp_debug tdb;
-#endif
+       int s;
        int error = 0;
        int error = 0;
-COUNT(TCP_USRREQ);
+       int ostate;
 
 
+       if (req == PRU_CONTROL)
+               return (in_control(so, (int)m, (caddr_t)nam,
+                       (struct ifnet *)control));
+       if (control && control->m_len) {
+               m_freem(control);
+               if (m)
+                       m_freem(m);
+               return (EINVAL);
+       }
+
+       s = splnet();
+       inp = sotoinpcb(so);
        /*
        /*
-        * Make sure attached.  If not,
-        * only PRU_ATTACH is valid.
+        * When a TCP is attached to a socket, then there will be
+        * a (struct inpcb) pointed at by the socket, and this
+        * structure will point at a subsidiary (struct tcpcb).
         */
         */
-#ifdef TCPDEBUG
-       tdb.td_tod = 0;
-#endif
-       if (inp == 0) {
-               if (req != PRU_ATTACH) {
-                       splx(s);
-                       return (EINVAL);
-               }
-       } else {
+       if (inp == 0 && req != PRU_ATTACH) {
+               splx(s);
+               return (EINVAL);                /* XXX */
+       }
+       if (inp) {
                tp = intotcpcb(inp);
                tp = intotcpcb(inp);
-               nstate = tp->t_state;
+               /* WHAT IF TP IS 0? */
 #ifdef KPROF
 #ifdef KPROF
-               tcp_acounts[nstate][req]++;
+               tcp_acounts[tp->t_state][req]++;
 #endif
 #endif
-#ifdef TCPDEBUG
-               if (((tp->t_socket->so_options & SO_DEBUG) || tcpconsdebug)) {
-                       tdb_setup(tp, (struct tcpiphdr *)0, req, &tdb);
-                       tdb.td_tim = timertype;
-               }
-#endif
-               tp->tc_flags &= ~TC_NET_KEEP;
-       }
-
+               ostate = tp->t_state;
+       } else
+               ostate = 0;
        switch (req) {
 
        switch (req) {
 
+       /*
+        * TCP attaches to socket via PRU_ATTACH, reserving space,
+        * and an internet control block.
+        */
        case PRU_ATTACH:
                if (inp) {
                        error = EISCONN;
                        break;
                }
        case PRU_ATTACH:
                if (inp) {
                        error = EISCONN;
                        break;
                }
-               tp = tcp_newtcpcb();
-               if (tp == 0) {
-                       error = ENOBUFS;
-                       break;
-               }
-               error = in_pcballoc(so, &tcb, 2048, 2048, (struct sockaddr_in *)addr);
-               if (error) {
-                       m_free(dtom(tp));
+               error = tcp_attach(so);
+               if (error)
                        break;
                        break;
-               }
-               inp = (struct inpcb *)so->so_pcb;
-               tp->t_inpcb = inp;
-               inp->inp_ppcb = (caddr_t)tp;
-               if (so->so_options & SO_ACCEPTCONN)
-                       nstate = LISTEN;
-               else
-                       nstate = CLOSED;
+               if ((so->so_options & SO_LINGER) && so->so_linger == 0)
+                       so->so_linger = TCP_LINGERTIME;
+               tp = sototcpcb(so);
                break;
 
                break;
 
+       /*
+        * PRU_DETACH detaches the TCP protocol from the socket.
+        * If the protocol state is non-embryonic, then can't
+        * do this directly: have to initiate a PRU_DISCONNECT,
+        * which may finish later; embryonic TCB's can just
+        * be discarded here.
+        */
        case PRU_DETACH:
        case PRU_DETACH:
-               tcp_detach(tp);
+               if (tp->t_state > TCPS_LISTEN)
+                       tp = tcp_disconnect(tp);
+               else
+                       tp = tcp_close(tp);
                break;
 
                break;
 
-       case PRU_CONNECT:
-               if (tp->t_state != 0 && tp->t_state != CLOSED)
-                       goto bad;
-               error = in_pcbsetpeer(inp, (struct sockaddr_in *)addr);
+       /*
+        * Give the socket an address.
+        */
+       case PRU_BIND:
+               error = in_pcbbind(inp, nam);
                if (error)
                        break;
                if (error)
                        break;
-               (void) tcp_sndctl(tp);
-               nstate = SYN_SENT;
-               soisconnecting(so);
                break;
 
                break;
 
-       case PRU_ACCEPT:
-               soisconnected(so);
+       /*
+        * Prepare to accept connections.
+        */
+       case PRU_LISTEN:
+               if (inp->inp_lport == 0)
+                       error = in_pcbbind(inp, (struct mbuf *)0);
+               if (error == 0)
+                       tp->t_state = TCPS_LISTEN;
                break;
 
                break;
 
-       case PRU_DISCONNECT:
-               if ((tp->tc_flags & TC_FIN_RCVD) == 0)
-                       goto abort;
-               if (nstate < ESTAB)
-                       tcp_disconnect(tp);
-               else {
-                       tp->tc_flags |= TC_SND_FIN;
-                       (void) tcp_sndctl(tp);
-                       tp->tc_flags |= TC_USR_CLOSED;
-                       soisdisconnecting(so);
+       /*
+        * Initiate connection to peer.
+        * Create a template for use in transmissions on this connection.
+        * Enter SYN_SENT state, and mark socket as connecting.
+        * Start keep-alive timer, and seed output sequence space.
+        * Send initial segment on connection.
+        */
+       case PRU_CONNECT:
+               if (inp->inp_lport == 0) {
+                       error = in_pcbbind(inp, (struct mbuf *)0);
+                       if (error)
+                               break;
                }
                }
+               error = in_pcbconnect(inp, nam);
+               if (error)
+                       break;
+               tp->t_template = tcp_template(tp);
+               if (tp->t_template == 0) {
+                       in_pcbdisconnect(inp);
+                       error = ENOBUFS;
+                       break;
+               }
+               /* Compute window scaling to request.  */
+               while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+                   (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
+                       tp->request_r_scale++;
+               soisconnecting(so);
+               tcpstat.tcps_connattempt++;
+               tp->t_state = TCPS_SYN_SENT;
+               tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
+               tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
+               tcp_sendseqinit(tp);
+               error = tcp_output(tp);
                break;
 
                break;
 
-       case PRU_SHUTDOWN:
-               switch (nstate) {
+       /*
+        * Create a TCP connection between two sockets.
+        */
+       case PRU_CONNECT2:
+               error = EOPNOTSUPP;
+               break;
 
 
-               case LISTEN:
-               case SYN_SENT:
-                       nstate = CLOSED;
-                       break;
+       /*
+        * Initiate disconnect from peer.
+        * If connection never passed embryonic stage, just drop;
+        * else if don't need to let data drain, then can just drop anyways,
+        * else have to begin TCP shutdown process: mark socket disconnecting,
+        * drain unread data, state switch to reflect user close, and
+        * send segment (e.g. FIN) to peer.  Socket will be really disconnected
+        * when peer sends FIN and acks ours.
+        *
+        * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
+        */
+       case PRU_DISCONNECT:
+               tp = tcp_disconnect(tp);
+               break;
 
 
-               case SYN_RCVD:
-               case L_SYN_RCVD:
-               case ESTAB:     
-               case CLOSE_WAIT:
-                       tp->tc_flags |= TC_SND_FIN;
-                       (void) tcp_sndctl(tp);
-                       tp->tc_flags |= TC_USR_CLOSED;
-                       nstate = nstate != CLOSE_WAIT ? FIN_W1 : LAST_ACK;
-                       break;
-                       
-               case FIN_W1:
-               case FIN_W2:
-               case TIME_WAIT:
-               case CLOSING:
-               case LAST_ACK:
-               case RCV_WAIT:
-                       break;
+       /*
+        * Accept a connection.  Essentially all the work is
+        * done at higher levels; just return the address
+        * of the peer, storing through nam.
+        */
+       case PRU_ACCEPT:
+               in_setpeeraddr(inp, nam);
+               break;
 
 
-               default:
-                       goto bad;
-               }
+       /*
+        * Mark the connection as being incapable of further output.
+        */
+       case PRU_SHUTDOWN:
+               socantsendmore(so);
+               tp = tcp_usrclosed(tp);
+               if (tp)
+                       error = tcp_output(tp);
                break;
 
                break;
 
+       /*
+        * After a receive, possibly send window update to peer.
+        */
        case PRU_RCVD:
        case PRU_RCVD:
-               if (nstate < ESTAB || nstate == CLOSED)
-                       goto bad;
-               tcp_sndwin(tp);
-               if ((tp->tc_flags&TC_FIN_RCVD) &&
-                   (tp->tc_flags&TC_USR_CLOSED) == 0 &&
-                   rcv_empty(tp))
-                       error = ESHUTDOWN;
-               if (nstate == RCV_WAIT && rcv_empty(tp))
-                       nstate = CLOSED;
+               (void) tcp_output(tp);
                break;
 
                break;
 
+       /*
+        * Do a send by putting data in output queue and updating urgent
+        * marker if URG set.  Possibly send more data.
+        */
        case PRU_SEND:
        case PRU_SEND:
-               switch (nstate) {
-
-               case ESTAB:
-               case CLOSE_WAIT:
-                       tcp_usrsend(tp, m);
-                       break;
-               
-               default:
-                       if (nstate < ESTAB)
-                               goto bad;
-                       m_freem(m);
-                       error = ENOTCONN;
-                       break;
-               }
+               sbappend(&so->so_snd, m);
+               error = tcp_output(tp);
                break;
 
                break;
 
-abort:
+       /*
+        * Abort the TCP.
+        */
        case PRU_ABORT:
        case PRU_ABORT:
-               tcp_abort(tp);
-               nstate = CLOSED;
+               tp = tcp_drop(tp, ECONNABORTED);
                break;
 
                break;
 
-       case PRU_CONTROL:
-               error = EOPNOTSUPP;
-               break;
-
-       case PRU_SLOWTIMO:
-               switch (nstate) {
+       case PRU_SENSE:
+               ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
+               (void) splx(s);
+               return (0);
 
 
-               case 0:
-               case CLOSED:
-               case LISTEN:
-                       goto bad;
+       case PRU_RCVOOB:
+               if ((so->so_oobmark == 0 &&
+                   (so->so_state & SS_RCVATMARK) == 0) ||
+                   so->so_options & SO_OOBINLINE ||
+                   tp->t_oobflags & TCPOOB_HADDATA) {
+                       error = EINVAL;
+                       break;
+               }
+               if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
+                       error = EWOULDBLOCK;
+                       break;
+               }
+               m->m_len = 1;
+               *mtod(m, caddr_t) = tp->t_iobc;
+               if (((int)nam & MSG_PEEK) == 0)
+                       tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
+               break;
 
 
-               default:
-                       nstate = tcp_timers(tp, (int)addr);
+       case PRU_SENDOOB:
+               if (sbspace(&so->so_snd) < -512) {
+                       m_freem(m);
+                       error = ENOBUFS;
+                       break;
                }
                }
+               /*
+                * According to RFC961 (Assigned Protocols),
+                * the urgent pointer points to the last octet
+                * of urgent data.  We continue, however,
+                * to consider it to indicate the first octet
+                * of data past the urgent section.
+                * Otherwise, snd_up should be one lower.
+                */
+               sbappend(&so->so_snd, m);
+               tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
+               tp->t_force = 1;
+               error = tcp_output(tp);
+               tp->t_force = 0;
                break;
 
                break;
 
-       default:
-               panic("tcp_usrreq");
-       bad:
-               printf("tcp: bad state: tcb=%x state=%d input=%d\n",
-                   tp, tp->t_state, req);
-               nstate = EFAILEC;
+       case PRU_SOCKADDR:
+               in_setsockaddr(inp, nam);
                break;
                break;
-       }
-#ifdef TCPDEBUG
-       if (tdb.td_tod)
-               tdb_stuff(&tdb, nstate);
-#endif
-       switch (nstate) {
 
 
-       case CLOSED:
-       case SAME:
+       case PRU_PEERADDR:
+               in_setpeeraddr(inp, nam);
                break;
 
                break;
 
-       case EFAILEC:
-               if (m)
-                       m_freem(dtom(m));
+       /*
+        * TCP slow timer went off; going through this
+        * routine for tracing's sake.
+        */
+       case PRU_SLOWTIMO:
+               tp = tcp_timers(tp, (int)nam);
+               req |= (int)nam << 8;           /* for debug's sake */
                break;
 
        default:
                break;
 
        default:
-               tp->t_state = nstate;
-               break;
+               panic("tcp_usrreq");
        }
        }
+       if (tp && (so->so_options & SO_DEBUG))
+               tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req);
        splx(s);
        return (error);
 }
 
        splx(s);
        return (error);
 }
 
-struct tcpcb *
-tcp_newtcpcb()
-{
-       struct mbuf *m = m_getclr(0);
-       register struct tcpcb *tp;
-COUNT(TCP_NEWTCPCB);
-
-       if (m == 0)
-               return (0);
-       tp = mtod(m, struct tcpcb *);
-
-       /*
-        * Make empty reassembly queue.
-        */
-       tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp;
-
-       /*
-        * Initialize sequence numbers and round trip retransmit timer.
-        */
-       tp->t_xmtime = T_REXMT;
-       tp->snd_end = tp->seq_fin = tp->snd_nxt = tp->snd_hi = tp->snd_una =
-           tp->iss = tcp_iss;
-       tp->snd_off = tp->iss + 1;
-       tcp_iss += (ISSINCR >> 1) + 1;
-       return (tp);
-}
-
-tcp_detach(tp)
-       struct tcpcb *tp;
-{
-COUNT(TCP_DETACH);
-
-       in_pcbfree(tp->t_inpcb);
-       (void) m_free(dtom(tp));
-}
-
-tcp_disconnect(tp)
-       register struct tcpcb *tp;
+int
+tcp_ctloutput(op, so, level, optname, mp)
+       int op;
+       struct socket *so;
+       int level, optname;
+       struct mbuf **mp;
 {
 {
-       register struct tcpiphdr *t;
-
-COUNT(TCP_DISCONNECT);
-       tcp_tcancel(tp);
-       t = tp->seg_next;
-       for (; t != (struct tcpiphdr *)tp; t = (struct tcpiphdr *)t->ti_next)
-               m_freem(dtom(t));
-       tcp_drainunack(tp);
-       if (tp->t_template) {
-               (void) m_free(dtom(tp->t_template));
-               tp->t_template = 0;
-       }
-       in_pcbfree(tp->t_inpcb);
-}
-
-tcp_abort(tp)
+       int error = 0, s;
+       struct inpcb *inp;
        register struct tcpcb *tp;
        register struct tcpcb *tp;
-{
-
-COUNT(TCP_ABORT);
-       switch (tp->t_state) {
+       register struct mbuf *m;
+       register int i;
 
 
-       case SYN_RCVD:
-       case ESTAB:
-       case FIN_W1:
-       case FIN_W2:
-       case CLOSE_WAIT:
-               tp->tc_flags |= TC_SND_RST;
-               tcp_sndnull(tp);
+       s = splnet();
+       inp = sotoinpcb(so);
+       if (inp == NULL) {
+               splx(s);
+               if (op == PRCO_SETOPT && *mp)
+                       (void) m_free(*mp);
+               return (ECONNRESET);
        }
        }
-       soisdisconnected(tp->t_inpcb->inp_socket);
-}
-
-/*
- * Send data queue headed by m0 into the protocol.
- */
-tcp_usrsend(tp, m0)
-       register struct tcpcb *tp;
-       struct mbuf *m0;
-{
-       register struct socket *so = tp->t_inpcb->inp_socket;
-COUNT(TCP_USRSEND);
-
-       sbappend(&so->so_snd, m0);
-       if (tp->t_options & TO_EOL)
-               tp->snd_end = tp->snd_off + so->so_snd.sb_cc;
-       if (tp->t_options & TO_URG) {
-               tp->snd_urp = tp->snd_off + so->so_snd.sb_cc + 1;
-               tp->tc_flags |= TC_SND_URG;
+       if (level != IPPROTO_TCP) {
+               error = ip_ctloutput(op, so, level, optname, mp);
+               splx(s);
+               return (error);
        }
        }
-       (void) tcp_send(tp);
-}
+       tp = intotcpcb(inp);
 
 
-/*
- * TCP timer went off processing.
- */
-tcp_timers(tp, timertype)
-       register struct tcpcb *tp;
-       int timertype;
-{
-
-COUNT(TCP_TIMERS);
-       switch (timertype) {
+       switch (op) {
 
 
-       case TFINACK:           /* fin-ack timer */
-               switch (tp->t_state) {
+       case PRCO_SETOPT:
+               m = *mp;
+               switch (optname) {
 
 
-               case TIME_WAIT:
-                       /*
-                        * We can be sure our ACK of foreign FIN was rcvd,
-                        * and can close if no data left for user.
-                        */
-                       if (rcv_empty(tp)) {
-                               tcp_disconnect(tp);
-                               return (CLOSED);
-                       }
-                       return (RCV_WAIT);                      /* 17 */
+               case TCP_NODELAY:
+                       if (m == NULL || m->m_len < sizeof (int))
+                               error = EINVAL;
+                       else if (*mtod(m, int *))
+                               tp->t_flags |= TF_NODELAY;
+                       else
+                               tp->t_flags &= ~TF_NODELAY;
+                       break;
 
 
-               case CLOSING:
-                       tp->tc_flags |= TC_WAITED_2_ML;
-                       return (SAME);
+               case TCP_MAXSEG:
+                       if (m && (i = *mtod(m, int *)) > 0 && i <= tp->t_maxseg)
+                               tp->t_maxseg = i;
+                       else
+                               error = EINVAL;
+                       break;
 
                default:
 
                default:
-                       return (SAME);
+                       error = ENOPROTOOPT;
+                       break;
                }
                }
+               if (m)
+                       (void) m_free(m);
+               break;
 
 
-       case TREXMT:            /* retransmission timer */
-               if (tp->t_rexmt_val > tp->snd_una) {            /* 34 */
-                       /*
-                        * Set so for a retransmission, increase rexmt time
-                        * in case of multiple retransmissions.
-                        */
-                       tp->snd_nxt = tp->snd_una;
-                       tp->tc_flags |= TC_REXMT;
-                       tp->t_xmtime = tp->t_xmtime << 1;
-                       if (tp->t_xmtime > T_REMAX)
-                               tp->t_xmtime = T_REMAX;
-                       (void) tcp_send(tp);
-               }
-               return (SAME);
+       case PRCO_GETOPT:
+               *mp = m = m_get(M_WAIT, MT_SOOPTS);
+               m->m_len = sizeof(int);
 
 
-       case TREXMTTL:          /* retransmit too long */
-               if (tp->t_rtl_val > tp->snd_una)                /* 36 */
-                       tcp_error(tp, EIO);             /* URXTIMO !?! */
-               /*
-                * If user has already closed, abort the connection.
-                */
-               if (tp->tc_flags & TC_USR_CLOSED) {
-                       tcp_abort(tp);
-                       return (CLOSED);
+               switch (optname) {
+               case TCP_NODELAY:
+                       *mtod(m, int *) = tp->t_flags & TF_NODELAY;
+                       break;
+               case TCP_MAXSEG:
+                       *mtod(m, int *) = tp->t_maxseg;
+                       break;
+               default:
+                       error = ENOPROTOOPT;
+                       break;
                }
                }
-               return (SAME);
-
-       case TPERSIST:          /* persist timer */
-               /*
-                * Force a byte send through closed window.
-                */
-               tp->tc_flags |= TC_FORCE_ONE;
-               (void) tcp_send(tp);
-               return (SAME);
+               break;
        }
        }
-       panic("tcp_timers");
-       /*NOTREACHED*/
+       splx(s);
+       return (error);
 }
 
 }
 
-/*ARGSUSED*/
-tcp_sense(m)
-       struct mbuf *m;
+u_long tcp_sendspace = 1024*8;
+u_long tcp_recvspace = 1024*8;
+
+/*
+ * Attach TCP protocol to socket, allocating
+ * internet protocol control block, tcp control block,
+ * buffer space, and entering LISTEN state if to accept connections.
+ */
+int
+tcp_attach(so)
+       struct socket *so;
 {
 {
+       register struct tcpcb *tp;
+       struct inpcb *inp;
+       int error;
 
 
-COUNT(TCP_SENSE);
-       return (EOPNOTSUPP);
+       if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
+               error = soreserve(so, tcp_sendspace, tcp_recvspace);
+               if (error)
+                       return (error);
+       }
+       error = in_pcballoc(so, &tcb);
+       if (error)
+               return (error);
+       inp = sotoinpcb(so);
+       tp = tcp_newtcpcb(inp);
+       if (tp == 0) {
+               int nofd = so->so_state & SS_NOFDREF;   /* XXX */
+
+               so->so_state &= ~SS_NOFDREF;    /* don't free the socket yet */
+               in_pcbdetach(inp);
+               so->so_state |= nofd;
+               return (ENOBUFS);
+       }
+       tp->t_state = TCPS_CLOSED;
+       return (0);
 }
 
 }
 
-tcp_error(tp, errno)
-       struct tcpcb *tp;
-       int errno;
+/*
+ * Initiate (or continue) disconnect.
+ * If embryonic state, just send reset (once).
+ * If in ``let data drain'' option and linger null, just drop.
+ * Otherwise (hard), mark socket disconnecting and drop
+ * current input data; switch states based on user close, and
+ * send segment to peer (with FIN).
+ */
+struct tcpcb *
+tcp_disconnect(tp)
+       register struct tcpcb *tp;
 {
        struct socket *so = tp->t_inpcb->inp_socket;
 
 {
        struct socket *so = tp->t_inpcb->inp_socket;
 
-COUNT(TCP_ERROR);
-       so->so_error = errno;
-       sorwakeup(so);
-       sowwakeup(so);
+       if (tp->t_state < TCPS_ESTABLISHED)
+               tp = tcp_close(tp);
+       else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
+               tp = tcp_drop(tp, 0);
+       else {
+               soisdisconnecting(so);
+               sbflush(&so->so_rcv);
+               tp = tcp_usrclosed(tp);
+               if (tp)
+                       (void) tcp_output(tp);
+       }
+       return (tp);
 }
 
 }
 
-#ifdef TCPDEBUG
 /*
 /*
- * TCP debugging utility subroutines.
- * THE NAMES OF THE FIELDS USED BY THESE ROUTINES ARE STUPID.
+ * User issued close, and wish to trail through shutdown states:
+ * if never received SYN, just forget it.  If got a SYN from peer,
+ * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
+ * If already got a FIN from peer, then almost done; go to LAST_ACK
+ * state.  In all other cases, have already sent FIN to peer (e.g.
+ * after PRU_SHUTDOWN), and just have to play tedious game waiting
+ * for peer to send FIN or not respond to keep-alives, etc.
+ * We can let the user exit from the close as soon as the FIN is acked.
  */
  */
-tdb_setup(tp, n, input, tdp)
-       struct tcpcb *tp;
-       register struct tcpiphdr *n;
-       int input;
-       register struct tcp_debug *tdp;
+struct tcpcb *
+tcp_usrclosed(tp)
+       register struct tcpcb *tp;
 {
 
 {
 
-COUNT(TDB_SETUP);
-       tdp->td_tod = time;
-       tdp->td_tcb = tp;
-       tdp->td_old = tp->t_state;
-       tdp->td_inp = input;
-       tdp->td_tim = 0;
-       tdp->td_new = -1;
-       if (n) {
-               tdp->td_sno = n->ti_seq;
-               tdp->td_ano = n->ti_ackno;
-               tdp->td_wno = n->t_win;
-               tdp->td_lno = n->ti_len;
-               tdp->td_flg = n->ti_flags;
-       } else
-               tdp->td_sno = tdp->td_ano = tdp->td_wno = tdp->td_lno =
-                   tdp->td_flg = 0;
-}
+       switch (tp->t_state) {
 
 
-tdb_stuff(tdp, nstate)
-       struct tcp_debug *tdp;
-       int nstate;
-{
-COUNT(TDB_STUFF);
+       case TCPS_CLOSED:
+       case TCPS_LISTEN:
+       case TCPS_SYN_SENT:
+               tp->t_state = TCPS_CLOSED;
+               tp = tcp_close(tp);
+               break;
 
 
-       tdp->td_new = nstate;
-       tcp_debug[tdbx++ % TDBSIZE] = *tdp;
-       if (tcpconsdebug & 2)
-               tcp_prt(tdp);
-}
+       case TCPS_SYN_RECEIVED:
+       case TCPS_ESTABLISHED:
+               tp->t_state = TCPS_FIN_WAIT_1;
+               break;
 
 
-tcp_prt(tdp)
-       register struct tcp_debug *tdp;
-{
-COUNT(TCP_PRT);
-
-       printf("%x ", ((int)tdp->td_tcb)&0xffffff);
-       if (tdp->td_inp == INSEND) {
-               printf("SEND #%x", tdp->td_sno);
-               tdp->td_lno = ntohs(tdp->td_lno);
-               tdp->td_wno = ntohs(tdp->td_wno);
-       } else {
-               if (tdp->td_inp == INRECV)
-                       printf("RCV #%x ", tdp->td_sno);
-               printf("%s.%s",
-                   tcpstates[tdp->td_old], tcpinputs[tdp->td_inp]);
-               if (tdp->td_inp == ISTIMER)
-                       printf("(%s)", tcptimers[tdp->td_tim]);
-               printf(" -> %s",
-                   tcpstates[(tdp->td_new > 0) ? tdp->td_new : tdp->td_old]);
-               if (tdp->td_new == -1)
-                       printf(" (FAILED)");
+       case TCPS_CLOSE_WAIT:
+               tp->t_state = TCPS_LAST_ACK;
+               break;
        }
        }
-       /* GROSS... DEPENDS ON SIGN EXTENSION OF CHARACTERS */
-       if (tdp->td_lno)
-               printf(" len=%d", tdp->td_lno);
-       if (tdp->td_wno)
-               printf(" win=%d", tdp->td_wno);
-       if (tdp->td_flg & TH_FIN) printf(" FIN");
-       if (tdp->td_flg & TH_SYN) printf(" SYN");
-       if (tdp->td_flg & TH_RST) printf(" RST");
-       if (tdp->td_flg & TH_EOL) printf(" EOL");
-       if (tdp->td_flg & TH_ACK)  printf(" ACK %x", tdp->td_ano);
-       if (tdp->td_flg & TH_URG) printf(" URG");
-       printf("\n");
+       if (tp && tp->t_state >= TCPS_FIN_WAIT_2)
+               soisdisconnected(tp->t_inpcb->inp_socket);
+       return (tp);
 }
 }
-#endif