new signals
[unix-history] / usr / src / sys / netinet / tcp_usrreq.c
index 05c5a58..ebed585 100644 (file)
@@ -1,25 +1,47 @@
-/* tcp_usrreq.c 1.48 82/01/17 */
-
-#include "../h/param.h"
-#include "../h/systm.h"
-#include "../h/mbuf.h"
-#include "../h/socket.h"
-#include "../h/socketvar.h"
-#include "../h/protosw.h"
-#include "../net/in.h"
-#include "../net/in_pcb.h"
-#include "../net/in_systm.h"
+/*
+ * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley.  The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ *     @(#)tcp_usrreq.c        7.12 (Berkeley) %G%
+ */
+
+#include "param.h"
+#include "systm.h"
+#include "malloc.h"
+#include "mbuf.h"
+#include "socket.h"
+#include "socketvar.h"
+#include "protosw.h"
+#include "errno.h"
+#include "stat.h"
+
 #include "../net/if.h"
 #include "../net/if.h"
-#include "../net/ip.h"
-#include "../net/ip_var.h"
-#include "../net/tcp.h"
-#include "../net/tcp_fsm.h"
-#include "../net/tcp_seq.h"
-#include "../net/tcp_timer.h"
-#include "../net/tcp_var.h"
-#include "../net/tcpip.h"
-#include "../net/tcp_debug.h"
-#include "../errno.h"
+#include "../net/route.h"
+
+#include "in.h"
+#include "in_pcb.h"
+#include "in_systm.h"
+#include "ip.h"
+#include "ip_var.h"
+#include "tcp.h"
+#include "tcp_fsm.h"
+#include "tcp_seq.h"
+#include "tcp_timer.h"
+#include "tcp_var.h"
+#include "tcpip.h"
+#include "tcp_debug.h"
 
 /*
  * TCP protocol interface to socket abstraction.
 
 /*
  * TCP protocol interface to socket abstraction.
@@ -32,33 +54,35 @@ struct      tcpcb *tcp_newtcpcb();
  * then m is the mbuf chain of send data.  If this is a timer expiration
  * (called from the software clock routine), then timertype tells which timer.
  */
  * then m is the mbuf chain of send data.  If this is a timer expiration
  * (called from the software clock routine), then timertype tells which timer.
  */
-tcp_usrreq(so, req, m, addr)
+/*ARGSUSED*/
+tcp_usrreq(so, req, m, nam, rights)
        struct socket *so;
        int req;
        struct socket *so;
        int req;
-       struct mbuf *m;
-       caddr_t addr;
+       struct mbuf *m, *nam, *rights;
 {
 {
-       register struct inpcb *inp = sotoinpcb(so);
+       register struct inpcb *inp;
        register struct tcpcb *tp;
        register struct tcpcb *tp;
-       int s = splnet();
+       int s;
        int error = 0;
        int ostate;
        int error = 0;
        int ostate;
-COUNT(TCP_USRREQ);
 
 
+#if BSD>=43
+       if (req == PRU_CONTROL)
+               return (in_control(so, (int)m, (caddr_t)nam,
+                       (struct ifnet *)rights));
+#else
+       if (req == PRU_CONTROL)
+               return(EOPNOTSUPP);
+#endif
+       if (rights && rights->m_len)
+               return (EINVAL);
+
+       s = splnet();
+       inp = sotoinpcb(so);
        /*
         * When a TCP is attached to a socket, then there will be
         * a (struct inpcb) pointed at by the socket, and this
         * structure will point at a subsidiary (struct tcpcb).
        /*
         * When a TCP is attached to a socket, then there will be
         * a (struct inpcb) pointed at by the socket, and this
         * structure will point at a subsidiary (struct tcpcb).
-        * The normal sequence of events is:
-        *      PRU_ATTACH              creating these structures
-        *      PRU_CONNECT             connecting to a remote peer
-        *      (PRU_SEND|PRU_RCVD)*    exchanging data
-        *      PRU_DISCONNECT          disconnecting from remote peer
-        *      PRU_DETACH              deleting the structures
-        * With the operations from PRU_CONNECT through PRU_DISCONNECT
-        * possible repeated several times.
-        *
-        * MULTIPLE CONNECTS ARE NOT YET IMPLEMENTED.
         */
        if (inp == 0 && req != PRU_ATTACH) {
                splx(s);
         */
        if (inp == 0 && req != PRU_ATTACH) {
                splx(s);
@@ -66,28 +90,28 @@ COUNT(TCP_USRREQ);
        }
        if (inp) {
                tp = intotcpcb(inp);
        }
        if (inp) {
                tp = intotcpcb(inp);
+               /* WHAT IF TP IS 0? */
 #ifdef KPROF
                tcp_acounts[tp->t_state][req]++;
 #endif
                ostate = tp->t_state;
 #ifdef KPROF
                tcp_acounts[tp->t_state][req]++;
 #endif
                ostate = tp->t_state;
-       }
+       } else
+               ostate = 0;
        switch (req) {
 
        /*
         * TCP attaches to socket via PRU_ATTACH, reserving space,
        switch (req) {
 
        /*
         * TCP attaches to socket via PRU_ATTACH, reserving space,
-        * and internet and TCP control blocks.
-        * If the socket is to receive connections,
-        * then the LISTEN state is entered.
+        * and an internet control block.
         */
        case PRU_ATTACH:
                if (inp) {
                        error = EISCONN;
                        break;
                }
         */
        case PRU_ATTACH:
                if (inp) {
                        error = EISCONN;
                        break;
                }
-               error = tcp_attach(so, (struct sockaddr *)addr);
+               error = tcp_attach(so);
                if (error)
                        break;
                if (error)
                        break;
-               if ((so->so_options & SO_DONTLINGER) == 0)
+               if ((so->so_options & SO_LINGER) && so->so_linger == 0)
                        so->so_linger = TCP_LINGERTIME;
                tp = sototcpcb(so);
                break;
                        so->so_linger = TCP_LINGERTIME;
                tp = sototcpcb(so);
                break;
@@ -101,11 +125,28 @@ COUNT(TCP_USRREQ);
         */
        case PRU_DETACH:
                if (tp->t_state > TCPS_LISTEN)
         */
        case PRU_DETACH:
                if (tp->t_state > TCPS_LISTEN)
-                       tcp_disconnect(tp);
-               else {
-                       tcp_close(tp);
-                       tp = 0;
-               }
+                       tp = tcp_disconnect(tp);
+               else
+                       tp = tcp_close(tp);
+               break;
+
+       /*
+        * Give the socket an address.
+        */
+       case PRU_BIND:
+               error = in_pcbbind(inp, nam);
+               if (error)
+                       break;
+               break;
+
+       /*
+        * Prepare to accept connections.
+        */
+       case PRU_LISTEN:
+               if (inp->inp_lport == 0)
+                       error = in_pcbbind(inp, (struct mbuf *)0);
+               if (error == 0)
+                       tp->t_state = TCPS_LISTEN;
                break;
 
        /*
                break;
 
        /*
@@ -116,7 +157,12 @@ COUNT(TCP_USRREQ);
         * Send initial segment on connection.
         */
        case PRU_CONNECT:
         * Send initial segment on connection.
         */
        case PRU_CONNECT:
-               error = in_pcbconnect(inp, (struct sockaddr_in *)addr);
+               if (inp->inp_lport == 0) {
+                       error = in_pcbbind(inp, (struct mbuf *)0);
+                       if (error)
+                               break;
+               }
+               error = in_pcbconnect(inp, nam);
                if (error)
                        break;
                tp->t_template = tcp_template(tp);
                if (error)
                        break;
                tp->t_template = tcp_template(tp);
@@ -126,11 +172,19 @@ COUNT(TCP_USRREQ);
                        break;
                }
                soisconnecting(so);
                        break;
                }
                soisconnecting(so);
+               tcpstat.tcps_connattempt++;
                tp->t_state = TCPS_SYN_SENT;
                tp->t_state = TCPS_SYN_SENT;
-               tp->t_timer[TCPT_KEEP] = TCPTV_KEEP;
+               tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
                tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
                tcp_sendseqinit(tp);
                tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
                tcp_sendseqinit(tp);
-               (void) tcp_output(tp);
+               error = tcp_output(tp);
+               break;
+
+       /*
+        * Create a TCP connection between two sockets.
+        */
+       case PRU_CONNECT2:
+               error = EOPNOTSUPP;
                break;
 
        /*
                break;
 
        /*
@@ -145,7 +199,7 @@ COUNT(TCP_USRREQ);
         * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
         */
        case PRU_DISCONNECT:
         * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
         */
        case PRU_DISCONNECT:
-               tcp_disconnect(tp);
+               tp = tcp_disconnect(tp);
                break;
 
        /*
                break;
 
        /*
@@ -153,17 +207,25 @@ COUNT(TCP_USRREQ);
         * done at higher levels; just return the address
         * of the peer, storing through addr.
         */
         * done at higher levels; just return the address
         * of the peer, storing through addr.
         */
-       case PRU_ACCEPT:
-               in_pcbconnaddr(inp, (struct sockaddr *)addr);
+       case PRU_ACCEPT: {
+               struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
+
+               nam->m_len = sizeof (struct sockaddr_in);
+               sin->sin_family = AF_INET;
+               sin->sin_len = sizeof(*sin);
+               sin->sin_port = inp->inp_fport;
+               sin->sin_addr = inp->inp_faddr;
                break;
                break;
+               }
 
        /*
         * Mark the connection as being incapable of further output.
         */
        case PRU_SHUTDOWN:
                socantsendmore(so);
 
        /*
         * Mark the connection as being incapable of further output.
         */
        case PRU_SHUTDOWN:
                socantsendmore(so);
-               tcp_usrclosed(tp);
-               (void) tcp_output(tp);
+               tp = tcp_usrclosed(tp);
+               if (tp)
+                       error = tcp_output(tp);
                break;
 
        /*
                break;
 
        /*
@@ -179,82 +241,68 @@ COUNT(TCP_USRREQ);
         */
        case PRU_SEND:
                sbappend(&so->so_snd, m);
         */
        case PRU_SEND:
                sbappend(&so->so_snd, m);
-/*
-               if (tp->t_flags & TF_PUSH)
-                       tp->snd_end = tp->snd_una + so->so_snd.sb_cc;
- */
-               (void) tcp_output(tp);
+               error = tcp_output(tp);
                break;
 
        /*
         * Abort the TCP.
         */
        case PRU_ABORT:
                break;
 
        /*
         * Abort the TCP.
         */
        case PRU_ABORT:
-               tcp_drop(tp, ECONNABORTED);
-               break;
-
-/* SOME AS YET UNIMPLEMENTED HOOKS */
-       case PRU_CONTROL:
-               error = EOPNOTSUPP;
+               tp = tcp_drop(tp, ECONNABORTED);
                break;
 
        case PRU_SENSE:
                break;
 
        case PRU_SENSE:
-               error = EOPNOTSUPP;
-               break;
-/* END UNIMPLEMENTED HOOKS */
+               ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
+               (void) splx(s);
+               return (0);
 
        case PRU_RCVOOB:
 
        case PRU_RCVOOB:
-#if TCPTRUEOOB
-               if (tp->t_flags & TF_DOOOB) {
-                       if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
-                               error = EWOULDBLOCK;
-                               break;
-                       }
-                       *mtod(m, caddr_t) = tp->t_iobc;
-                       tp->t_oobflags &= ~TCPOOB_HAVEDATA;
-                       break;
-               }
+               if ((so->so_oobmark == 0 &&
+                   (so->so_state & SS_RCVATMARK) == 0) ||
+#ifdef SO_OOBINLINE
+                   so->so_options & SO_OOBINLINE ||
 #endif
 #endif
-               if (so->so_oobmark == 0 &&
-                   (so->so_state & SS_RCVATMARK) == 0) {
+                   tp->t_oobflags & TCPOOB_HADDATA) {
                        error = EINVAL;
                        break;
                }
                        error = EINVAL;
                        break;
                }
-               if (so->so_rcv.sb_cc < so->so_oobmark) {
+               if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
                        error = EWOULDBLOCK;
                        error = EWOULDBLOCK;
-                       return;
-               }
-               { struct mbuf *n = so->so_rcv.sb_mb;
-                 int cnt = so->so_oobmark;
-                 while (cnt > n->m_len) {
-                       cnt -= n->m_len;
-                       n = n->m_next;
-                 }
-                 *mtod(m, caddr_t) = *(mtod(n, caddr_t) + cnt);
+                       break;
                }
                }
-               tp->t_oobflags &= ~TCPOOB_HAVEDATA;
+               m->m_len = 1;
+               *mtod(m, caddr_t) = tp->t_iobc;
+               if (((int)nam & MSG_PEEK) == 0)
+                       tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
                break;
 
        case PRU_SENDOOB:
                if (sbspace(&so->so_snd) < -512) {
                break;
 
        case PRU_SENDOOB:
                if (sbspace(&so->so_snd) < -512) {
+                       m_freem(m);
                        error = ENOBUFS;
                        break;
                }
                        error = ENOBUFS;
                        break;
                }
-               tp->snd_up = tp->snd_una + so->so_snd.sb_cc + 1;
+               /*
+                * According to RFC961 (Assigned Protocols),
+                * the urgent pointer points to the last octet
+                * of urgent data.  We continue, however,
+                * to consider it to indicate the first octet
+                * of data past the urgent section.
+                * Otherwise, snd_up should be one lower.
+                */
                sbappend(&so->so_snd, m);
                sbappend(&so->so_snd, m);
-/*
-               if (tp->t_flags & TF_PUSH)
-                       tp->snd_end = tp->snd_una + so->so_snd.sb_cc;
- */
-#ifdef TCPTRUEOOB
-               if (tp->t_flags & TF_DOOOB) {
-                       tp->t_oobseq++;
-                       tp->t_oobc = *mtod(m, caddr_t);
-printf("sendoob seq now %x oobc %x\n", tp->t_oobseq, tp->t_oobc);
-                       tp->t_oobflags |= TCPOOB_NEEDACK;
-               }
-#endif
-               tp->t_force = 1; (void) tcp_output(tp); tp->t_force = 0;
+               tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
+               tp->t_force = 1;
+               error = tcp_output(tp);
+               tp->t_force = 0;
+               break;
+
+       case PRU_SOCKADDR:
+               in_setsockaddr(inp, nam);
+               break;
+
+       case PRU_PEERADDR:
+               in_setpeeraddr(inp, nam);
                break;
 
        /*
                break;
 
        /*
@@ -262,8 +310,8 @@ printf("sendoob seq now %x oobc %x\n", tp->t_oobseq, tp->t_oobc);
         * routine for tracing's sake.
         */
        case PRU_SLOWTIMO:
         * routine for tracing's sake.
         */
        case PRU_SLOWTIMO:
-               tcp_timers(tp, (int)addr);
-               req |= (int)addr << 8;          /* for debug's sake */
+               tp = tcp_timers(tp, (int)nam);
+               req |= (int)nam << 8;           /* for debug's sake */
                break;
 
        default:
                break;
 
        default:
@@ -275,58 +323,129 @@ printf("sendoob seq now %x oobc %x\n", tp->t_oobseq, tp->t_oobc);
        return (error);
 }
 
        return (error);
 }
 
+#if BSD>=43
+/*
+ * Get or set a TCP-level socket option.
+ *
+ * op is PRCO_SETOPT or PRCO_GETOPT.  Requests whose level is not
+ * IPPROTO_TCP are handed to ip_ctloutput() unchanged.
+ *
+ * PRCO_SETOPT: the option value arrives in *mp; the mbuf is always
+ * freed here, whether or not the option was accepted.
+ * PRCO_GETOPT: a fresh MT_SOOPTS mbuf holding one int is stored
+ * through mp.
+ *
+ * Returns 0 on success, ECONNRESET if the socket no longer has an
+ * internet control block, or EINVAL for a short/missing value or an
+ * option that is unknown or cannot be set.
+ */
+tcp_ctloutput(op, so, level, optname, mp)
+       int op;
+       struct socket *so;
+       int level, optname;
+       struct mbuf **mp;
+{
+       int error = 0;
+       int s;
+       struct inpcb *inp;
+       register struct tcpcb *tp;
+       register struct mbuf *m;
+
+       /*
+        * Hold off network interrupts while the control blocks are
+        * examined, as tcp_usrreq() does, and make sure the socket
+        * still has an inpcb before looking through it.
+        */
+       s = splnet();
+       inp = sotoinpcb(so);
+       if (inp == 0) {
+               splx(s);
+               /* SETOPT consumes its mbuf on every path; do so here too. */
+               if (op == PRCO_SETOPT && *mp)
+                       (void) m_free(*mp);
+               return (ECONNRESET);
+       }
+       if (level != IPPROTO_TCP) {
+               error = ip_ctloutput(op, so, level, optname, mp);
+               splx(s);
+               return (error);
+       }
+       tp = intotcpcb(inp);
+
+       switch (op) {
+
+       case PRCO_SETOPT:
+               m = *mp;
+               switch (optname) {
+
+               case TCP_NODELAY:
+                       if (m == NULL || m->m_len < sizeof (int))
+                               error = EINVAL;
+                       else if (*mtod(m, int *))
+                               tp->t_flags |= TF_NODELAY;
+                       else
+                               tp->t_flags &= ~TF_NODELAY;
+                       break;
+
+               case TCP_MAXSEG:        /* not yet */
+               default:
+                       error = EINVAL;
+                       break;
+               }
+               if (m)
+                       (void) m_free(m);
+               break;
+
+       case PRCO_GETOPT:
+               /*
+                * NOTE(review): on EINVAL the mbuf stays in *mp;
+                * presumably the caller releases it -- confirm.
+                */
+               *mp = m = m_get(M_WAIT, MT_SOOPTS);
+               m->m_len = sizeof(int);
+
+               switch (optname) {
+               case TCP_NODELAY:
+                       *mtod(m, int *) = tp->t_flags & TF_NODELAY;
+                       break;
+               case TCP_MAXSEG:
+                       *mtod(m, int *) = tp->t_maxseg;
+                       break;
+               default:
+                       error = EINVAL;
+                       break;
+               }
+               break;
+       }
+       splx(s);
+       return (error);
+}
+#endif
+
+u_long tcp_sendspace = 1024*4;
+u_long tcp_recvspace = 1024*4;
+
 /*
  * Attach TCP protocol to socket, allocating
  * internet protocol control block, tcp control block,
  * buffer space, and entering LISTEN state if to accept connections.
  */
 /*
  * Attach TCP protocol to socket, allocating
  * internet protocol control block, tcp control block,
  * buffer space, and entering LISTEN state if to accept connections.
  */
-tcp_attach(so, sa)
+tcp_attach(so)
        struct socket *so;
        struct socket *so;
-       struct sockaddr *sa;
 {
        register struct tcpcb *tp;
        struct inpcb *inp;
        int error;
 
 {
        register struct tcpcb *tp;
        struct inpcb *inp;
        int error;
 
-       error = in_pcbattach(so, &tcb, 2048, 2048, (struct sockaddr_in *)sa);
+       if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
+               error = soreserve(so, tcp_sendspace, tcp_recvspace);
+               if (error)
+                       return (error);
+       }
+       error = in_pcballoc(so, &tcb);
        if (error)
                return (error);
        if (error)
                return (error);
-       inp = (struct inpcb *)so->so_pcb;
+       inp = sotoinpcb(so);
        tp = tcp_newtcpcb(inp);
        tp = tcp_newtcpcb(inp);
-       if (so->so_options & SO_ACCEPTCONN) {
-               if (tp == 0) {
-                       in_pcbdetach(inp);
-                       return (ENOBUFS);
-               }
-               tp->t_state = TCPS_LISTEN;
-       } else
-               tp->t_state = TCPS_CLOSED;
+       if (tp == 0) {
+               int nofd = so->so_state & SS_NOFDREF;   /* XXX */
+
+               so->so_state &= ~SS_NOFDREF;    /* don't free the socket yet */
+               in_pcbdetach(inp);
+               so->so_state |= nofd;
+               return (ENOBUFS);
+       }
+       tp->t_state = TCPS_CLOSED;
        return (0);
 }
 
 /*
  * Initiate (or continue) disconnect.
  * If embryonic state, just send reset (once).
        return (0);
 }
 
 /*
  * Initiate (or continue) disconnect.
  * If embryonic state, just send reset (once).
- * If not in ``let data drain'' option, just drop.
+ * If in ``let data drain'' option and linger null, just drop.
  * Otherwise (hard), mark socket disconnecting and drop
  * current input data; switch states based on user close, and
  * send segment to peer (with FIN).
  */
  * Otherwise (hard), mark socket disconnecting and drop
  * current input data; switch states based on user close, and
  * send segment to peer (with FIN).
  */
+struct tcpcb *
 tcp_disconnect(tp)
 tcp_disconnect(tp)
-       struct tcpcb *tp;
+       register struct tcpcb *tp;
 {
        struct socket *so = tp->t_inpcb->inp_socket;
 
        if (tp->t_state < TCPS_ESTABLISHED)
 {
        struct socket *so = tp->t_inpcb->inp_socket;
 
        if (tp->t_state < TCPS_ESTABLISHED)
-               tcp_close(tp);
-       else if (so->so_linger == 0)
-               tcp_drop(tp, 0);
+               tp = tcp_close(tp);
+       else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
+               tp = tcp_drop(tp, 0);
        else {
                soisdisconnecting(so);
                sbflush(&so->so_rcv);
        else {
                soisdisconnecting(so);
                sbflush(&so->so_rcv);
-               tcp_usrclosed(tp);
-               (void) tcp_output(tp);
+               tp = tcp_usrclosed(tp);
+               if (tp)
+                       (void) tcp_output(tp);
        }
        }
+       return (tp);
 }
 
 /*
 }
 
 /*
@@ -337,17 +456,20 @@ tcp_disconnect(tp)
  * state.  In all other cases, have already sent FIN to peer (e.g.
  * after PRU_SHUTDOWN), and just have to play tedious game waiting
  * for peer to send FIN or not respond to keep-alives, etc.
  * state.  In all other cases, have already sent FIN to peer (e.g.
  * after PRU_SHUTDOWN), and just have to play tedious game waiting
  * for peer to send FIN or not respond to keep-alives, etc.
+ * We can let the user exit from the close as soon as the FIN is acked.
  */
  */
+struct tcpcb *
 tcp_usrclosed(tp)
 tcp_usrclosed(tp)
-       struct tcpcb *tp;
+       register struct tcpcb *tp;
 {
 
        switch (tp->t_state) {
 
 {
 
        switch (tp->t_state) {
 
+       case TCPS_CLOSED:
        case TCPS_LISTEN:
        case TCPS_SYN_SENT:
                tp->t_state = TCPS_CLOSED;
        case TCPS_LISTEN:
        case TCPS_SYN_SENT:
                tp->t_state = TCPS_CLOSED;
-               tcp_close(tp);
+               tp = tcp_close(tp);
                break;
 
        case TCPS_SYN_RECEIVED:
                break;
 
        case TCPS_SYN_RECEIVED:
@@ -359,4 +481,7 @@ tcp_usrclosed(tp)
                tp->t_state = TCPS_LAST_ACK;
                break;
        }
                tp->t_state = TCPS_LAST_ACK;
                break;
        }
+       if (tp && tp->t_state >= TCPS_FIN_WAIT_2)
+               soisdisconnected(tp->t_inpcb->inp_socket);
+       return (tp);
 }
 }