getpeer
[unix-history] / usr / src / sys / netinet / tcp_usrreq.c
index d6871f7..51a7a44 100644 (file)
@@ -1,4 +1,4 @@
-/* tcp_usrreq.c 1.34 81/11/24 */
+/*     tcp_usrreq.c    1.81    83/07/25        */
 
 #include "../h/param.h"
 #include "../h/systm.h"
 
 #include "../h/param.h"
 #include "../h/systm.h"
 #include "../h/socket.h"
 #include "../h/socketvar.h"
 #include "../h/protosw.h"
 #include "../h/socket.h"
 #include "../h/socketvar.h"
 #include "../h/protosw.h"
-#include "../net/inet.h"
-#include "../net/inet_pcb.h"
-#include "../net/inet_systm.h"
+#include "../h/errno.h"
+
 #include "../net/if.h"
 #include "../net/if.h"
-#include "../net/imp.h"
-#include "../net/ip.h"
-#include "../net/ip_var.h"
-#include "../net/tcp.h"
-#include "../net/tcp_fsm.h"
-#include "../net/tcp_var.h"
-#include "/usr/include/errno.h"
+#include "../net/route.h"
+
+#include "../netinet/in.h"
+#include "../netinet/in_pcb.h"
+#include "../netinet/in_systm.h"
+#include "../netinet/ip.h"
+#include "../netinet/ip_var.h"
+#include "../netinet/tcp.h"
+#include "../netinet/tcp_fsm.h"
+#include "../netinet/tcp_seq.h"
+#include "../netinet/tcp_timer.h"
+#include "../netinet/tcp_var.h"
+#include "../netinet/tcpip.h"
+#include "../netinet/tcp_debug.h"
 
 
+/*
+ * TCP protocol interface to socket abstraction.
+ */
+extern char *tcpstates[];
 struct tcpcb *tcp_newtcpcb();
 struct tcpcb *tcp_newtcpcb();
+int    tcpsenderrors;
+
 /*
 /*
- * Process a TCP user request for tcp tb.  If this is a send request
+ * Process a TCP user request for TCP tb.  If this is a send request
  * then m is the mbuf chain of send data.  If this is a timer expiration
  * (called from the software clock routine), then timertype tells which timer.
  */
  * then m is the mbuf chain of send data.  If this is a timer expiration
  * (called from the software clock routine), then timertype tells which timer.
  */
-tcp_usrreq(so, req, m, addr)
+/*ARGSUSED*/
+tcp_usrreq(so, req, m, nam, rights)
        struct socket *so;
        int req;
        struct socket *so;
        int req;
-       struct mbuf *m;
-       caddr_t addr;
+       struct mbuf *m, *nam, *rights;
 {
        register struct inpcb *inp = sotoinpcb(so);
        register struct tcpcb *tp;
        int s = splnet();
 {
        register struct inpcb *inp = sotoinpcb(so);
        register struct tcpcb *tp;
        int s = splnet();
-       register int nstate;
        int error = 0;
        int error = 0;
-COUNT(TCP_USRREQ);
+       int ostate;
 
 
+       if (rights && rights->m_len) {
+               splx(s);
+               return (EINVAL);
+       }
        /*
        /*
-        * Make sure attached.  If not,
-        * only PRU_ATTACH is valid.
+        * When a TCP is attached to a socket, then there will be
+        * a (struct inpcb) pointed at by the socket, and this
+        * structure will point at a subsidiary (struct tcpcb).
         */
         */
-       if (inp == 0) {
-               if (req != PRU_ATTACH) {
-                       splx(s);
-                       return (EINVAL);
-               }
-       } else {
+       if (inp == 0 && req != PRU_ATTACH) {
+               splx(s);
+               return (EINVAL);                /* XXX */
+       }
+       if (inp) {
                tp = intotcpcb(inp);
                tp = intotcpcb(inp);
-               nstate = tp->t_state;
+               /* WHAT IF TP IS 0? */
 #ifdef KPROF
 #ifdef KPROF
-               tcp_acounts[nstate][req]++;
+               tcp_acounts[tp->t_state][req]++;
 #endif
 #endif
-       }
-
+               ostate = tp->t_state;
+       } else
+               ostate = 0;
        switch (req) {
 
        switch (req) {
 
+       /*
+        * TCP attaches to socket via PRU_ATTACH, reserving space,
+        * and an internet control block.
+        */
        case PRU_ATTACH:
                if (inp) {
                        error = EISCONN;
                        break;
                }
        case PRU_ATTACH:
                if (inp) {
                        error = EISCONN;
                        break;
                }
-               error = in_pcballoc(so, &tcb, 2048, 2048, (struct sockaddr_in *)addr);
-               if (error) {
-                       (void) m_free(dtom(tp));
+               error = tcp_attach(so);
+               if (error)
                        break;
                        break;
-               }
-               inp = (struct inpcb *)so->so_pcb;
-               if (so->so_options & SO_ACCEPTCONN) {
-                       tp = tcp_newtcpcb(inp);
-                       if (tp == 0) {
-                               error = ENOBUFS;
-                               break;
-                       }
-                       nstate = LISTEN;
-               } else
-                       nstate = CLOSED;
+               if ((so->so_options & SO_LINGER) && so->so_linger == 0)
+                       so->so_linger = TCP_LINGERTIME;
+               tp = sototcpcb(so);
                break;
 
                break;
 
+       /*
+        * PRU_DETACH detaches the TCP protocol from the socket.
+        * If the protocol state is non-embryonic, then can't
+        * do this directly: have to initiate a PRU_DISCONNECT,
+        * which may finish later; embryonic TCB's can just
+        * be discarded here.
+        */
        case PRU_DETACH:
        case PRU_DETACH:
+               if (tp->t_state > TCPS_LISTEN)
+                       tp = tcp_disconnect(tp);
+               else
+                       tp = tcp_close(tp);
                break;
 
                break;
 
+       /*
+        * Give the socket an address.
+        */
+       case PRU_BIND:
+               error = in_pcbbind(inp, nam);
+               if (error)
+                       break;
+               break;
+
+       /*
+        * Prepare to accept connections.
+        */
+       case PRU_LISTEN:
+               if (inp->inp_lport == 0)
+                       error = in_pcbbind(inp, (struct mbuf *)0);
+               if (error == 0)
+                       tp->t_state = TCPS_LISTEN;
+               break;
+
+       /*
+        * Initiate connection to peer.
+        * Create a template for use in transmissions on this connection.
+        * Enter SYN_SENT state, and mark socket as connecting.
+        * Start keep-alive timer, and seed output sequence space.
+        * Send initial segment on connection.
+        */
        case PRU_CONNECT:
        case PRU_CONNECT:
-               error = in_pcbsetpeer(inp, (struct sockaddr_in *)addr);
+               if (inp->inp_lport == 0) {
+                       error = in_pcbbind(inp, (struct mbuf *)0);
+                       if (error)
+                               break;
+               }
+               error = in_pcbconnect(inp, nam);
                if (error)
                        break;
                if (error)
                        break;
-               tp = tcp_newtcpcb(inp);
-               if (tp == 0) {
-                       inp->inp_faddr.s_addr = 0;
+               tp->t_template = tcp_template(tp);
+               if (tp->t_template == 0) {
+                       in_pcbdisconnect(inp);
                        error = ENOBUFS;
                        break;
                }
                        error = ENOBUFS;
                        break;
                }
-               tp->t_inpcb = inp;
-               inp->inp_ppcb = (caddr_t)tp;
-               (void) tcp_sndctl(tp);
-               nstate = SYN_SENT;
                soisconnecting(so);
                soisconnecting(so);
+               tp->t_state = TCPS_SYN_SENT;
+               tp->t_timer[TCPT_KEEP] = TCPTV_KEEP;
+               tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
+               tcp_sendseqinit(tp);
+               error = tcp_output(tp);
                break;
 
                break;
 
-       case PRU_ACCEPT:
-               soisconnected(so);
+       /*
+        * Create a TCP connection between two sockets.
+        */
+       case PRU_CONNECT2:
+               error = EOPNOTSUPP;
                break;
 
                break;
 
+       /*
+        * Initiate disconnect from peer.
+        * If connection never passed embryonic stage, just drop;
+        * else if don't need to let data drain, then can just drop anyways,
+        * else have to begin TCP shutdown process: mark socket disconnecting,
+        * drain unread data, state switch to reflect user close, and
+        * send segment (e.g. FIN) to peer.  Socket will be really disconnected
+        * when peer sends FIN and acks ours.
+        *
+        * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
+        */
        case PRU_DISCONNECT:
        case PRU_DISCONNECT:
-               if (nstate < ESTAB)
-                       tcp_disconnect(tp);
-               else {
-                       tp->tc_flags |= TC_SND_FIN;
-                       (void) tcp_sndctl(tp);
-                       soisdisconnecting(so);
-               }
+               tp = tcp_disconnect(tp);
                break;
 
                break;
 
-       case PRU_SHUTDOWN:
-               switch (nstate) {
-
-               case TCPS_LISTEN:
-               case TCPS_SYN_SENT:
-                       nstate = TCPS_CLOSED;
-                       break;
-
-               case TCPS_SYN_RCVD:
-               case TCPS_ESTABLISHED:
-               case TCPS_CLOSE_WAIT:
-                       tp->tc_flags |= TC_SND_FIN;
-                       (void) tcp_sndctl(tp);
-                       nstate = nstate != CLOSE_WAIT ? FIN_W1 : LAST_ACK;
-                       break;
-                       
-               case TCPS_FIN_W1:
-               case TCPS_FIN_W2:
-               case TCPS_TIME_WAIT:
-               case TCPS_CLOSING:
-               case TCPS_LAST_ACK:
-               case TCPS_RCV_WAIT:
-                       break;
+       /*
+        * Accept a connection.  Essentially all the work is
+        * done at higher levels; just return the address
+        * of the peer, storing through nam.
+        */
+       case PRU_ACCEPT: {
+               struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
 
 
-               default:
-                       goto bad;
+               nam->m_len = sizeof (struct sockaddr_in);
+               sin->sin_family = AF_INET;
+               sin->sin_port = inp->inp_fport;
+               sin->sin_addr = inp->inp_faddr;
+               break;
                }
                }
+
+       /*
+        * Mark the connection as being incapable of further output.
+        */
+       case PRU_SHUTDOWN:
+               socantsendmore(so);
+               tp = tcp_usrclosed(tp);
+               if (tp)
+                       error = tcp_output(tp);
                break;
 
                break;
 
+       /*
+        * After a receive, possibly send window update to peer.
+        */
        case PRU_RCVD:
        case PRU_RCVD:
-               if (nstate < TCPS_ESTAB)
-                       goto bad;
-               tcp_sndwin(tp);
-               if (nstate == TCPS_RCV_WAIT && rcv_empty(tp))
-                       nstate = TCPS_CLOSED;
+               (void) tcp_output(tp);
                break;
 
                break;
 
+       /*
+        * Do a send by putting data in output queue and updating urgent
+        * marker if URG set.  Possibly send more data.
+        */
        case PRU_SEND:
        case PRU_SEND:
-               switch (nstate) {
-
-               case ESTAB:
-               case CLOSE_WAIT:
-                       tcp_usrsend(tp, m);
-                       break;
-               
-               default:
-                       if (nstate < ESTAB)
-                               goto bad;
-                       m_freem(m);
-                       error = ENOTCONN;
-                       break;
+               sbappend(&so->so_snd, m);
+#ifdef notdef
+               if (tp->t_flags & TF_PUSH)
+                       tp->snd_end = tp->snd_una + so->so_snd.sb_cc;
+#endif
+               error = tcp_output(tp);
+               if (error) {            /* XXX fix to use other path */
+                       if (error == ENOBUFS)           /* XXX */
+                               error = 0;              /* XXX */
+                       tcpsenderrors++;
                }
                break;
 
                }
                break;
 
+       /*
+        * Abort the TCP.
+        */
        case PRU_ABORT:
        case PRU_ABORT:
-               tcp_abort(tp);
-               nstate = CLOSED;
+               tp = tcp_drop(tp, ECONNABORTED);
                break;
 
                break;
 
+/* SOME AS YET UNIMPLEMENTED HOOKS */
        case PRU_CONTROL:
                error = EOPNOTSUPP;
                break;
 
        case PRU_CONTROL:
                error = EOPNOTSUPP;
                break;
 
-       case PRU_SLOWTIMO:
-               switch (nstate) {
+       case PRU_SENSE:
+               error = EOPNOTSUPP;
+               break;
+/* END UNIMPLEMENTED HOOKS */
 
 
-               case 0:
-               case CLOSED:
-               case LISTEN:
-                       goto bad;
+       case PRU_RCVOOB:
+               if (so->so_oobmark == 0 &&
+                   (so->so_state & SS_RCVATMARK) == 0) {
+                       error = EINVAL;
+                       break;
+               }
+               if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
+                       error = EWOULDBLOCK;
+                       break;
+               }
+               m->m_len = 1;
+               *mtod(m, caddr_t) = tp->t_iobc;
+               break;
 
 
-               default:
-                       nstate = tcp_timers(tp, (int)addr);
+       case PRU_SENDOOB:
+               if (sbspace(&so->so_snd) < -512) {
+                       m_freem(m);
+                       error = ENOBUFS;
+                       break;
                }
                }
+               tp->snd_up = tp->snd_una + so->so_snd.sb_cc + 1;
+               sbappend(&so->so_snd, m);
+               tp->t_force = 1;
+               error = tcp_output(tp);
+               tp->t_force = 0;
                break;
 
                break;
 
-       default:
-               panic("tcp_usrreq");
-       bad:
-               printf("tcp: bad state: tcb=%x state=%d input=%d\n",
-                   tp, tp->t_state, req);
-               nstate = EFAILEC;
+       case PRU_SOCKADDR:
+               in_setsockaddr(inp, nam);
                break;
                break;
-       }
-       switch (nstate) {
 
 
-       case CLOSED:
-       case SAME:
+       case PRU_PEERADDR:
+               in_setpeeraddr(inp, nam);
                break;
 
                break;
 
-       case EFAILEC:
-               if (m)
-                       m_freem(dtom(m));
+       /*
+        * TCP slow timer went off; going through this
+        * routine for tracing's sake.
+        */
+       case PRU_SLOWTIMO:
+               tp = tcp_timers(tp, (int)nam);
+               req |= (int)nam << 8;           /* for debug's sake */
                break;
 
        default:
                break;
 
        default:
-               tp->t_state = nstate;
-               break;
+               panic("tcp_usrreq");
        }
        }
+       if (tp && (so->so_options & SO_DEBUG))
+               tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req);
        splx(s);
        return (error);
 }
 
        splx(s);
        return (error);
 }
 
-struct tcpcb *
-tcp_newtcpcb(inp)
-       struct inpcb *inp;
+int    tcp_sendspace = 1024*2;
+int    tcp_recvspace = 1024*2;
+/*
+ * Attach TCP protocol to socket, allocating
+ * internet protocol control block, tcp control block,
+ * buffer space, and entering LISTEN state if to accept connections.
+ */
+tcp_attach(so)
+       struct socket *so;
 {
 {
-       struct mbuf *m = m_getclr(0);
-       register struct tcpcb *tp;
-COUNT(TCP_NEWTCPCB);
-
-       if (m == 0)
-               return (0);
-       tp = mtod(m, struct tcpcb *);
-
-       /*
-        * Make empty reassembly queue.
-        */
-       tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp;
-
-       /*
-        * Initialize sequence numbers and round trip retransmit timer.
-        */
-       tp->t_xmtime = T_REXMT;
-       tp->snd_end = tp->seq_fin = tp->snd_nxt = tp->snd_hi = tp->snd_una =
-           tp->iss = tcp_iss;
-       tp->snd_off = tp->iss + 1;
-       tcp_iss += (ISSINCR >> 1) + 1;
-
-       /*
-        * Hook to inpcb.
-        */
-       tp->t_inpcb = inp;
-       inp->inp_ppcb = (caddr_t)tp;
-       return (tp);
-}
-
-tcp_disconnect(tp)
        register struct tcpcb *tp;
        register struct tcpcb *tp;
-{
-       register struct tcpiphdr *t;
-
-COUNT(TCP_DISCONNECT);
-       tcp_tcancel(tp);
-       t = tp->seg_next;
-       for (; t != (struct tcpiphdr *)tp; t = (struct tcpiphdr *)t->ti_next)
-               m_freem(dtom(t));
-       tcp_drainunack(tp);
-       if (tp->t_template) {
-               (void) m_free(dtom(tp->t_template));
-               tp->t_template = 0;
+       struct inpcb *inp;
+       int error;
+
+       error = soreserve(so, tcp_sendspace, tcp_recvspace);
+       if (error)
+               goto bad;
+       error = in_pcballoc(so, &tcb);
+       if (error)
+               goto bad;
+       inp = sotoinpcb(so);
+       tp = tcp_newtcpcb(inp);
+       if (tp == 0) {
+               error = ENOBUFS;
+               goto bad2;
        }
        }
-       in_pcbfree(tp->t_inpcb);
-       (void) m_free(dtom(tp));
+       tp->t_state = TCPS_CLOSED;
+       return (0);
+bad2:
+       in_pcbdetach(inp);
+bad:
+       return (error);
 }
 
 }
 
-tcp_abort(tp)
+/*
+ * Initiate (or continue) disconnect.
+ * If embryonic state, just send reset (once).
+ * If in ``let data drain'' option and linger null, just drop.
+ * Otherwise (hard), mark socket disconnecting and drop
+ * current input data; switch states based on user close, and
+ * send segment to peer (with FIN).
+ */
+struct tcpcb *
+tcp_disconnect(tp)
        register struct tcpcb *tp;
 {
        register struct tcpcb *tp;
 {
+       struct socket *so = tp->t_inpcb->inp_socket;
 
 
-COUNT(TCP_ABORT);
-       switch (tp->t_state) {
-
-       case SYN_RCVD:
-       case ESTAB:
-       case FIN_W1:
-       case FIN_W2:
-       case CLOSE_WAIT:
-               tp->tc_flags |= TC_SND_RST;
-               tcp_sndnull(tp);
+       if (tp->t_state < TCPS_ESTABLISHED)
+               tp = tcp_close(tp);
+       else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
+               tp = tcp_drop(tp, 0);
+       else {
+               soisdisconnecting(so);
+               sbflush(&so->so_rcv);
+               tp = tcp_usrclosed(tp);
+               if (tp)
+                       (void) tcp_output(tp);
        }
        }
-       soisdisconnected(tp->t_inpcb->inp_socket);
-       tcp_disconnect(tp);
+       return (tp);
 }
 
 /*
 }
 
 /*
- * Send data queue headed by m0 into the protocol.
+ * User issued close, and wish to trail through shutdown states:
+ * if never received SYN, just forget it.  If got a SYN from peer,
+ * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
+ * If already got a FIN from peer, then almost done; go to LAST_ACK
+ * state.  In all other cases, have already sent FIN to peer (e.g.
+ * after PRU_SHUTDOWN), and just have to play tedious game waiting
+ * for peer to send FIN or not respond to keep-alives, etc.
+ * We can let the user exit from the close as soon as the FIN is acked.
  */
  */
-tcp_usrsend(tp, m0)
+struct tcpcb *
+tcp_usrclosed(tp)
        register struct tcpcb *tp;
        register struct tcpcb *tp;
-       struct mbuf *m0;
-{
-       register struct socket *so = tp->t_inpcb->inp_socket;
-COUNT(TCP_USRSEND);
-
-       sbappend(&so->so_snd, m0);
-       if (tp->t_options & TO_EOL)
-               tp->snd_end = tp->snd_off + so->so_snd.sb_cc;
-       if (tp->t_options & TO_URG) {
-               tp->snd_urp = tp->snd_off + so->so_snd.sb_cc + 1;
-               tp->tc_flags |= TC_SND_URG;
-       }
-       (void) tcp_send(tp);
-}
-
-/*ARGSUSED*/
-tcp_sense(m)
-       struct mbuf *m;
-{
-
-COUNT(TCP_SENSE);
-       return (EOPNOTSUPP);
-}
-
-tcp_drop(tp, errno)
-       struct tcpcb *tp;
-       int errno;
 {
 {
-       struct socket *so = tp->t_inpcb->inp_socket;
 
 
-COUNT(TCP_ERROR);
-       so->so_error = errno;
-       sorwakeup(so);
-       sowwakeup(so);
-       tcp_disconnect(tp);
-}
-
-tcp_drain()
-{
-       register struct inpcb *ip;
-
-COUNT(TCP_DRAIN);
-       for (ip = tcb.inp_next; ip != &tcb; ip = ip->inp_next)
-               tcp_drainunack(intotcpcb(ip));
-}
+       switch (tp->t_state) {
 
 
-tcp_drainunack(tp)
-       register struct tcpcb *tp;
-{
-       register struct mbuf *m;
+       case TCPS_CLOSED:
+       case TCPS_LISTEN:
+       case TCPS_SYN_SENT:
+               tp->t_state = TCPS_CLOSED;
+               tp = tcp_close(tp);
+               break;
 
 
-COUNT(TCP_DRAINUNACK);
-       for (m = tp->seg_unack; m; m = m->m_act)
-               m_freem(m);
-       tp->seg_unack = 0;
-}
-       
-tcp_ctlinput(m)
-       struct mbuf *m;
-{
+       case TCPS_SYN_RECEIVED:
+       case TCPS_ESTABLISHED:
+               tp->t_state = TCPS_FIN_WAIT_1;
+               break;
 
 
-COUNT(TCP_CTLINPUT);
-       m_freem(m);
+       case TCPS_CLOSE_WAIT:
+               tp->t_state = TCPS_LAST_ACK;
+               break;
+       }
+       if (tp && tp->t_state >= TCPS_FIN_WAIT_2)
+               soisdisconnected(tp->t_inpcb->inp_socket);
+       return (tp);
 }
 }