X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/blobdiff_plain/e006f4251b337cb7b1d23e8987eb2dbb22708a7a..b01302422930364f2446dfe7a5d287ab3d0ef869:/usr/src/sys/netinet/tcp_usrreq.c diff --git a/usr/src/sys/netinet/tcp_usrreq.c b/usr/src/sys/netinet/tcp_usrreq.c index 61ce30d427..b0ab7eabac 100644 --- a/usr/src/sys/netinet/tcp_usrreq.c +++ b/usr/src/sys/netinet/tcp_usrreq.c @@ -1,473 +1,368 @@ -/* tcp_usrreq.c 1.15 81/10/30 */ +/* tcp_usrreq.c 1.57 82/04/30 */ #include "../h/param.h" #include "../h/systm.h" #include "../h/mbuf.h" #include "../h/socket.h" -#include "../inet/inet.h" -#include "../inet/inet_systm.h" -#include "../inet/imp.h" -#include "../inet/ip.h" -#include "../inet/tcp.h" -#define TCPFSTAB -#ifdef TCPDEBUG -#define TCPSTATES -#endif -#include "../inet/tcp_fsm.h" - -tcp_timeo() +#include "../h/socketvar.h" +#include "../h/protosw.h" +#include "../net/in.h" +#include "../net/route.h" +#include "../net/in_pcb.h" +#include "../net/in_systm.h" +#include "../net/if.h" +#include "../net/ip.h" +#include "../net/ip_var.h" +#include "../net/tcp.h" +#include "../net/tcp_fsm.h" +#include "../net/tcp_seq.h" +#include "../net/tcp_timer.h" +#include "../net/tcp_var.h" +#include "../net/tcpip.h" +#include "../net/tcp_debug.h" +#include + +/* + * TCP protocol interface to socket abstraction. + */ +extern char *tcpstates[]; +struct tcpcb *tcp_newtcpcb(); + +/* + * Process a TCP user request for TCP tb. If this is a send request + * then m is the mbuf chain of send data. If this is a timer expiration + * (called from the software clock routine), then timertype tells which timer. + */ +tcp_usrreq(so, req, m, addr) + struct socket *so; + int req; + struct mbuf *m; + caddr_t addr; { - register struct tcb *tp; + register struct inpcb *inp = sotoinpcb(so); + register struct tcpcb *tp; int s = splnet(); -COUNT(TCP_TIMEO); + int error = 0; + int ostate; +COUNT(TCP_USRREQ); /* - * Search through tcb's and update active timers. + * When a TCP is attached to a socket, then there will be + * a (struct inpcb) pointed at by the socket, and this + * structure will point at a subsidary (struct tcpcb). + * The normal sequence of events is: + * PRU_ATTACH creating these structures + * PRU_CONNECT connecting to a remote peer + * (PRU_SEND|PRU_RCVD)* exchanging data + * PRU_DISCONNECT disconnecting from remote peer + * PRU_DETACH deleting the structures + * With the operations from PRU_CONNECT through PRU_DISCONNECT + * possible repeated several times. + * + * MULTIPLE CONNECTS ARE NOT YET IMPLEMENTED. */ - for (tp = tcb_head; tp != NULL; tp = tp->t_tcb_next) { - if (tp->t_init != 0 && --tp->t_init == 0) - tcp_usrreq(ISTIMER, TINIT, tp, 0); - if (tp->t_rexmt != 0 && --tp->t_rexmt == 0) - tcp_usrreq(ISTIMER, TREXMT, tp, 0); - if (tp->t_rexmttl != 0 && --tp->t_rexmttl == 0) - tcp_usrreq(ISTIMER, TREXMTTL, tp, 0); - if (tp->t_persist != 0 && --tp->t_persist == 0) - tcp_usrreq(ISTIMER, TPERSIST, tp, 0); - if (tp->t_finack != 0 && --tp->t_finack == 0) - tcp_usrreq(ISTIMER, TFINACK, tp, 0); - tp->t_xmt++; + if (inp == 0 && req != PRU_ATTACH) { + splx(s); + return (EINVAL); /* XXX */ } - tcp_iss += ISSINCR; /* increment iss */ - timeout(tcp_timeo, 0, hz); /* reschedule every second */ - splx(s); -} - -tcp_usrreq(input, timertype, tp, mp) - int input, timertype; - register struct tcb *tp; - struct mbuf *mp; -{ - int s = splnet(); - register int nstate; -#ifdef TCPDEBUG - struct tcp_debug tdb; + if (inp) { + tp = intotcpcb(inp); +#ifdef KPROF + tcp_acounts[tp->t_state][req]++; #endif -COUNT(TCP_USRREQ); - - nstate = tp->t_state; - tp->tc_flags &= ~TC_NET_KEEP; - acounts[nstate][input]++; -#ifdef TCPDEBUG - if ((tp->t_ucb->uc_flags & UDEBUG) || tcpconsdebug) { - tdb_setup(tp, (struct th *)0, input, &tdb); - tdb.td_tim = timertype; - } else - tdb.td_tod = 0; -#endif - switch (tcp_fstab[nstate][input]) { - - default: - printf("tcp: bad state: tcb=%x state=%d input=%d\n", - tp, tp->t_state, input); - nstate = EFAILEC; - break; - - case LIS_CLS: /* 1 */ - t_open(tp, PASSIVE); - nstate = LISTEN; - break; + ostate = tp->t_state; + } + switch (req) { - case SYS_CLS: /* 2 */ - t_open(tp, ACTIVE); - send_ctl(tp); - nstate = SYN_SENT; + /* + * TCP attaches to socket via PRU_ATTACH, reserving space, + * and internet and TCP control blocks. + * If the socket is to receive connections, + * then the LISTEN state is entered. + */ + case PRU_ATTACH: + if (inp) { + error = EISCONN; + break; + } + error = tcp_attach(so, (struct sockaddr *)addr); + if (error) + break; + if ((so->so_options & SO_DONTLINGER) == 0) + so->so_linger = TCP_LINGERTIME; + tp = sototcpcb(so); break; - case CLS_OPN: /* 10 */ - t_close(tp, UCLOSED); - nstate = CLOSED; + /* + * PRU_DETACH detaches the TCP protocol from the socket. + * If the protocol state is non-embryonic, then can't + * do this directly: have to initiate a PRU_DISCONNECT, + * which may finish later; embryonic TCB's can just + * be discarded here. + */ + case PRU_DETACH: + if (tp->t_state > TCPS_LISTEN) + tcp_disconnect(tp); + else { + tcp_close(tp); + tp = 0; + } break; - case CL2_CLW: /* 10 */ - tp->tc_flags |= TC_SND_FIN; - send_ctl(tp); - tp->tc_flags |= TC_USR_CLOSED; - nstate = CLOSING2; + /* + * Initiate connection to peer. + * Create a template for use in transmissions on this connection. + * Enter SYN_SENT state, and mark socket as connecting. + * Start keep-alive timer, and seed output sequence space. + * Send initial segment on connection. + */ + case PRU_CONNECT: + error = in_pcbconnect(inp, (struct sockaddr_in *)addr); + if (error) + break; + tp->t_template = tcp_template(tp); + if (tp->t_template == 0) { + in_pcbdisconnect(inp); + error = ENOBUFS; + break; + } + soisconnecting(so); + tp->t_state = TCPS_SYN_SENT; + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP; + tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2; + tcp_sendseqinit(tp); + error = tcp_output(tp); break; - case TIMERS: /* 14,17,34,35,36,37,38 */ - nstate = tcp_timers(tp, timertype); + /* + * Initiate disconnect from peer. + * If connection never passed embryonic stage, just drop; + * else if don't need to let data drain, then can just drop anyways, + * else have to begin TCP shutdown process: mark socket disconnecting, + * drain unread data, state switch to reflect user close, and + * send segment (e.g. FIN) to peer. Socket will be really disconnected + * when peer sends FIN and acks ours. + * + * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. + */ + case PRU_DISCONNECT: + tcp_disconnect(tp); break; - case CLS_RWT: /* 20 */ - present_data(tp); - if (rcv_empty(tp)) { - t_close(tp, UCLOSED); - nstate = CLOSED; - } else - nstate = RCV_WAIT; + /* + * Accept a connection. Essentially all the work is + * done at higher levels; just return the address + * of the peer, storing through addr. + */ + case PRU_ACCEPT: { + struct sockaddr_in *sin = (struct sockaddr_in *)addr; + + if (sin) { + bzero((caddr_t)sin, sizeof (*sin)); + sin->sin_family = AF_INET; + sin->sin_port = inp->inp_fport; + sin->sin_addr = inp->inp_faddr; + } + } break; - case FW1_SYR: /* 24,25 */ - tp->tc_flags |= TC_SND_FIN; - send_ctl(tp); - tp->tc_flags |= TC_USR_CLOSED; - nstate = FIN_W1; + /* + * Mark the connection as being incapable of further output. + */ + case PRU_SHUTDOWN: + socantsendmore(so); + tcp_usrclosed(tp); + error = tcp_output(tp); break; - case SSS_SND: /* 40,41 */ - nstate = sss_send(tp, mp); + /* + * After a receive, possibly send window update to peer. + */ + case PRU_RCVD: + (void) tcp_output(tp); break; - case SSS_RCV: /* 42 */ - send_ctl(tp); /* send new window */ - present_data(tp); + /* + * Do a send by putting data in output queue and updating urgent + * marker if URG set. Possibly send more data. + */ + case PRU_SEND: + sbappend(&so->so_snd, m); +#ifdef notdef + if (tp->t_flags & TF_PUSH) + tp->snd_end = tp->snd_una + so->so_snd.sb_cc; +#endif + error = tcp_output(tp); break; - case CLS_NSY: /* 44 */ - t_close(tp, UABORT); - nstate = CLOSED; + /* + * Abort the TCP. + */ + case PRU_ABORT: + tcp_drop(tp, ECONNABORTED); break; - case CLS_SYN: /* 45 */ - tp->tc_flags |= TC_SND_RST; - send_null(tp); - t_close(tp, UABORT); - nstate = CLOSED; +/* SOME AS YET UNIMPLEMENTED HOOKS */ + case PRU_CONTROL: + error = EOPNOTSUPP; break; - case CLS_ACT: /* 47 */ - t_close(tp, UNETDWN); - nstate = CLOSED; + case PRU_SENSE: + error = EOPNOTSUPP; break; +/* END UNIMPLEMENTED HOOKS */ - case NOP: + case PRU_RCVOOB: + if (so->so_oobmark == 0 && + (so->so_state & SS_RCVATMARK) == 0) { + error = EINVAL; + break; + } + if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { + error = EWOULDBLOCK; + break; + } + *mtod(m, caddr_t) = tp->t_iobc; break; - case CLS_ERR: - to_user(tp->t_ucb, UCLSERR); - break; - } -#ifdef TCPDEBUG - if (tdb.td_tod) - tdb_stuff(&tdb, nstate); + case PRU_SENDOOB: +#ifdef TCPTRUEOOB + if (tp->t_flags & TF_DOOOB) { + tp->t_oobseq++; + tp->t_oobc = *mtod(m, caddr_t); + tp->t_oobmark = tp->snd_una + so->so_snd.sb_cc; +printf("sendoob seq now %x oobc %x\n", tp->t_oobseq, tp->t_oobc); + tp->t_oobflags |= TCPOOB_NEEDACK; + /* what to do ...? */ + if (error = tcp_output(tp)) + break; + } #endif - /* YECH */ - switch (nstate) { + if (sbspace(&so->so_snd) < -512) { + error = ENOBUFS; + break; + } + tp->snd_up = tp->snd_una + so->so_snd.sb_cc + 1; + sbappend(&so->so_snd, m); +#ifdef notdef + if (tp->t_flags & TF_PUSH) + tp->snd_end = tp->snd_una + so->so_snd.sb_cc; +#endif + tp->t_force = 1; + error = tcp_output(tp); + tp->t_force = 0; + break; - case CLOSED: - case SAME: + case PRU_SOCKADDR: + in_setsockaddr((struct sockaddr_in *)addr, inp); break; - case EFAILEC: - if (mp) - m_freem(dtom(mp)); + /* + * TCP slow timer went off; going through this + * routine for tracing's sake. + */ + case PRU_SLOWTIMO: + tcp_timers(tp, (int)addr); + req |= (int)addr << 8; /* for debug's sake */ break; default: - tp->t_state = nstate; - break; + panic("tcp_usrreq"); } + if (tp && (so->so_options & SO_DEBUG)) + tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req); splx(s); + return (error); } -t_open(tp, mode) /* set up a tcb for a connection */ - register struct tcb *tp; - int mode; -{ - register struct ucb *up; -COUNT(T_OPEN); - - /* enqueue the tcb */ - - if (tcb_head == NULL) { - tcb_head = tp; - tcb_tail = tp; - } else { - tp->t_tcb_next = tcb_head; - tcb_head->t_tcb_prev = tp; - tcb_head = tp; - } - - /* initialize non-zero tcb fields */ - - tp->t_rcv_next = (struct th *)tp; - tp->t_rcv_prev = (struct th *)tp; - tp->t_xmtime = T_REXMT; - tp->snd_end = tp->seq_fin = tp->snd_nxt = tp->snd_hi = - tp->snd_una = tp->iss = tcp_iss; - tp->snd_off = tp->iss + 1; - tcp_iss += (ISSINCR >> 1) + 1; - - /* set timeout for open */ - - up = tp->t_ucb; - tp->t_init = (up->uc_timeo != 0 ? up->uc_timeo : - (mode == ACTIVE ? T_INIT : 0)); - up->uc_timeo = 0; /* overlays uc_ssize */ -} - -t_close(tp, state) - register struct tcb *tp; - short state; +int tcp_sendspace = 1024*2; +int tcp_recvspace = 1024*2; +/* + * Attach TCP protocol to socket, allocating + * internet protocol control block, tcp control block, + * bufer space, and entering LISTEN state if to accept connections. + */ +tcp_attach(so, sa) + struct socket *so; + struct sockaddr *sa; { - register struct ucb *up; - register struct th *t; - register struct mbuf *m; -COUNT(T_CLOSE); - - up = tp->t_ucb; - - tp->t_init = tp->t_rexmt = tp->t_rexmttl = tp->t_persist = - tp->t_finack = 0; - - /* delete tcb */ - - if (tp->t_tcb_prev == NULL) - tcb_head = tp->t_tcb_next; - else - tp->t_tcb_prev->t_tcb_next = tp->t_tcb_next; - if (tp->t_tcb_next == NULL) - tcb_tail = tp->t_tcb_prev; - else - tp->t_tcb_next->t_tcb_prev = tp->t_tcb_prev; - - /* free all data on receive and send buffers */ - - for (t = tp->t_rcv_next; t != (struct th *)tp; t = t->t_next) - m_freem(dtom(t)); - - if (up->uc_rbuf != NULL) { - m_freem(up->uc_rbuf); - up->uc_rbuf = NULL; - } - up->uc_rcc = 0; - if (up->uc_sbuf != NULL) { - m_freem(up->uc_sbuf); - up->uc_sbuf = NULL; - } - up->uc_ssize = 0; - for (m = tp->t_rcv_unack; m != NULL; m = m->m_act) { - m_freem(m); - tp->t_rcv_unack = NULL; - } - if (up->uc_template) { - m_free(dtom(up->uc_template)); - up->uc_template = 0; - } - wmemfree((caddr_t)tp, 1024); - up->uc_tcb = NULL; - - /* lower buffer allocation and decrement host entry */ - - mbstat.m_lowat -= up->uc_snd + (up->uc_rhiwat/MSIZE) + 2; - mbstat.m_hiwat = 2 * mbstat.m_lowat; - if (up->uc_host != NULL) { - h_free(up->uc_host); - up->uc_host = NULL; - } - - /* if user has initiated close (via close call), delete ucb - entry, otherwise just wakeup so user can issue close call */ - - if (tp->tc_flags&TC_USR_ABORT) - up->uc_proc = NULL; - else - to_user(up, state); + register struct tcpcb *tp; + struct inpcb *inp; + int error; + + error = in_pcbattach(so, &tcb, + tcp_sendspace, tcp_recvspace, (struct sockaddr_in *)sa); + if (error) + return (error); + inp = (struct inpcb *)so->so_pcb; + tp = tcp_newtcpcb(inp); + if (so->so_options & SO_ACCEPTCONN) { + if (tp == 0) { + in_pcbdetach(inp); + return (ENOBUFS); + } + tp->t_state = TCPS_LISTEN; + } else + tp->t_state = TCPS_CLOSED; + return (0); } -sss_send(tp, m0) - register struct tcb *tp; - struct mbuf *m0; +/* + * Initiate (or continue) disconnect. + * If embryonic state, just send reset (once). + * If not in ``let data drain'' option, just drop. + * Otherwise (hard), mark socket disconnecting and drop + * current input data; switch states based on user close, and + * send segment to peer (with FIN). + */ +tcp_disconnect(tp) + struct tcpcb *tp; { - register struct mbuf *m, *n; - register struct ucb *up = tp->t_ucb; - register off; - seq_t last; -COUNT(SSS_SEND); + struct socket *so = tp->t_inpcb->inp_socket; - last = tp->snd_off; - for (m = n = m0; m != NULL; m = m->m_next) { - up->uc_ssize++; - if (m->m_off > MMAXOFF) - up->uc_ssize += NMBPG; - last += m->m_len; - } - if ((m = up->uc_sbuf) == NULL) - up->uc_sbuf = n; + if (tp->t_state < TCPS_ESTABLISHED) + tcp_close(tp); + else if (so->so_linger == 0) + tcp_drop(tp, 0); else { - while (m->m_next != NULL) { - m = m->m_next; - last += m->m_len; - } - if (m->m_off <= MMAXOFF) { - last += m->m_len; - off = m->m_off + m->m_len; - while (n && n->m_off <= MMAXOFF && - (MMAXOFF - off) >= n->m_len) { - bcopy((caddr_t)((int)n + n->m_off), - (caddr_t)((int)m + off), n->m_len); - m->m_len += n->m_len; - off += n->m_len; - up->uc_ssize--; - n = m_free(n); - } - } - m->m_next = n; + soisdisconnecting(so); + sbflush(&so->so_rcv); + tcp_usrclosed(tp); + (void) tcp_output(tp); } - if (up->uc_flags & UEOL) - tp->snd_end = last; - if (up->uc_flags & UURG) { - tp->snd_urp = last+1; - tp->tc_flags |= TC_SND_URG; - } - send(tp); - return (SAME); } -tcp_timers(tp, timertype) - register struct tcb *tp; - int timertype; +/* + * User issued close, and wish to trail through shutdown states: + * if never received SYN, just forget it. If got a SYN from peer, + * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. + * If already got a FIN from peer, then almost done; go to LAST_ACK + * state. In all other cases, have already sent FIN to peer (e.g. + * after PRU_SHUTDOWN), and just have to play tedious game waiting + * for peer to send FIN or not respond to keep-alives, etc. + * We can let the user exit from the close as soon as the FIN is acked. + */ +tcp_usrclosed(tp) + struct tcpcb *tp; { -COUNT(TCP_TIMERS); - switch (timertype) { - - case TINIT: /* initialization timer */ - if ((tp->tc_flags&TC_SYN_ACKED) == 0) { /* 35 */ - t_close(tp, UINTIMO); - return (CLOSED); - } - return (SAME); - - case TFINACK: /* fin-ack timer */ - switch (tp->t_state) { - - case TIME_WAIT: - /* - * We can be sure our ACK of foreign FIN was rcvd, - * and can close if no data left for user. - */ - if (rcv_empty(tp)) { - t_close(tp, UCLOSED); /* 14 */ - return (CLOSED); - } - return (RCV_WAIT); /* 17 */ - - case CLOSING1: - tp->tc_flags |= TC_WAITED_2_ML; - return (SAME); - - default: - return (SAME); - } - - case TREXMT: /* retransmission timer */ - if (tp->t_rexmt_val > tp->snd_una) { /* 34 */ - /* - * Set up for a retransmission, increase rexmt time - * in case of multiple retransmissions. - */ - tp->snd_nxt = tp->snd_una; - tp->tc_flags |= TC_REXMT; - tp->t_xmtime = tp->t_xmtime << 1; - if (tp->t_xmtime > T_REMAX) - tp->t_xmtime = T_REMAX; - send(tp); - } - return (SAME); + switch (tp->t_state) { - case TREXMTTL: /* retransmit too long */ - if (tp->t_rtl_val > tp->snd_una) /* 36 */ - to_user(tp->t_ucb, URXTIMO); - /* - * If user has already closed, abort the connection. - */ - if (tp->tc_flags & TC_USR_CLOSED) { - t_close(tp, URXTIMO); - return (CLOSED); - } - return (SAME); - - case TPERSIST: /* persist timer */ - /* - * Force a byte send through closed window. - */ - tp->tc_flags |= TC_FORCE_ONE; - send(tp); - return (SAME); - } - panic("tcp_timers"); -} - -/* THIS ROUTINE IS A CROCK */ -to_user(up, state) - register struct ucb *up; - register short state; -{ -COUNT(TO_USER); - - up->uc_state |= state; - netwakeup(up); - if (state == UURGENT) - psignal(up->uc_proc, SIGURG); -} + case TCPS_LISTEN: + case TCPS_SYN_SENT: + tp->t_state = TCPS_CLOSED; + tcp_close(tp); + break; -#ifdef TCPDEBUG -tcp_prt(tdp) - register struct tcp_debug *tdp; -{ -COUNT(TCP_PRT); + case TCPS_SYN_RECEIVED: + case TCPS_ESTABLISHED: + tp->t_state = TCPS_FIN_WAIT_1; + break; - printf("TCP(%x) %s x %s", - tdp->td_tcb, tcpstates[tdp->td_old], tcpinputs[tdp->td_inp]); - if (tdp->td_inp == ISTIMER) - printf("(%s)", tcptimers[tdp->td_tim]); - printf(" --> %s", - tcpstates[(tdp->td_new > 0) ? tdp->td_new : tdp->td_old]); - /* GROSS... DEPENDS ON SIGN EXTENSION OF CHARACTERS */ - if (tdp->td_new < 0) - printf(" (FAILED)"); - if (tdp->td_sno) { - printf(" sno %x ano %x win %d len %d flags %x", - tdp->td_sno, tdp->td_ano, tdp->td_wno, tdp->td_lno, tdp->td_flg); + case TCPS_CLOSE_WAIT: + tp->t_state = TCPS_LAST_ACK; + break; } - printf("\n"); -} -#endif -#ifdef TCPDEBUG -tdb_setup(tp, n, input, tdp) - struct tcb *tp; - register struct th *n; - int input; - register struct tcp_debug *tdp; -{ - - tdp->td_tod = time; - tdp->td_tcb = tp; - tdp->td_old = tp->t_state; - tdp->td_inp = input; - tdp->td_tim = 0; - tdp->td_new = -1; - if (n) { - tdp->td_sno = n->t_seq; - tdp->td_ano = n->t_ackno; - tdp->td_wno = n->t_win; - tdp->td_lno = n->t_len; - tdp->td_flg = n->th_flags; - } else - tdp->td_sno = tdp->td_ano = tdp->td_wno = tdp->td_lno = - tdp->td_flg = 0; + if (tp->t_state >= TCPS_FIN_WAIT_2) + soisdisconnected(tp->t_inpcb->inp_socket); } - -tdb_stuff(tdp, nstate) - struct tcp_debug *tdp; - int nstate; -{ - - tdp->td_new = nstate; - tcp_debug[tdbx++ % TDBSIZE] = *tdp; - if (tcpconsdebug & 2) - tcp_prt(tdp); -} -#endif