X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/blobdiff_plain/ee954ff170ffcebec3b59cf47a38de446bdaa376..224f3a720c8acd42674bb9f2a4864989c7ae75f0:/usr/src/sys/netinet/tcp_input.c diff --git a/usr/src/sys/netinet/tcp_input.c b/usr/src/sys/netinet/tcp_input.c index e13bcd3233..2e010262e3 100644 --- a/usr/src/sys/netinet/tcp_input.c +++ b/usr/src/sys/netinet/tcp_input.c @@ -1,28 +1,34 @@ -/* tcp_input.c 1.92 83/04/03 */ +/* + * Copyright (c) 1982 Regents of the University of California. + * All rights reserved. The Berkeley software License Agreement + * specifies the terms and conditions for redistribution. + * + * @(#)tcp_input.c 6.15 (Berkeley) %G% + */ -#include "../h/param.h" -#include "../h/systm.h" -#include "../h/mbuf.h" -#include "../h/protosw.h" -#include "../h/socket.h" -#include "../h/socketvar.h" -#include "../h/errno.h" +#include "param.h" +#include "systm.h" +#include "mbuf.h" +#include "protosw.h" +#include "socket.h" +#include "socketvar.h" +#include "errno.h" #include "../net/if.h" #include "../net/route.h" -#include "../netinet/in.h" -#include "../netinet/in_pcb.h" -#include "../netinet/in_systm.h" -#include "../netinet/ip.h" -#include "../netinet/ip_var.h" -#include "../netinet/tcp.h" -#include "../netinet/tcp_fsm.h" -#include "../netinet/tcp_seq.h" -#include "../netinet/tcp_timer.h" -#include "../netinet/tcp_var.h" -#include "../netinet/tcpip.h" -#include "../netinet/tcp_debug.h" +#include "in.h" +#include "in_pcb.h" +#include "in_systm.h" +#include "ip.h" +#include "ip_var.h" +#include "tcp.h" +#include "tcp_fsm.h" +#include "tcp_seq.h" +#include "tcp_timer.h" +#include "tcp_var.h" +#include "tcpip.h" +#include "tcp_debug.h" int tcpprintfs = 0; int tcpcksum = 1; @@ -30,6 +36,126 @@ struct tcpiphdr tcp_saveti; extern tcpnodelack; struct tcpcb *tcp_newtcpcb(); + +/* + * Insert segment ti into reassembly queue of tcp with + * control block tp. Return TH_FIN if reassembly now includes + * a segment with FIN. The macro form does the common case inline + * (segment is the next to be received on an established connection, + * and the queue is empty), avoiding linkage into and removal + * from the queue and repetition of various conversions. + */ +#define TCP_REASS(tp, ti, m, so, flags) { \ + if ((ti)->ti_seq == (tp)->rcv_nxt && \ + (tp)->seg_next == (struct tcpiphdr *)(tp) && \ + (tp)->t_state == TCPS_ESTABLISHED) { \ + (tp)->rcv_nxt += (ti)->ti_len; \ + flags = (ti)->ti_flags & TH_FIN; \ + sbappend(&(so)->so_rcv, (m)); \ + sorwakeup(so); \ + } else \ + (flags) = tcp_reass((tp), (ti)); \ +} + +tcp_reass(tp, ti) + register struct tcpcb *tp; + register struct tcpiphdr *ti; +{ + register struct tcpiphdr *q; + struct socket *so = tp->t_inpcb->inp_socket; + struct mbuf *m; + int flags; + + /* + * Call with ti==0 after become established to + * force pre-ESTABLISHED data up to user socket. + */ + if (ti == 0) + goto present; + + /* + * Find a segment which begins after this one does. + */ + for (q = tp->seg_next; q != (struct tcpiphdr *)tp; + q = (struct tcpiphdr *)q->ti_next) + if (SEQ_GT(q->ti_seq, ti->ti_seq)) + break; + + /* + * If there is a preceding segment, it may provide some of + * our data already. If so, drop the data from the incoming + * segment. If it provides all of our data, drop us. + */ + if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) { + register int i; + q = (struct tcpiphdr *)q->ti_prev; + /* conversion to int (in i) handles seq wraparound */ + i = q->ti_seq + q->ti_len - ti->ti_seq; + if (i > 0) { + if (i >= ti->ti_len) + goto drop; + m_adj(dtom(ti), i); + ti->ti_len -= i; + ti->ti_seq += i; + } + q = (struct tcpiphdr *)(q->ti_next); + } + + /* + * While we overlap succeeding segments trim them or, + * if they are completely covered, dequeue them. + */ + while (q != (struct tcpiphdr *)tp) { + register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; + if (i <= 0) + break; + if (i < q->ti_len) { + q->ti_seq += i; + q->ti_len -= i; + m_adj(dtom(q), i); + break; + } + q = (struct tcpiphdr *)q->ti_next; + m = dtom(q->ti_prev); + remque(q->ti_prev); + m_freem(m); + } + + /* + * Stick new segment in its place. + */ + insque(ti, q->ti_prev); + +present: + /* + * Present data to user, advancing rcv_nxt through + * completed sequence space. + */ + if (TCPS_HAVERCVDSYN(tp->t_state) == 0) + return (0); + ti = tp->seg_next; + if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt) + return (0); + if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) + return (0); + do { + tp->rcv_nxt += ti->ti_len; + flags = ti->ti_flags & TH_FIN; + remque(ti); + m = dtom(ti); + ti = (struct tcpiphdr *)ti->ti_next; + if (so->so_state & SS_CANTRCVMORE) + m_freem(m); + else + sbappend(&so->so_rcv, m); + } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); + sorwakeup(so); + return (flags); +drop: + m_freem(dtom(ti)); + return (0); +} + /* * TCP input routine, follows pages 65-76 of the * protocol specification dated September, 1981 very closely. @@ -98,11 +224,13 @@ tcp_input(m0) tlen -= off; ti->ti_len = tlen; if (off > sizeof (struct tcphdr)) { - if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) { - tcpstat.tcps_hdrops++; - return; + if (m->m_len < sizeof(struct ip) + off) { + if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) { + tcpstat.tcps_hdrops++; + return; + } + ti = mtod(m, struct tcpiphdr *); } - ti = mtod(m, struct tcpiphdr *); om = m_get(M_DONTWAIT, MT_DATA); if (om == 0) goto drop; @@ -171,6 +299,7 @@ tcp_input(m0) inp = (struct inpcb *)so->so_pcb; inp->inp_laddr = ti->ti_dst; inp->inp_lport = ti->ti_dport; + inp->inp_options = ip_srcroute(); tp = intotcpcb(inp); tp->t_state = TCPS_LISTEN; } @@ -183,20 +312,24 @@ tcp_input(m0) tp->t_timer[TCPT_KEEP] = TCPTV_KEEP; /* - * Process options. + * Process options if not in LISTEN state, + * else do it below (after getting remote address). */ - if (om) { - tcp_dooptions(tp, om); + if (om && tp->t_state != TCPS_LISTEN) { + tcp_dooptions(tp, om, ti); om = 0; } /* * Calculate amount of space in receive window, * and then do TCP input processing. + * Receive window is amount of space in rcv queue, + * but not less than advertised window. */ tp->rcv_wnd = sbspace(&so->so_rcv); if (tp->rcv_wnd < 0) tp->rcv_wnd = 0; + tp->rcv_wnd = MAX(tp->rcv_wnd, (short)(tp->rcv_adv - tp->rcv_nxt)); switch (tp->t_state) { @@ -204,6 +337,7 @@ tcp_input(m0) * If the state is LISTEN then ignore segment if it contains an RST. * If the segment contains an ACK then it is bad and send a RST. * If it does not contain a SYN then it is not interesting; drop it. + * Don't bother responding if the destination was a broadcast. * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial * tp->iss, and send a segment: * @@ -222,6 +356,8 @@ tcp_input(m0) goto dropwithreset; if ((tiflags & TH_SYN) == 0) goto drop; + if (in_broadcast(ti->ti_dst)) + goto drop; am = m_get(M_DONTWAIT, MT_SONAME); if (am == NULL) goto drop; @@ -242,10 +378,14 @@ tcp_input(m0) tp->t_template = tcp_template(tp); if (tp->t_template == 0) { in_pcbdisconnect(inp); - inp->inp_laddr = laddr; + dropsocket = 0; /* socket is already gone */ tp = 0; goto drop; } + if (om) { + tcp_dooptions(tp, om, ti); + om = 0; + } tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2; tp->irs = ti->ti_seq; tcp_sendseqinit(tp); @@ -270,8 +410,7 @@ tcp_input(m0) */ case TCPS_SYN_SENT: if ((tiflags & TH_ACK) && -/* this should be SEQ_LT; is SEQ_LEQ for BBN vax TCP only */ - (SEQ_LT(ti->ti_ack, tp->iss) || + (SEQ_LEQ(ti->ti_ack, tp->iss) || SEQ_GT(ti->ti_ack, tp->snd_max))) goto dropwithreset; if (tiflags & TH_RST) { @@ -291,6 +430,7 @@ tcp_input(m0) if (SEQ_GT(tp->snd_una, tp->iss)) { soisconnected(so); tp->t_state = TCPS_ESTABLISHED; + tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp)); (void) tcp_reass(tp, (struct tcpiphdr *)0); } else tp->t_state = TCPS_SYN_RECEIVED; @@ -313,6 +453,16 @@ trimthenstep6: goto step6; } + /* + * If data is received on a connection after the + * user processes are gone, then RST the other end. + */ + if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && + ti->ti_len) { + tp = tcp_close(tp); + goto dropwithreset; + } + /* * States other than LISTEN or SYN_SENT. * First check that at least some bytes of segment are within @@ -376,16 +526,6 @@ trimthenstep6: } } - /* - * If data is received on a connection after the - * user processes are gone, then RST the other end. - */ - if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && - ti->ti_len) { - tp = tcp_close(tp); - goto dropwithreset; - } - /* * If the RST bit is set examine the state: * SYN_RECEIVED STATE: @@ -451,6 +591,7 @@ trimthenstep6: tp->t_timer[TCPT_REXMT] = 0; soisconnected(so); tp->t_state = TCPS_ESTABLISHED; + tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp)); (void) tcp_reass(tp, (struct tcpiphdr *)0); tp->snd_wl1 = ti->ti_seq - 1; /* fall into ... */ @@ -497,12 +638,16 @@ trimthenstep6: else { TCPT_RANGESET(tp->t_timer[TCPT_REXMT], tcp_beta * tp->t_srtt, TCPTV_MIN, TCPTV_MAX); - tp->t_rtt = 1; tp->t_rxtshift = 0; } + /* + * When new data is acked, open the congestion window a bit. + */ + if (acked > 0) + tp->snd_cwnd = MIN(11 * tp->snd_cwnd / 10, 65535); if (acked > so->so_snd.sb_cc) { - sbdrop(&so->so_snd, so->so_snd.sb_cc); tp->snd_wnd -= so->so_snd.sb_cc; + sbdrop(&so->so_snd, so->so_snd.sb_cc); } else { sbdrop(&so->so_snd, acked); tp->snd_wnd -= acked; @@ -526,9 +671,14 @@ trimthenstep6: /* * If we can't receive any more * data, then closing user can proceed. + * Starting the timer is contrary to the + * specification, but if we don't get a FIN + * we'll hang forever. */ - if (so->so_state & SS_CANTRCVMORE) + if (so->so_state & SS_CANTRCVMORE) { soisdisconnected(so); + tp->t_timer[TCPT_2MSL] = TCPTV_MAXIDLE; + } tp->t_state = TCPS_FIN_WAIT_2; } break; @@ -581,8 +731,6 @@ step6: tp->snd_wnd = ti->ti_win; tp->snd_wl1 = ti->ti_seq; tp->snd_wl2 = ti->ti_ack; - if (tp->snd_wnd != 0) - tp->t_timer[TCPT_PERSIST] = 0; } /* @@ -590,6 +738,18 @@ step6: */ if ((tiflags & TH_URG) && ti->ti_urp && TCPS_HAVERCVDFIN(tp->t_state) == 0) { + /* + * This is a kludge, but if we receive accept + * random urgent pointers, we'll crash in + * soreceive. It's hard to imagine someone + * actually wanting to send this much urgent data. + */ + if (ti->ti_urp + (unsigned) so->so_rcv.sb_cc > 32767) { + ti->ti_urp = 0; /* XXX */ + tiflags &= ~TH_URG; /* XXX */ + ti->ti_flags &= ~TH_URG; /* XXX */ + goto badurp; /* XXX */ + } /* * If this segment advances the known urgent pointer, * then mark the data stream. This should not happen @@ -604,7 +764,7 @@ step6: if (so->so_oobmark == 0) so->so_state |= SS_RCVATMARK; sohasoutofband(so); - tp->t_oobflags &= ~TCPOOB_HAVEDATA; + tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); } /* * Remove out of band data so doesn't get presented to user. @@ -615,6 +775,7 @@ step6: if (ti->ti_urp <= ti->ti_len) tcp_pulloutofband(so, ti); } +badurp: /* XXX */ /* * Process the segment text, merging it into the TCP sequencing queue, @@ -626,7 +787,7 @@ step6: */ if ((ti->ti_len || (tiflags&TH_FIN)) && TCPS_HAVERCVDFIN(tp->t_state) == 0) { - tiflags = tcp_reass(tp, ti); + TCP_REASS(tp, ti, m, so, tiflags); if (tcpnodelack == 0) tp->t_flags |= TF_DELACK; else @@ -715,8 +876,9 @@ dropwithreset: /* * Generate a RST, dropping incoming segment. * Make ACK acceptable to originator of segment. + * Don't bother to respond if destination was broadcast. */ - if (tiflags & TH_RST) + if ((tiflags & TH_RST) || in_broadcast(ti->ti_dst)) goto drop; if (tiflags & TH_ACK) tcp_respond(tp, ti, (tcp_seq)0, ti->ti_ack, TH_RST); @@ -746,9 +908,10 @@ drop: return; } -tcp_dooptions(tp, om) +tcp_dooptions(tp, om, ti) struct tcpcb *tp; struct mbuf *om; + struct tcpiphdr *ti; { register u_char *cp; int opt, optlen, cnt; @@ -761,8 +924,11 @@ tcp_dooptions(tp, om) break; if (opt == TCPOPT_NOP) optlen = 1; - else + else { optlen = cp[1]; + if (optlen <= 0) + break; + } switch (opt) { default: @@ -771,8 +937,11 @@ tcp_dooptions(tp, om) case TCPOPT_MAXSEG: if (optlen != 4) continue; + if (!(ti->ti_flags & TH_SYN)) + continue; tp->t_maxseg = *(u_short *)(cp + 2); tp->t_maxseg = ntohs((u_short)tp->t_maxseg); + tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp)); break; } } @@ -813,105 +982,50 @@ tcp_pulloutofband(so, ti) } /* - * Insert segment ti into reassembly queue of tcp with - * control block tp. Return TH_FIN if reassembly now includes - * a segment with FIN. + * Determine a reasonable value for maxseg size. + * If the route is known, use one that can be handled + * on the given interface without forcing IP to fragment. + * If bigger than a page (CLBYTES), round down to nearest pagesize + * to utilize pagesize mbufs. + * If interface pointer is unavailable, or the destination isn't local, + * use a conservative size (512 or the default IP max size, but no more + * than the mtu of the interface through which we route), + * as we can't discover anything about intervening gateways or networks. + * + * This is ugly, and doesn't belong at this level, but has to happen somehow. */ -tcp_reass(tp, ti) +tcp_mss(tp) register struct tcpcb *tp; - register struct tcpiphdr *ti; { - register struct tcpiphdr *q; - struct socket *so = tp->t_inpcb->inp_socket; - struct mbuf *m; - int flags; - - /* - * Call with ti==0 after become established to - * force pre-ESTABLISHED data up to user socket. - */ - if (ti == 0) - goto present; - - /* - * Find a segment which begins after this one does. - */ - for (q = tp->seg_next; q != (struct tcpiphdr *)tp; - q = (struct tcpiphdr *)q->ti_next) - if (SEQ_GT(q->ti_seq, ti->ti_seq)) - break; - - /* - * If there is a preceding segment, it may provide some of - * our data already. If so, drop the data from the incoming - * segment. If it provides all of our data, drop us. - */ - if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) { - register int i; - q = (struct tcpiphdr *)q->ti_prev; - /* conversion to int (in i) handles seq wraparound */ - i = q->ti_seq + q->ti_len - ti->ti_seq; - if (i > 0) { - if (i >= ti->ti_len) - goto drop; - m_adj(dtom(ti), i); - ti->ti_len -= i; - ti->ti_seq += i; - } - q = (struct tcpiphdr *)(q->ti_next); - } + struct route *ro; + struct ifnet *ifp; + int mss; + struct inpcb *inp; - /* - * While we overlap succeeding segments trim them or, - * if they are completely covered, dequeue them. - */ - while (q != (struct tcpiphdr *)tp) { - register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; - if (i <= 0) - break; - if (i < q->ti_len) { - q->ti_seq += i; - q->ti_len -= i; - m_adj(dtom(q), i); - break; + inp = tp->t_inpcb; + ro = &inp->inp_route; + if ((ro->ro_rt == (struct rtentry *)0) || + (ifp = ro->ro_rt->rt_ifp) == (struct ifnet *)0) { + /* No route yet, so try to acquire one */ + if (inp->inp_faddr.s_addr != INADDR_ANY) { + ro->ro_dst.sa_family = AF_INET; + ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = + inp->inp_faddr; + rtalloc(ro); } - q = (struct tcpiphdr *)q->ti_next; - m = dtom(q->ti_prev); - remque(q->ti_prev); - m_freem(m); + if ((ro->ro_rt == 0) || (ifp = ro->ro_rt->rt_ifp) == 0) + return (TCP_MSS); } - /* - * Stick new segment in its place. - */ - insque(ti, q->ti_prev); - -present: - /* - * Present data to user, advancing rcv_nxt through - * completed sequence space. - */ - if (TCPS_HAVERCVDSYN(tp->t_state) == 0) - return (0); - ti = tp->seg_next; - if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt) - return (0); - if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) - return (0); - do { - tp->rcv_nxt += ti->ti_len; - flags = ti->ti_flags & TH_FIN; - remque(ti); - m = dtom(ti); - ti = (struct tcpiphdr *)ti->ti_next; - if (so->so_state & SS_CANTRCVMORE) - m_freem(m); - else - sbappend(&so->so_rcv, m); - } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); - sorwakeup(so); - return (flags); -drop: - m_freem(dtom(ti)); - return (0); + mss = ifp->if_mtu - sizeof(struct tcpiphdr); +#if (CLBYTES & (CLBYTES - 1)) == 0 + if (mss > CLBYTES) + mss &= ~(CLBYTES-1); +#else + if (mss > CLBYTES) + mss = mss / CLBYTES * CLBYTES; +#endif + if (in_localaddr(inp->inp_faddr)) + return (mss); + return (MIN(mss, TCP_MSS)); }