X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/blobdiff_plain/e1506033e528d8c2ef7440b4c70d91e702f23ec9..224f3a720c8acd42674bb9f2a4864989c7ae75f0:/usr/src/sys/netinet/tcp_input.c diff --git a/usr/src/sys/netinet/tcp_input.c b/usr/src/sys/netinet/tcp_input.c index 1e8a446aca..2e010262e3 100644 --- a/usr/src/sys/netinet/tcp_input.c +++ b/usr/src/sys/netinet/tcp_input.c @@ -1,687 +1,1031 @@ -/* tcp_input.c 1.19 81/11/04 */ - -#include "../h/param.h" -#include "../h/systm.h" -#include "../h/mbuf.h" -#include "../h/socket.h" -#include "../inet/inet_cksum.h" -#include "../inet/inet.h" -#include "../inet/inet_systm.h" -#include "../inet/imp.h" -#include "../inet/inet_host.h" -#include "../inet/ip.h" -#include "../inet/tcp.h" -#include "../inet/tcp_fsm.h" +/* + * Copyright (c) 1982 Regents of the University of California. + * All rights reserved. The Berkeley software License Agreement + * specifies the terms and conditions for redistribution. + * + * @(#)tcp_input.c 6.15 (Berkeley) %G% + */ +#include "param.h" +#include "systm.h" +#include "mbuf.h" +#include "protosw.h" +#include "socket.h" +#include "socketvar.h" +#include "errno.h" + +#include "../net/if.h" +#include "../net/route.h" + +#include "in.h" +#include "in_pcb.h" +#include "in_systm.h" +#include "ip.h" +#include "ip_var.h" +#include "tcp.h" +#include "tcp_fsm.h" +#include "tcp_seq.h" +#include "tcp_timer.h" +#include "tcp_var.h" +#include "tcpip.h" +#include "tcp_debug.h" + +int tcpprintfs = 0; int tcpcksum = 1; +struct tcpiphdr tcp_saveti; +extern tcpnodelack; + +struct tcpcb *tcp_newtcpcb(); -tcp_input(mp) - register struct mbuf *mp; +/* + * Insert segment ti into reassembly queue of tcp with + * control block tp. Return TH_FIN if reassembly now includes + * a segment with FIN. The macro form does the common case inline + * (segment is the next to be received on an established connection, + * and the queue is empty), avoiding linkage into and removal + * from the queue and repetition of various conversions. + */ +#define TCP_REASS(tp, ti, m, so, flags) { \ + if ((ti)->ti_seq == (tp)->rcv_nxt && \ + (tp)->seg_next == (struct tcpiphdr *)(tp) && \ + (tp)->t_state == TCPS_ESTABLISHED) { \ + (tp)->rcv_nxt += (ti)->ti_len; \ + flags = (ti)->ti_flags & TH_FIN; \ + sbappend(&(so)->so_rcv, (m)); \ + sorwakeup(so); \ + } else \ + (flags) = tcp_reass((tp), (ti)); \ +} + +tcp_reass(tp, ti) + register struct tcpcb *tp; + register struct tcpiphdr *ti; { - register struct th *n; /* known to be r10 */ - register int j; - register struct tcb *tp; - int nstate; + register struct tcpiphdr *q; + struct socket *so = tp->t_inpcb->inp_socket; struct mbuf *m; - struct ucb *up; - int hlen, tlen; - u_short lport, fport; -#ifdef TCPDEBUG - struct tcp_debug tdb; -#endif -COUNT(TCP_INPUT); + int flags; /* - * Build extended tcp header + * Call with ti==0 after become established to + * force pre-ESTABLISHED data up to user socket. */ - n = (struct th *)((int)mp + mp->m_off); - tlen = ((struct ip *)n)->ip_len; - n->t_len = htons(tlen); - n->t_next = NULL; - n->t_prev = NULL; - n->t_x1 = 0; - lport = ntohs(n->t_dst); - fport = ntohs(n->t_src); + if (ti == 0) + goto present; - /* WONT BE POSSIBLE WHEN MBUFS ARE 256 BYTES */ - if ((hlen = n->t_off << 2) > mp->m_len) - { printf("tcp header overflow\n"); m_freem(mp); return; } + /* + * Find a segment which begins after this one does. + */ + for (q = tp->seg_next; q != (struct tcpiphdr *)tp; + q = (struct tcpiphdr *)q->ti_next) + if (SEQ_GT(q->ti_seq, ti->ti_seq)) + break; - if (tcpcksum) { - /* - * Checksum extended header and data - */ - CKSUM_TCPCHK(mp, n, r10, sizeof (struct ip) + tlen); - if (n->t_sum != 0) { - netstat.t_badsum++; - m_freem(mp); - return; + /* + * If there is a preceding segment, it may provide some of + * our data already. If so, drop the data from the incoming + * segment. If it provides all of our data, drop us. + */ + if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) { + register int i; + q = (struct tcpiphdr *)q->ti_prev; + /* conversion to int (in i) handles seq wraparound */ + i = q->ti_seq + q->ti_len - ti->ti_seq; + if (i > 0) { + if (i >= ti->ti_len) + goto drop; + m_adj(dtom(ti), i); + ti->ti_len -= i; + ti->ti_seq += i; } + q = (struct tcpiphdr *)(q->ti_next); } /* - * Find tcb for message (SHOULDN'T USE LINEAR SEARCH!) + * While we overlap succeeding segments trim them or, + * if they are completely covered, dequeue them. */ - for (tp = tcb.tcb_next; tp != (struct tcb *)&tcb; tp = tp->tcb_next) - if (tp->t_lport == lport && tp->t_fport == fport && - tp->t_ucb->uc_host->h_addr.s_addr == n->t_s.s_addr) - goto found; - for (tp = tcb.tcb_next; tp != (struct tcb *)&tcb; tp = tp->tcb_next) - if (tp->t_lport == lport && - (tp->t_fport==fport || tp->t_fport==0) && - (tp->t_ucb->uc_host->h_addr.s_addr == n->t_s.s_addr || - tp->t_ucb->uc_host->h_addr.s_addr == 0)) - goto found; - goto notwanted; -found: + while (q != (struct tcpiphdr *)tp) { + register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; + if (i <= 0) + break; + if (i < q->ti_len) { + q->ti_seq += i; + q->ti_len -= i; + m_adj(dtom(q), i); + break; + } + q = (struct tcpiphdr *)q->ti_next; + m = dtom(q->ti_prev); + remque(q->ti_prev); + m_freem(m); + } /* - * Byte swap header + * Stick new segment in its place. */ - n->t_len = tlen - hlen; - n->t_src = fport; - n->t_dst = lport; - n->t_seq = ntohl(n->t_seq); - n->t_ackno = ntohl(n->t_ackno); - n->t_win = ntohs(n->t_win); - n->t_urp = ntohs(n->t_urp); + insque(ti, q->ti_prev); +present: /* - * Check segment seq # and do rst processing + * Present data to user, advancing rcv_nxt through + * completed sequence space. */ - switch (tp->t_state) { + if (TCPS_HAVERCVDSYN(tp->t_state) == 0) + return (0); + ti = tp->seg_next; + if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt) + return (0); + if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) + return (0); + do { + tp->rcv_nxt += ti->ti_len; + flags = ti->ti_flags & TH_FIN; + remque(ti); + m = dtom(ti); + ti = (struct tcpiphdr *)ti->ti_next; + if (so->so_state & SS_CANTRCVMORE) + m_freem(m); + else + sbappend(&so->so_rcv, m); + } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); + sorwakeup(so); + return (flags); +drop: + m_freem(dtom(ti)); + return (0); +} - case LISTEN: - if ((n->th_flags&TH_ACK) || !syn_ok(tp, n)) { - tcp_sndrst(tp, n); - goto badseg; +/* + * TCP input routine, follows pages 65-76 of the + * protocol specification dated September, 1981 very closely. + */ +tcp_input(m0) + struct mbuf *m0; +{ + register struct tcpiphdr *ti; + struct inpcb *inp; + register struct mbuf *m; + struct mbuf *om = 0; + int len, tlen, off; + register struct tcpcb *tp = 0; + register int tiflags; + struct socket *so; + int todrop, acked; + short ostate; + struct in_addr laddr; + int dropsocket = 0; + + /* + * Get IP and TCP header together in first mbuf. + * Note: IP leaves IP header in first mbuf. + */ + m = m0; + ti = mtod(m, struct tcpiphdr *); + if (((struct ip *)ti)->ip_hl > (sizeof (struct ip) >> 2)) + ip_stripoptions((struct ip *)ti, (struct mbuf *)0); + if (m->m_off > MMAXOFF || m->m_len < sizeof (struct tcpiphdr)) { + if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) { + tcpstat.tcps_hdrops++; + return; } - if (n->th_flags&TH_RST) - goto badseg; - goto goodseg; - - case SYN_SENT: - if (!ack_ok(tp, n) || !syn_ok(tp, n)) { - tcp_sndrst(tp, n); /* 71,72,75 */ - goto badseg; + ti = mtod(m, struct tcpiphdr *); + } + + /* + * Checksum extended TCP header and data. + */ + tlen = ((struct ip *)ti)->ip_len; + len = sizeof (struct ip) + tlen; + if (tcpcksum) { + ti->ti_next = ti->ti_prev = 0; + ti->ti_x1 = 0; + ti->ti_len = (u_short)tlen; + ti->ti_len = htons((u_short)ti->ti_len); + if (ti->ti_sum = in_cksum(m, len)) { + if (tcpprintfs) + printf("tcp sum: src %x\n", ti->ti_src); + tcpstat.tcps_badsum++; + goto drop; } - if (n->th_flags&TH_RST) { - tcp_close(tp, URESET); /* 70 */ - tp->t_state = CLOSED; - goto badseg; + } + + /* + * Check that TCP offset makes sense, + * pull out TCP options and adjust length. + */ + off = ti->ti_off << 2; + if (off < sizeof (struct tcphdr) || off > tlen) { + if (tcpprintfs) + printf("tcp off: src %x off %d\n", ti->ti_src, off); + tcpstat.tcps_badoff++; + goto drop; + } + tlen -= off; + ti->ti_len = tlen; + if (off > sizeof (struct tcphdr)) { + if (m->m_len < sizeof(struct ip) + off) { + if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) { + tcpstat.tcps_hdrops++; + return; + } + ti = mtod(m, struct tcpiphdr *); + } + om = m_get(M_DONTWAIT, MT_DATA); + if (om == 0) + goto drop; + om->m_len = off - sizeof (struct tcphdr); + { caddr_t op = mtod(m, caddr_t) + sizeof (struct tcpiphdr); + bcopy(op, mtod(om, caddr_t), (unsigned)om->m_len); + m->m_len -= om->m_len; + bcopy(op+om->m_len, op, + (unsigned)(m->m_len-sizeof (struct tcpiphdr))); } - goto goodseg; + } + tiflags = ti->ti_flags; - default: - if ((n->th_flags&TH_RST) == 0) - goto common; - if (n->t_seq < tp->rcv_nxt) /* bad rst */ - goto badseg; /* 69 */ - switch (tp->t_state) { + /* + * Drop TCP and IP headers. + */ + off += sizeof (struct ip); + m->m_off += off; + m->m_len -= off; - case L_SYN_RCVD: - if (ack_ok(tp, n) == 0) - goto badseg; /* 69 */ - tp->t_rexmt = 0; - tp->t_rexmttl = 0; - tp->t_persist = 0; - h_free(tp->t_ucb->uc_host); - tp->t_state = LISTEN; - goto badseg; + /* + * Convert TCP protocol specific fields to host format. + */ + ti->ti_seq = ntohl(ti->ti_seq); + ti->ti_ack = ntohl(ti->ti_ack); + ti->ti_win = ntohs(ti->ti_win); + ti->ti_urp = ntohs(ti->ti_urp); - default: - tcp_close(tp, URESET); /* 66 */ - tp->t_state = CLOSED; - goto badseg; - } - /*NOTREACHED*/ + /* + * Locate pcb for segment. + */ + inp = in_pcblookup + (&tcb, ti->ti_src, ti->ti_sport, ti->ti_dst, ti->ti_dport, + INPLOOKUP_WILDCARD); - case SYN_RCVD: -common: - if (ack_ok(tp, n) == 0) { - tcp_sndrst(tp, n); /* 74 */ - goto badseg; - } - if (syn_ok(tp, n) && n->t_seq != tp->irs) { - tcp_sndnull(tp); /* 74 */ - goto badseg; - } - goto goodseg; + /* + * If the state is CLOSED (i.e., TCB does not exist) then + * all data in the incoming segment is discarded. + */ + if (inp == 0) + goto dropwithreset; + tp = intotcpcb(inp); + if (tp == 0) + goto dropwithreset; + so = inp->inp_socket; + if (so->so_options & SO_DEBUG) { + ostate = tp->t_state; + tcp_saveti = *ti; + } + if (so->so_options & SO_ACCEPTCONN) { + so = sonewconn(so); + if (so == 0) + goto drop; + /* + * This is ugly, but .... + * + * Mark socket as temporary until we're + * committed to keeping it. The code at + * ``drop'' and ``dropwithreset'' check the + * flag dropsocket to see if the temporary + * socket created here should be discarded. + * We mark the socket as discardable until + * we're committed to it below in TCPS_LISTEN. + */ + dropsocket++; + inp = (struct inpcb *)so->so_pcb; + inp->inp_laddr = ti->ti_dst; + inp->inp_lport = ti->ti_dport; + inp->inp_options = ip_srcroute(); + tp = intotcpcb(inp); + tp->t_state = TCPS_LISTEN; } -badseg: - m_freem(mp); - return; -goodseg: -#ifdef notdef - /* DO SOMETHING ABOUT UNACK!!! */ /* - * Defer processing if no buffer space for this connection. + * Segment received on connection. + * Reset idle time and keep-alive timer. */ - up = tp->t_ucb; - if (up->uc_rcc > up->uc_rhiwat && - && n->t_len != 0 && mbstat.m_bufs < mbstat.m_lowat) { - mp->m_act = (struct mbuf *)0; - if ((m = tp->t_rcv_unack) != NULL) { - while (m->m_act != NULL) - m = m->m_act; - m->m_act = mp; - } else - tp->t_rcv_unack = mp; - return; + tp->t_idle = 0; + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP; + + /* + * Process options if not in LISTEN state, + * else do it below (after getting remote address). + */ + if (om && tp->t_state != TCPS_LISTEN) { + tcp_dooptions(tp, om, ti); + om = 0; } -#endif /* - * Discard ip header, and do tcp input processing. + * Calculate amount of space in receive window, + * and then do TCP input processing. + * Receive window is amount of space in rcv queue, + * but not less than advertised window. */ - hlen += sizeof(struct ip); - mp->m_off += hlen; - mp->m_len -= hlen; - nstate = tp->t_state; - tp->tc_flags &= ~TC_NET_KEEP; - acounts[tp->t_state][INRECV]++; -#ifdef TCPDEBUG - if ((tp->t_ucb->uc_flags & UDEBUG) || tcpconsdebug) { - tdb_setup(tp, n, INRECV, &tdb); - } else - tdb.td_tod = 0; -#endif + tp->rcv_wnd = sbspace(&so->so_rcv); + if (tp->rcv_wnd < 0) + tp->rcv_wnd = 0; + tp->rcv_wnd = MAX(tp->rcv_wnd, (short)(tp->rcv_adv - tp->rcv_nxt)); + switch (tp->t_state) { - case LISTEN: - if (!syn_ok(tp, n) || - ((tp->t_ucb->uc_host = h_make(&n->t_s)) == 0)) { - nstate = EFAILEC; - goto done; + /* + * If the state is LISTEN then ignore segment if it contains an RST. + * If the segment contains an ACK then it is bad and send a RST. + * If it does not contain a SYN then it is not interesting; drop it. + * Don't bother responding if the destination was a broadcast. + * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial + * tp->iss, and send a segment: + * + * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. + * Fill in remote peer address fields if not previously specified. + * Enter SYN_RECEIVED state, and process any other fields of this + * segment in this state. + */ + case TCPS_LISTEN: { + struct mbuf *am; + register struct sockaddr_in *sin; + + if (tiflags & TH_RST) + goto drop; + if (tiflags & TH_ACK) + goto dropwithreset; + if ((tiflags & TH_SYN) == 0) + goto drop; + if (in_broadcast(ti->ti_dst)) + goto drop; + am = m_get(M_DONTWAIT, MT_SONAME); + if (am == NULL) + goto drop; + am->m_len = sizeof (struct sockaddr_in); + sin = mtod(am, struct sockaddr_in *); + sin->sin_family = AF_INET; + sin->sin_addr = ti->ti_src; + sin->sin_port = ti->ti_sport; + laddr = inp->inp_laddr; + if (inp->inp_laddr.s_addr == INADDR_ANY) + inp->inp_laddr = ti->ti_dst; + if (in_pcbconnect(inp, am)) { + inp->inp_laddr = laddr; + (void) m_free(am); + goto drop; } - tp->t_fport = n->t_src; + (void) m_free(am); tp->t_template = tcp_template(tp); - tcp_ctldat(tp, n, 1); - if (tp->tc_flags&TC_FIN_RCVD) { - tp->t_finack = T_2ML; /* 3 */ - tp->tc_flags &= ~TC_WAITED_2_ML; - nstate = CLOSE_WAIT; - } else { - tp->t_init = T_INIT / 2; /* 4 */ - nstate = L_SYN_RCVD; + if (tp->t_template == 0) { + in_pcbdisconnect(inp); + dropsocket = 0; /* socket is already gone */ + tp = 0; + goto drop; } - goto done; - - case SYN_SENT: - if (!syn_ok(tp, n)) { - nstate = EFAILEC; - goto done; + if (om) { + tcp_dooptions(tp, om, ti); + om = 0; } - tcp_ctldat(tp, n, 1); - if (tp->tc_flags&TC_FIN_RCVD) { - if ((n->th_flags&TH_ACK) == 0) { - tp->t_finack = T_2ML; /* 9 */ - tp->tc_flags &= ~TC_WAITED_2_ML; - } - nstate = CLOSE_WAIT; - goto done; + tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2; + tp->irs = ti->ti_seq; + tcp_sendseqinit(tp); + tcp_rcvseqinit(tp); + tp->t_state = TCPS_SYN_RECEIVED; + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP; + dropsocket = 0; /* committed to socket */ + goto trimthenstep6; } - nstate = (n->th_flags&TH_ACK) ? ESTAB : SYN_RCVD; /* 11:8 */ - goto done; - - case SYN_RCVD: - case L_SYN_RCVD: - if ((n->th_flags&TH_ACK) == 0 || - (n->th_flags&TH_ACK) && n->t_ackno <= tp->iss) { - nstate = EFAILEC; - goto done; + + /* + * If the state is SYN_SENT: + * if seg contains an ACK, but not for our SYN, drop the input. + * if seg contains a RST, then drop the connection. + * if seg does not contain SYN, then drop it. + * Otherwise this is an acceptable SYN segment + * initialize tp->rcv_nxt and tp->irs + * if seg contains ack then advance tp->snd_una + * if SYN has been acked change to ESTABLISHED else SYN_RCVD state + * arrange for segment to be acked (eventually) + * continue processing rest of data/controls, beginning with URG + */ + case TCPS_SYN_SENT: + if ((tiflags & TH_ACK) && + (SEQ_LEQ(ti->ti_ack, tp->iss) || + SEQ_GT(ti->ti_ack, tp->snd_max))) + goto dropwithreset; + if (tiflags & TH_RST) { + if (tiflags & TH_ACK) + tp = tcp_drop(tp, ECONNREFUSED); + goto drop; } - goto input; - - case ESTAB: - case FIN_W1: - case FIN_W2: - case TIME_WAIT: -input: - tcp_ctldat(tp, n, 1); /* 39 */ - switch (tp->t_state) { + if ((tiflags & TH_SYN) == 0) + goto drop; + tp->snd_una = ti->ti_ack; + if (SEQ_LT(tp->snd_nxt, tp->snd_una)) + tp->snd_nxt = tp->snd_una; + tp->t_timer[TCPT_REXMT] = 0; + tp->irs = ti->ti_seq; + tcp_rcvseqinit(tp); + tp->t_flags |= TF_ACKNOW; + if (SEQ_GT(tp->snd_una, tp->iss)) { + soisconnected(so); + tp->t_state = TCPS_ESTABLISHED; + tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp)); + (void) tcp_reass(tp, (struct tcpiphdr *)0); + } else + tp->t_state = TCPS_SYN_RECEIVED; + goto trimthenstep6; - case ESTAB: - if (tp->tc_flags&TC_FIN_RCVD) - nstate = CLOSE_WAIT; - break; +trimthenstep6: + /* + * Advance ti->ti_seq to correspond to first data byte. + * If data, trim to stay within window, + * dropping FIN if necessary. + */ + ti->ti_seq++; + if (ti->ti_len > tp->rcv_wnd) { + todrop = ti->ti_len - tp->rcv_wnd; + m_adj(m, -todrop); + ti->ti_len = tp->rcv_wnd; + ti->ti_flags &= ~TH_FIN; + } + tp->snd_wl1 = ti->ti_seq - 1; + goto step6; + } - case SYN_RCVD: - case L_SYN_RCVD: - nstate = (tp->tc_flags&TC_FIN_RCVD) ? - CLOSE_WAIT : ESTAB; /* 33:5 */ - break; + /* + * If data is received on a connection after the + * user processes are gone, then RST the other end. + */ + if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && + ti->ti_len) { + tp = tcp_close(tp); + goto dropwithreset; + } - case FIN_W1: - j = ack_fin(tp, n); - if ((tp->tc_flags & TC_FIN_RCVD) == 0) { - if (j) - nstate = FIN_W2; /* 27 */ - break; + /* + * States other than LISTEN or SYN_SENT. + * First check that at least some bytes of segment are within + * receive window. + */ + if (tp->rcv_wnd == 0) { + /* + * If window is closed can only take segments at + * window edge, and have to drop data and PUSH from + * incoming segments. + */ + if (tp->rcv_nxt != ti->ti_seq) + goto dropafterack; + if (ti->ti_len > 0) { + m_adj(m, ti->ti_len); + ti->ti_len = 0; + ti->ti_flags &= ~(TH_PUSH|TH_FIN); + } + } else { + /* + * If segment begins before rcv_nxt, drop leading + * data (and SYN); if nothing left, just ack. + */ + todrop = tp->rcv_nxt - ti->ti_seq; + if (todrop > 0) { + if (tiflags & TH_SYN) { + tiflags &= ~TH_SYN; + ti->ti_flags &= ~TH_SYN; + ti->ti_seq++; + if (ti->ti_urp > 1) + ti->ti_urp--; + else + tiflags &= ~TH_URG; + todrop--; } - tp->t_finack = T_2ML; - tp->tc_flags &= ~TC_WAITED_2_ML; - nstate = j ? TIME_WAIT : CLOSING; /* 28:26 */ - break; - - case FIN_W2: - if (tp->tc_flags&TC_FIN_RCVD) { - tp->t_finack = T_2ML; /* 29 */ - tp->tc_flags &= ~TC_WAITED_2_ML; - nstate = TIME_WAIT; - break; + if (todrop > ti->ti_len || + todrop == ti->ti_len && (tiflags&TH_FIN) == 0) + goto dropafterack; + m_adj(m, todrop); + ti->ti_seq += todrop; + ti->ti_len -= todrop; + if (ti->ti_urp > todrop) + ti->ti_urp -= todrop; + else { + tiflags &= ~TH_URG; + ti->ti_flags &= ~TH_URG; + ti->ti_urp = 0; } - break; - } - goto done; - - case CLOSE_WAIT: - if (n->th_flags&TH_FIN) { - if ((n->th_flags&TH_ACK) && - n->t_ackno <= tp->seq_fin) { - tcp_ctldat(tp, n, 0); /* 30 */ - tp->t_finack = T_2ML; - tp->tc_flags &= ~TC_WAITED_2_ML; - } else - tcp_sndctl(tp); /* 31 */ - goto done; - } - goto input; - - case CLOSING: - j = ack_fin(tp, n); - if (n->th_flags&TH_FIN) { - tcp_ctldat(tp, n, 0); - tp->t_finack = T_2ML; - tp->tc_flags &= ~TC_WAITED_2_ML; - if (j) - nstate = TIME_WAIT; /* 23 */ - goto done; } - if (j) { - if (tp->tc_flags&TC_WAITED_2_ML) - if (rcv_empty(tp)) { - tcp_close(tp, UCLOSED); /* 15 */ - nstate = CLOSED; - } else - nstate = RCV_WAIT; /* 18 */ - else - nstate = TIME_WAIT; - goto done; - } - goto input; - - case LAST_ACK: - if (ack_fin(tp, n)) { - if (rcv_empty(tp)) { /* 16 */ - tcp_close(tp, UCLOSED); - nstate = CLOSED; - } else - nstate = RCV_WAIT; /* 19 */ - goto done; - } - if (n->th_flags&TH_FIN) { - tcp_sndctl(tp); /* 31 */ - goto done; - } - goto input; - - case RCV_WAIT: - if ((n->th_flags&TH_FIN) && (n->th_flags&TH_ACK) && - n->t_ackno <= tp->seq_fin) { - tcp_ctldat(tp, n, 0); - tp->t_finack = T_2ML; - tp->tc_flags &= ~TC_WAITED_2_ML; /* 30 */ + /* + * If segment ends after window, drop trailing data + * (and PUSH and FIN); if nothing left, just ACK. + */ + todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd); + if (todrop > 0) { + if (todrop >= ti->ti_len) + goto dropafterack; + m_adj(m, -todrop); + ti->ti_len -= todrop; + ti->ti_flags &= ~(TH_PUSH|TH_FIN); } - goto done; } - panic("tcp_input"); -done: /* - * Done with state*input specific processing. - * Form trace records, free input if not needed, - * and enter new state. + * If the RST bit is set examine the state: + * SYN_RECEIVED STATE: + * If passive open, return to LISTEN state. + * If active open, inform user that connection was refused. + * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: + * Inform user that connection was reset, and close tcb. + * CLOSING, LAST_ACK, TIME_WAIT STATES + * Close the tcb. */ -#ifdef TCPDEBUG - if (tdb.td_tod) - tdb_stuff(&tdb, nstate); -#endif - switch (nstate) { + if (tiflags&TH_RST) switch (tp->t_state) { - case EFAILEC: - m_freem(mp); - return; + case TCPS_SYN_RECEIVED: + tp = tcp_drop(tp, ECONNREFUSED); + goto drop; - default: - tp->t_state = nstate; - /* fall into ... */ + case TCPS_ESTABLISHED: + case TCPS_FIN_WAIT_1: + case TCPS_FIN_WAIT_2: + case TCPS_CLOSE_WAIT: + tp = tcp_drop(tp, ECONNRESET); + goto drop; - case CLOSED: - /* IF CLOSED CANT LOOK AT tc_flags */ - if ((tp->tc_flags&TC_NET_KEEP) == 0) - /* inline expansion of m_freem */ - while (mp) { - MFREE(mp, m); - mp = m; - } - return; + case TCPS_CLOSING: + case TCPS_LAST_ACK: + case TCPS_TIME_WAIT: + tp = tcp_close(tp); + goto drop; } - /* NOTREACHED */ /* - * Unwanted packed; free everything - * but the header and return an rst. + * If a SYN is in the window, then this is an + * error and we send an RST and drop the connection. */ -notwanted: - m_freem(mp->m_next); - mp->m_next = NULL; - mp->m_len = sizeof(struct th); -#define xchg(a,b) j=a; a=b; b=j - xchg(n->t_d.s_addr, n->t_s.s_addr); xchg(n->t_dst, n->t_src); -#undef xchg - if (n->th_flags&TH_ACK) - n->t_seq = n->t_ackno; - else { - n->t_ackno = htonl(ntohl(n->t_seq) + tlen - hlen); - n->t_seq = 0; + if (tiflags & TH_SYN) { + tp = tcp_drop(tp, ECONNRESET); + goto dropwithreset; } - n->th_flags = TH_RST; /* not TH_FIN, TH_SYN */ - n->th_flags ^= TH_ACK; - n->t_len = htons(TCPSIZE); - n->t_off = 5; - n->t_sum = inet_cksum(mp, sizeof(struct th)); - ((struct ip *)n)->ip_len = sizeof(struct th); - ip_output(mp); - netstat.t_badsegs++; -} -tcp_ctldat(tp, n, dataok) - register struct tcb *tp; - register struct th *n; -{ - register struct mbuf *m; - int sent; -COUNT(TCP_CTLDAT); - - tp->tc_flags &= ~(TC_DROPPED_TXT|TC_ACK_DUE|TC_NEW_WINDOW); -/* syn */ - if ((tp->tc_flags&TC_SYN_RCVD) == 0 && (n->th_flags&TH_SYN)) { - tp->irs = n->t_seq; - tp->rcv_nxt = n->t_seq + 1; - tp->snd_wl = tp->rcv_urp = tp->irs; - tp->tc_flags |= (TC_SYN_RCVD|TC_ACK_DUE); - } -/* ack */ - if ((n->th_flags&TH_ACK) && (tp->tc_flags&TC_SYN_RCVD) && - n->t_ackno > tp->snd_una) { - register struct mbuf *mn; - register struct ucb *up; - int len; - - up = tp->t_ucb; - - /* update snd_una and snd_nxt */ - tp->snd_una = n->t_ackno; - if (tp->snd_una > tp->snd_nxt) + /* + * If the ACK bit is off we drop the segment and return. + */ + if ((tiflags & TH_ACK) == 0) + goto drop; + + /* + * Ack processing. + */ + switch (tp->t_state) { + + /* + * In SYN_RECEIVED state if the ack ACKs our SYN then enter + * ESTABLISHED state and continue processing, othewise + * send an RST. + */ + case TCPS_SYN_RECEIVED: + if (SEQ_GT(tp->snd_una, ti->ti_ack) || + SEQ_GT(ti->ti_ack, tp->snd_max)) + goto dropwithreset; + tp->snd_una++; /* SYN acked */ + if (SEQ_LT(tp->snd_nxt, tp->snd_una)) tp->snd_nxt = tp->snd_una; - /* if timed msg acked, set retrans time value */ - if ((tp->tc_flags&TC_SYN_ACKED) && - tp->snd_una > tp->t_xmt_val) { - tp->t_xmtime = (tp->t_xmt != 0 ? tp->t_xmt : T_REXMT); - if (tp->t_xmtime > T_REMAX) - tp->t_xmtime = T_REMAX; + tp->t_timer[TCPT_REXMT] = 0; + soisconnected(so); + tp->t_state = TCPS_ESTABLISHED; + tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp)); + (void) tcp_reass(tp, (struct tcpiphdr *)0); + tp->snd_wl1 = ti->ti_seq - 1; + /* fall into ... */ + + /* + * In ESTABLISHED state: drop duplicate ACKs; ACK out of range + * ACKs. If the ack is in the range + * tp->snd_una < ti->ti_ack <= tp->snd_max + * then advance tp->snd_una to ti->ti_ack and drop + * data from the retransmission queue. If this ACK reflects + * more up to date window information we update our window information. + */ + case TCPS_ESTABLISHED: + case TCPS_FIN_WAIT_1: + case TCPS_FIN_WAIT_2: + case TCPS_CLOSE_WAIT: + case TCPS_CLOSING: + case TCPS_LAST_ACK: + case TCPS_TIME_WAIT: +#define ourfinisacked (acked > 0) + + if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) + break; + if (SEQ_GT(ti->ti_ack, tp->snd_max)) + goto dropafterack; + acked = ti->ti_ack - tp->snd_una; + + /* + * If transmit timer is running and timed sequence + * number was acked, update smoothed round trip time. + */ + if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) { + if (tp->t_srtt == 0) + tp->t_srtt = tp->t_rtt; + else + tp->t_srtt = + tcp_alpha * tp->t_srtt + + (1 - tcp_alpha) * tp->t_rtt; + tp->t_rtt = 0; } - /* remove acked data from send buf */ - len = tp->snd_una - tp->snd_off; - m = up->uc_sbuf; - while (len > 0 && m != NULL) - if (m->m_len <= len) { - len -= m->m_len; - if (m->m_off > MMAXOFF) - up->uc_ssize -= NMBPG; - MFREE(m, mn); - m = mn; - up->uc_ssize--; - } else { - m->m_len -= len; - m->m_off += len; - break; + if (ti->ti_ack == tp->snd_max) + tp->t_timer[TCPT_REXMT] = 0; + else { + TCPT_RANGESET(tp->t_timer[TCPT_REXMT], + tcp_beta * tp->t_srtt, TCPTV_MIN, TCPTV_MAX); + tp->t_rxtshift = 0; + } + /* + * When new data is acked, open the congestion window a bit. + */ + if (acked > 0) + tp->snd_cwnd = MIN(11 * tp->snd_cwnd / 10, 65535); + if (acked > so->so_snd.sb_cc) { + tp->snd_wnd -= so->so_snd.sb_cc; + sbdrop(&so->so_snd, so->so_snd.sb_cc); + } else { + sbdrop(&so->so_snd, acked); + tp->snd_wnd -= acked; + acked = 0; + } + if ((so->so_snd.sb_flags & SB_WAIT) || so->so_snd.sb_sel) + sowwakeup(so); + tp->snd_una = ti->ti_ack; + if (SEQ_LT(tp->snd_nxt, tp->snd_una)) + tp->snd_nxt = tp->snd_una; + + switch (tp->t_state) { + + /* + * In FIN_WAIT_1 STATE in addition to the processing + * for the ESTABLISHED state if our FIN is now acknowledged + * then enter FIN_WAIT_2. + */ + case TCPS_FIN_WAIT_1: + if (ourfinisacked) { + /* + * If we can't receive any more + * data, then closing user can proceed. + * Starting the timer is contrary to the + * specification, but if we don't get a FIN + * we'll hang forever. + */ + if (so->so_state & SS_CANTRCVMORE) { + soisdisconnected(so); + tp->t_timer[TCPT_2MSL] = TCPTV_MAXIDLE; + } + tp->t_state = TCPS_FIN_WAIT_2; } - up->uc_sbuf = m; - tp->snd_off = tp->snd_una; - if ((tp->tc_flags&TC_SYN_ACKED) == 0 && - (tp->snd_una > tp->iss)) { - tp->tc_flags |= TC_SYN_ACKED; - tp->t_init = 0; + break; + + /* + * In CLOSING STATE in addition to the processing for + * the ESTABLISHED state if the ACK acknowledges our FIN + * then enter the TIME-WAIT state, otherwise ignore + * the segment. + */ + case TCPS_CLOSING: + if (ourfinisacked) { + tp->t_state = TCPS_TIME_WAIT; + tcp_canceltimers(tp); + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + soisdisconnected(so); + } + break; + + /* + * The only thing that can arrive in LAST_ACK state + * is an acknowledgment of our FIN. If our FIN is now + * acknowledged, delete the TCB, enter the closed state + * and return. + */ + case TCPS_LAST_ACK: + if (ourfinisacked) + tp = tcp_close(tp); + goto drop; + + /* + * In TIME_WAIT state the only thing that should arrive + * is a retransmission of the remote FIN. Acknowledge + * it and restart the finack timer. + */ + case TCPS_TIME_WAIT: + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + goto dropafterack; } - if (tp->seq_fin != tp->iss && tp->snd_una > tp->seq_fin) - tp->tc_flags &= ~TC_SND_FIN; - tp->t_rexmt = 0; - tp->t_rexmttl = 0; - tp->tc_flags |= TC_CANCELLED; - netwakeup(tp->t_ucb); /* wasteful */ - } -/* win */ - if ((tp->tc_flags & TC_SYN_RCVD) && n->t_seq >= tp->snd_wl) { - tp->snd_wl = n->t_seq; - tp->snd_wnd = n->t_win; - tp->tc_flags |= TC_NEW_WINDOW; - tp->t_persist = 0; - } - if (dataok == 0) - goto ctlonly; -/* text */ - if (n->t_len == 0) - goto notext; - { register int i; - register struct th *p, *q; - register struct mbuf *m; - int overage; - - /* - * Discard duplicate data already passed to user. - */ - if (SEQ_LT(n->t_seq, tp->rcv_nxt)) { - i = tp->rcv_nxt - n->t_seq; - if (i >= n->t_len) - goto notext; - n->t_seq += i; - n->t_len -= i; - m_adj(dtom(n), i); +#undef ourfinisacked } +step6: /* - * Find a segment which begins after this one does. + * Update window information. */ - for (q = tp->t_rcv_next; q != (struct th *)tp; q = q->t_next) - if (SEQ_GT(q->t_seq, n->t_seq)) - break; + if (SEQ_LT(tp->snd_wl1, ti->ti_seq) || tp->snd_wl1 == ti->ti_seq && + (SEQ_LT(tp->snd_wl2, ti->ti_ack) || + tp->snd_wl2 == ti->ti_ack && ti->ti_win > tp->snd_wnd)) { + tp->snd_wnd = ti->ti_win; + tp->snd_wl1 = ti->ti_seq; + tp->snd_wl2 = ti->ti_ack; + } /* - * If there is a preceding segment, it may provide some of - * our data already. If so, drop the data from the incoming - * segment. If it provides all of our data, drop us. + * Process segments with URG. */ - if (q->t_prev != (struct th *)tp) { - /* conversion to int (in i) handles seq wraparound */ - i = q->t_prev->t_seq + q->t_prev->t_len - n->t_seq; - if (i > 0) { - if (i >= n->t_len) - goto notext; /* w/o setting TC_NET_KEEP */ - m_adj(dtom(tp), i); - n->t_len -= i; - n->t_seq += i; + if ((tiflags & TH_URG) && ti->ti_urp && + TCPS_HAVERCVDFIN(tp->t_state) == 0) { + /* + * This is a kludge, but if we receive accept + * random urgent pointers, we'll crash in + * soreceive. It's hard to imagine someone + * actually wanting to send this much urgent data. + */ + if (ti->ti_urp + (unsigned) so->so_rcv.sb_cc > 32767) { + ti->ti_urp = 0; /* XXX */ + tiflags &= ~TH_URG; /* XXX */ + ti->ti_flags &= ~TH_URG; /* XXX */ + goto badurp; /* XXX */ } + /* + * If this segment advances the known urgent pointer, + * then mark the data stream. This should not happen + * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since + * a FIN has been received from the remote side. + * In these states we ignore the URG. + */ + if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) { + tp->rcv_up = ti->ti_seq + ti->ti_urp; + so->so_oobmark = so->so_rcv.sb_cc + + (tp->rcv_up - tp->rcv_nxt) - 1; + if (so->so_oobmark == 0) + so->so_state |= SS_RCVATMARK; + sohasoutofband(so); + tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); + } + /* + * Remove out of band data so doesn't get presented to user. + * This can happen independent of advancing the URG pointer, + * but if two URG's are pending at once, some out-of-band + * data may creep in... ick. + */ + if (ti->ti_urp <= ti->ti_len) + tcp_pulloutofband(so, ti); } +badurp: /* XXX */ /* - * While we overlap succeeding segments trim them or, - * if they are completely covered, dequeue them. + * Process the segment text, merging it into the TCP sequencing queue, + * and arranging for acknowledgment of receipt if necessary. + * This process logically involves adjusting tp->rcv_wnd as data + * is presented to the user (this happens in tcp_usrreq.c, + * case PRU_RCVD). If a FIN has already been received on this + * connection then we just ignore the text. */ - while (q != (struct th *)tp && SEQ_GT(n->t_seq + n->t_len, q->t_seq)) { - i = (n->t_seq + n->t_len) - q->t_seq; - if (i < q->t_len) { - q->t_len -= i; - m_adj(dtom(q), i); + if ((ti->ti_len || (tiflags&TH_FIN)) && + TCPS_HAVERCVDFIN(tp->t_state) == 0) { + TCP_REASS(tp, ti, m, so, tiflags); + if (tcpnodelack == 0) + tp->t_flags |= TF_DELACK; + else + tp->t_flags |= TF_ACKNOW; + } else { + m_freem(m); + tiflags &= ~TH_FIN; + } + + /* + * If FIN is received ACK the FIN and let the user know + * that the connection is closing. + */ + if (tiflags & TH_FIN) { + if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { + socantrcvmore(so); + tp->t_flags |= TF_ACKNOW; + tp->rcv_nxt++; + } + switch (tp->t_state) { + + /* + * In SYN_RECEIVED and ESTABLISHED STATES + * enter the CLOSE_WAIT state. + */ + case TCPS_SYN_RECEIVED: + case TCPS_ESTABLISHED: + tp->t_state = TCPS_CLOSE_WAIT; + break; + + /* + * If still in FIN_WAIT_1 STATE FIN has not been acked so + * enter the CLOSING state. + */ + case TCPS_FIN_WAIT_1: + tp->t_state = TCPS_CLOSING; + break; + + /* + * In FIN_WAIT_2 state enter the TIME_WAIT state, + * starting the time-wait timer, turning off the other + * standard timers. + */ + case TCPS_FIN_WAIT_2: + tp->t_state = TCPS_TIME_WAIT; + tcp_canceltimers(tp); + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + soisdisconnected(so); + break; + + /* + * In TIME_WAIT state restart the 2 MSL time_wait timer. + */ + case TCPS_TIME_WAIT: + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; break; } - q = q->t_next; - m_freem(dtom(q->t_prev)); - remque(q->t_prev); } + if (so->so_options & SO_DEBUG) + tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0); /* - * Stick new segment in its place. + * Return any desired output. */ - insque(n, q->t_prev); - tp->seqcnt += n->t_len; - -#ifdef notdef - /* - * Calculate available space and discard segments for - * which there is too much. - */ - q = tp->t_rcv_prev; - overage = - (tp->t_ucb->uc_rcc + tp->rcv_seqcnt) - tp->t_ucb->uc_rhiwat; - if (overage > 0) - for (;;) { - i = MIN(q->t_len, overage); - overage -= i; - q->t_len -= i; - m_adj(q, -i); - if (q == n) - tp->tc_flags |= TC_DROPPED_TXT; - if (q->t_len) - break; - if (q == n) - panic("tcp_text dropall"); - q = q->t_prev; - remque(q->t_next); - } -#endif + (void) tcp_output(tp); + return; +dropafterack: /* - * Advance rcv_next through - * newly completed sequence space - * and return forcing an ack. + * Generate an ACK dropping incoming segment if it occupies + * sequence space, where the ACK reflects our state. */ - while (n->t_seq == tp->rcv_nxt) { - /* present data belongs here */ - tp->rcv_nxt += n->t_len; - n = n->t_next; - if (n == (struct th *)tp) - break; + if ((tiflags&TH_RST) || + tlen == 0 && (tiflags&(TH_SYN|TH_FIN)) == 0) + goto drop; + if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) + tcp_trace(TA_RESPOND, ostate, tp, &tcp_saveti, 0); + tcp_respond(tp, ti, tp->rcv_nxt, tp->snd_nxt, TH_ACK); + return; + +dropwithreset: + if (om) { + (void) m_free(om); + om = 0; } - tp->tc_flags |= (TC_ACK_DUE|TC_NET_KEEP); + /* + * Generate a RST, dropping incoming segment. + * Make ACK acceptable to originator of segment. + * Don't bother to respond if destination was broadcast. + */ + if ((tiflags & TH_RST) || in_broadcast(ti->ti_dst)) + goto drop; + if (tiflags & TH_ACK) + tcp_respond(tp, ti, (tcp_seq)0, ti->ti_ack, TH_RST); + else { + if (tiflags & TH_SYN) + ti->ti_len++; + tcp_respond(tp, ti, ti->ti_seq+ti->ti_len, (tcp_seq)0, + TH_RST|TH_ACK); } -notext: -urgeolfin: -/* urg */ - if (n->th_flags&TH_URG) { - unsigned urgent; - - urgent = n->t_urp + n->t_seq; - if (tp->rcv_nxt < urgent) { - if (tp->rcv_urp <= tp->rcv_nxt) - to_user(tp->t_ucb, UURGENT); - tp->rcv_urp = urgent; + /* destroy temporarily created socket */ + if (dropsocket) + (void) soabort(so); + return; + +drop: + if (om) + (void) m_free(om); + /* + * Drop space held by incoming segment and return. + */ + if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) + tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0); + m_freem(m); + /* destroy temporarily created socket */ + if (dropsocket) + (void) soabort(so); + return; +} + +tcp_dooptions(tp, om, ti) + struct tcpcb *tp; + struct mbuf *om; + struct tcpiphdr *ti; +{ + register u_char *cp; + int opt, optlen, cnt; + + cp = mtod(om, u_char *); + cnt = om->m_len; + for (; cnt > 0; cnt -= optlen, cp += optlen) { + opt = cp[0]; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) + optlen = 1; + else { + optlen = cp[1]; + if (optlen <= 0) + break; } - } -/* eol */ - if ((n->th_flags&TH_EOL) && - (tp->tc_flags&TC_DROPPED_TXT) == 0 && - tp->t_rcv_prev != (struct th *)tp) { - /* mark last mbuf */ - m = dtom(tp->t_rcv_prev); - if (m != NULL) { - while (m->m_next != NULL) - m = m->m_next; - m->m_act = - (struct mbuf *)(m->m_off + m->m_len - 1); + switch (opt) { + + default: + break; + + case TCPOPT_MAXSEG: + if (optlen != 4) + continue; + if (!(ti->ti_flags & TH_SYN)) + continue; + tp->t_maxseg = *(u_short *)(cp + 2); + tp->t_maxseg = ntohs((u_short)tp->t_maxseg); + tp->t_maxseg = MIN(tp->t_maxseg, tcp_mss(tp)); + break; } } -ctlonly: -/* fin */ - if ((n->th_flags&TH_FIN) && (tp->tc_flags&TC_DROPPED_TXT) == 0) { - seq_t last; - - if ((tp->tc_flags&TC_FIN_RCVD) == 0) { - /* do we really have fin ? */ - last = firstempty(tp); - if (tp->t_rcv_prev == (struct th *)tp || - last == t_end(tp->t_rcv_prev)) { - tp->tc_flags |= TC_FIN_RCVD; - netwakeup(tp->t_ucb); /* poke */ - } - if ((tp->tc_flags&TC_FIN_RCVD) && - tp->rcv_nxt >= last) { - tp->rcv_nxt = last + 1; /* fin seq */ - tp->tc_flags |= TC_ACK_DUE; - } - } else - tp->tc_flags |= TC_ACK_DUE; - } + (void) m_free(om); +} -/* respond */ - sent = 0; - if (tp->tc_flags&TC_ACK_DUE) - sent = tcp_sndctl(tp); - else if (tp->tc_flags&TC_NEW_WINDOW) { - seq_t last = tp->snd_off; - for (m = tp->t_ucb->uc_sbuf; m != NULL; m = m->m_next) /*###*/ - last += m->m_len; /*###*/ - if (tp->snd_nxt <= last || (tp->tc_flags&TC_SND_FIN)) - sent = tcp_send(tp); - } +/* + * Pull out of band byte out of a segment so + * it doesn't appear in the user's data queue. + * It is still reflected in the segment length for + * sequencing purposes. + */ +tcp_pulloutofband(so, ti) + struct socket *so; + struct tcpiphdr *ti; +{ + register struct mbuf *m; + int cnt = ti->ti_urp - 1; + + m = dtom(ti); + while (cnt >= 0) { + if (m->m_len > cnt) { + char *cp = mtod(m, caddr_t) + cnt; + struct tcpcb *tp = sototcpcb(so); -/* set for retrans */ - if (!sent && tp->snd_una < tp->snd_nxt && - (tp->tc_flags&TC_CANCELLED)) { - tp->t_rexmt = tp->t_xmtime; - tp->t_rexmttl = T_REXMTTL; - tp->t_rexmt_val = tp->t_rtl_val = tp->snd_lst; - tp->tc_flags &= ~TC_CANCELLED; - } -/* present data to user */ - { register struct mbuf **mp; - register struct ucb *up = tp->t_ucb; - seq_t ready; - - /* connection must be synced and data available for user */ - if ((tp->tc_flags&TC_SYN_ACKED) == 0) - return; - up = tp->t_ucb; - mp = &up->uc_rbuf; - while (*mp) - mp = &(*mp)->m_next; - n = tp->t_rcv_next; - /* SHOULD PACK DATA IN HERE */ - while (n != (struct th *)tp && n->t_seq < tp->rcv_nxt) { - remque(n); - m = dtom(n); - up->uc_rcc += n->t_len; - tp->seqcnt -= n->t_len; - if (tp->seqcnt < 0) panic("present_data"); - n = n->t_next; - while (m) { - if (m->m_len == 0) { - MFREE(m, *mp); - } else { - *mp = m; - mp = &m->m_next; - } - m = *mp; + tp->t_iobc = *cp; + tp->t_oobflags |= TCPOOB_HAVEDATA; + bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1)); + m->m_len--; + return; } + cnt -= m->m_len; + m = m->m_next; + if (m == 0) + break; } - if (up->uc_rcc != 0) - netwakeup(up); - if ((tp->tc_flags&TC_FIN_RCVD) && /* ### */ - (tp->tc_flags&TC_USR_CLOSED) == 0 && /* ### */ - rcv_empty(tp)) /* ### */ - to_user(up, UCLOSED); /* ### */ + panic("tcp_pulloutofband"); +} + +/* + * Determine a reasonable value for maxseg size. + * If the route is known, use one that can be handled + * on the given interface without forcing IP to fragment. + * If bigger than a page (CLBYTES), round down to nearest pagesize + * to utilize pagesize mbufs. + * If interface pointer is unavailable, or the destination isn't local, + * use a conservative size (512 or the default IP max size, but no more + * than the mtu of the interface through which we route), + * as we can't discover anything about intervening gateways or networks. + * + * This is ugly, and doesn't belong at this level, but has to happen somehow. + */ +tcp_mss(tp) + register struct tcpcb *tp; +{ + struct route *ro; + struct ifnet *ifp; + int mss; + struct inpcb *inp; + + inp = tp->t_inpcb; + ro = &inp->inp_route; + if ((ro->ro_rt == (struct rtentry *)0) || + (ifp = ro->ro_rt->rt_ifp) == (struct ifnet *)0) { + /* No route yet, so try to acquire one */ + if (inp->inp_faddr.s_addr != INADDR_ANY) { + ro->ro_dst.sa_family = AF_INET; + ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = + inp->inp_faddr; + rtalloc(ro); + } + if ((ro->ro_rt == 0) || (ifp = ro->ro_rt->rt_ifp) == 0) + return (TCP_MSS); } + + mss = ifp->if_mtu - sizeof(struct tcpiphdr); +#if (CLBYTES & (CLBYTES - 1)) == 0 + if (mss > CLBYTES) + mss &= ~(CLBYTES-1); +#else + if (mss > CLBYTES) + mss = mss / CLBYTES * CLBYTES; +#endif + if (in_localaddr(inp->inp_faddr)) + return (mss); + return (MIN(mss, TCP_MSS)); }