X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/blobdiff_plain/ae348eeadb89e419b478405d4e044baf8c26b383..cce93e4b226012d5eebcf7f7bd01cde8a8a07f51:/usr/src/sys/netinet/tcp_output.c diff --git a/usr/src/sys/netinet/tcp_output.c b/usr/src/sys/netinet/tcp_output.c index 9f82abf460..6dc94b5376 100644 --- a/usr/src/sys/netinet/tcp_output.c +++ b/usr/src/sys/netinet/tcp_output.c @@ -1,350 +1,329 @@ -/* tcp_output.c 4.4 81/10/31 */ +/* tcp_output.c 4.49 82/12/14 */ #include "../h/param.h" #include "../h/systm.h" #include "../h/mbuf.h" +#include "../h/protosw.h" #include "../h/socket.h" -#include "../inet/inet.h" -#include "../inet/inet_host.h" -#include "../inet/inet_systm.h" -#include "../inet/imp.h" -#include "../inet/ip.h" -#include "../inet/tcp.h" -#include "../inet/tcp_fsm.h" +#include "../h/socketvar.h" +#include "../netinet/in.h" +#include "../net/route.h" +#include "../netinet/in_pcb.h" +#include "../netinet/in_systm.h" +#include "../netinet/ip.h" +#include "../netinet/ip_var.h" +#include "../netinet/tcp.h" +#define TCPOUTFLAGS +#include "../netinet/tcp_fsm.h" +#include "../netinet/tcp_seq.h" +#include "../netinet/tcp_timer.h" +#include "../netinet/tcp_var.h" +#include "../netinet/tcpip.h" +#include "../netinet/tcp_debug.h" +#include <errno.h> + +char *tcpstates[]; /* XXX */ + +/* + * Initial options. + */ +u_char tcp_initopt[4] = { TCPOPT_MAXSEG, 4, 0x0, 0x0, }; /* - * Special routines to send control messages. + * Tcp output routine: figure out what should be sent and send it. 
*/ -tcp_sndctl(tp) - struct tcb *tp; +tcp_output(tp) + register struct tcpcb *tp; { -COUNT(TCP_SNDCTL); + register struct socket *so = tp->t_inpcb->inp_socket; + register int len; + struct mbuf *m0; + int off, flags, win, error; + register struct mbuf *m; + register struct tcpiphdr *ti; + u_char *opt; + unsigned optlen = 0; + int sendalot; - if (tcp_send(tp)) - return (1); - tcp_sndnull(tp); - return(0); -} -tcp_sndwin(tp) - struct tcb *tp; -{ - int ihave, hehas; -COUNT(TCP_SNDWIN); + /* + * Determine length of data that should be transmitted, + * and flags that will be used. + * If there is some data or critical controls (SYN, RST) + * to send, then transmit; otherwise, investigate further. + */ +again: + sendalot = 0; + off = tp->snd_nxt - tp->snd_una; + len = MIN(so->so_snd.sb_cc, tp->snd_wnd+tp->t_force) - off; + if (len < 0) + return (0); /* ??? */ /* past FIN */ + if (len > tp->t_maxseg) { + len = tp->t_maxseg; + sendalot = 1; + } - if (tp->rcv_adv) { - ihave = tp->t_ucb->uc_rhiwat - - (tp->t_ucb->uc_rcc + tp->seqcnt); - hehas = tp->rcv_adv - tp->rcv_nxt; - if ((100*(ihave-hehas)/tp->t_ucb->uc_rhiwat) < 35) - return; + flags = tcp_outflags[tp->t_state]; + if (tp->snd_nxt + len < tp->snd_una + so->so_snd.sb_cc) + flags &= ~TH_FIN; + if (flags & (TH_SYN|TH_RST|TH_FIN)) + goto send; + if (SEQ_GT(tp->snd_up, tp->snd_una)) + goto send; + + /* + * Sender silly window avoidance. If can send all data, + * a maximum segment, at least 1/4 of window do it, + * or are forced, do it; otherwise don't bother. + */ + if (len) { + if (len == tp->t_maxseg || off+len >= so->so_snd.sb_cc) + goto send; + if (len * 4 >= tp->snd_wnd) /* a lot */ + goto send; + if (tp->t_force) + goto send; } - if (tcp_send(tp)) - return (1); - tcp_sndnull(tp); - return (0); -} -tcp_sndnull(tp) - register struct tcb *tp; -{ -COUNT(TCP_SNDNULL); + /* + * Send if we owe peer an ACK. 
+ */ + if (tp->t_flags&TF_ACKNOW) + goto send; - tcp_output(tp, 0, 0, (struct mbuf *)0); - tp->tc_flags &= ~TC_ACK_DUE; -} -tcp_sndrst(tp, n) - register struct tcb *tp; - register struct th *n; -{ -COUNT(TCP_SNDRST); + /* + * Calculate available window in win, and also amount + * of window known to peer (as advertised window less + * next expected input.) If this is 35% or more of the + * maximum possible window, then want to send a segment to peer. + */ + win = sbspace(&so->so_rcv); + if (win > 0 && + ((100*(win-(tp->rcv_adv-tp->rcv_nxt))/so->so_rcv.sb_hiwat) >= 35)) + goto send; - /* don't send a reset in response to a reset */ - if (n->th_flags&TH_RST) - return; - tp->tc_flags |= TC_SND_RST; - if (n->th_flags&TH_ACK) - tp->snd_nxt = n->t_ackno; - tp->tc_flags &= ~TC_SYN_RCVD; - tcp_sndnull(tp); - tp->tc_flags &= ~TC_SND_RST; -} + /* + * TCP window updates are not reliable, rather a polling protocol + * using ``persist'' packets is used to ensure receipt of window + * updates. The three ``states'' for the output side are: + * idle not doing retransmits or persists + * persisting to move a zero window + * (re)transmitting and thereby not persisting + * + * tp->t_timer[TCPT_PERSIST] + * is set when we are in persist state. + * tp->t_force + * is set when we are called to send a persist packet. + * tp->t_timer[TCPT_REXMT] + * is set when we are retransmitting + * The output side is idle when both timers are zero. + * + * If send window is closed, there is data to transmit, and no + * retransmit or persist is pending, then go to persist state, + * arranging to force out a byte to get more current window information + * if nothing happens soon. + */ + if (tp->snd_wnd == 0 && so->so_snd.sb_cc && + tp->t_timer[TCPT_REXMT] == 0 && tp->t_timer[TCPT_PERSIST] == 0) { + tp->t_rxtshift = 0; + tcp_setpersist(tp); + } -/* - * Tcp segment output routine. 
- */ -tcp_send(tp) - register struct tcb *tp; -{ - register struct ucb *up; - register unsigned long last, wind; - struct mbuf *m; - int flags = 0, forced, sent; - struct mbuf *tcp_sndcopy(); - int len; + /* + * No reason to send a segment, just return. + */ + return (0); -COUNT(TCP_SEND); - up = tp->t_ucb; - tp->snd_lst = tp->snd_nxt; - forced = 0; - m = NULL; - if (tp->snd_nxt == tp->iss) { - flags |= TH_SYN; - tp->snd_lst++; +send: + /* + * Grab a header mbuf, attaching a copy of data to + * be transmitted, and initialize the header from + * the template for sends on this connection. + */ + MGET(m, M_DONTWAIT, MT_DATA); + if (m == 0) + return (ENOBUFS); + m->m_off = MMAXOFF - sizeof (struct tcpiphdr); + m->m_len = sizeof (struct tcpiphdr); + if (len) { + m->m_next = m_copy(so->so_snd.sb_mb, off, len); + if (m->m_next == 0) + len = 0; } - last = tp->snd_off; - for (m = up->uc_sbuf; m != NULL; m = m->m_next) - last += m->m_len; - if (tp->snd_nxt > last) { - if ((tp->tc_flags&TC_SND_FIN) && - (tp->seq_fin == tp->iss || tp->snd_nxt <= tp->seq_fin)) { + ti = mtod(m, struct tcpiphdr *); + if (tp->t_template == 0) + panic("tcp_output"); + bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr)); - flags |= TH_FIN; - tp->seq_fin = tp->snd_lst++; - } + /* + * Fill in fields, remembering maximum advertised + * window for use in delaying messages about window sizes. + */ + ti->ti_seq = tp->snd_nxt; + ti->ti_ack = tp->rcv_nxt; + ti->ti_seq = htonl(ti->ti_seq); + ti->ti_ack = htonl(ti->ti_ack); + /* + * Before ESTABLISHED, force sending of initial options + * unless TCP set to not do any options. 
+ */ + if (tp->t_state < TCPS_ESTABLISHED) { + if (tp->t_flags&TF_NOOPT) + goto noopt; + opt = tcp_initopt; + optlen = sizeof (tcp_initopt); + *(u_short *)(opt + 2) = MIN(so->so_rcv.sb_hiwat / 2, 1024); + *(u_short *)(opt + 2) = htons(*(u_short *)(opt + 2)); } else { - if (tp->tc_flags&TC_SYN_ACKED) { - wind = tp->snd_una + tp->snd_wnd; - tp->snd_lst = min(last, wind); - if ((len = tp->snd_lst - tp->snd_nxt) > 1024) - tp->snd_lst -= len - 1024; - if (tp->snd_lst >= wind) - tp->t_persist = T_PERS; - } - if ((tp->tc_flags&TC_FORCE_ONE) && (tp->snd_lst == wind)) { - tp->snd_lst = tp->snd_nxt + 1; - forced = 1; - } - m = tcp_sndcopy(tp, max(tp->iss+1,tp->snd_nxt), tp->snd_lst); - if (tp->snd_end > tp->iss && tp->snd_end <= tp->snd_lst) - flags |= TH_EOL; - if ((tp->tc_flags&TC_SND_FIN) && !forced && - tp->snd_lst == last && - (tp->seq_fin == tp->iss || tp->snd_nxt <= tp->seq_fin)) { - flags |= TH_FIN; - tp->seq_fin = tp->snd_lst++; - } + if (tp->t_tcpopt == 0) + goto noopt; + opt = mtod(tp->t_tcpopt, u_char *); + optlen = tp->t_tcpopt->m_len; } - if (tp->snd_nxt >= tp->snd_lst) - return (0); - if (tp->tc_flags & TC_SND_URG) - flags |= TH_URG; - sent = tcp_output(tp, flags, tp->snd_lst - tp->snd_nxt, m); - if (!forced) { - tp->t_rexmt = tp->t_xmtime; - tp->t_rexmt_val = tp->snd_lst; - if ((tp->tc_flags&TC_REXMT) == 0) { - tp->t_rexmttl = T_REXMTTL; - tp->t_rtl_val = tp->snd_lst; + if (opt) { + m0 = m->m_next; + m->m_next = m_get(M_DONTWAIT, MT_DATA); + if (m->m_next == 0) { + (void) m_free(m); + m_freem(m0); + return (ENOBUFS); } + m->m_next->m_next = m0; + m0 = m->m_next; + m0->m_len = optlen; + bcopy((caddr_t)opt, mtod(m0, caddr_t), optlen); + opt = (u_char *)(mtod(m0, caddr_t) + optlen); + while (m0->m_len & 0x3) { + *opt++ = TCPOPT_EOL; + m0->m_len++; + } + optlen = m0->m_len; + ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2; } - if (sent) - tp->snd_nxt = tp->snd_lst; - if ((tp->tc_flags&TC_SYN_ACKED) && - tp->snd_una > tp->t_xmt_val) { - tp->t_xmt = 0; - 
tp->t_xmt_val = tp->snd_lst; - } - tp->tc_flags &= ~(TC_ACK_DUE|TC_REXMT|TC_FORCE_ONE); - tp->snd_hi = max(tp->snd_nxt, tp->snd_hi); - return (1); -} +noopt: + ti->ti_flags = flags; + win = sbspace(&so->so_rcv); + if (win < so->so_rcv.sb_hiwat / 4) /* avoid silly window */ + win = 0; + if (win > 0) + ti->ti_win = htons((u_short)win); + if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { + ti->ti_urp = tp->snd_up - tp->snd_nxt; + ti->ti_urp = htons(ti->ti_urp); + ti->ti_flags |= TH_URG; + } else + /* + * If no urgent pointer to send, then we pull + * the urgent pointer to the left edge of the send window + * so that it doesn't drift into the send window on sequence + * number wraparound. + */ + tp->snd_up = tp->snd_una; /* drag it along */ + /* + * If anything to send and we can send it all, set PUSH. + * (This will keep happy those implementations which only + * give data to the user when a buffer fills or a PUSH comes in. + */ + if (len && off+len == so->so_snd.sb_cc) + ti->ti_flags |= TH_PUSH; -/* - * Create template to be used to send tcp packets on a connection. - * Call after host entry created, allocates an mbuf and fills - * in a skeletal tcp/ip header, minimizing the amount of work - * necessary when the connection is used. - */ -struct th * -tcp_template(tp) - struct tcb *tp; -{ - register struct host *h = tp->t_ucb->uc_host; - register struct mbuf *m; - register struct th *n; - register struct ip *ip; + /* + * Put TCP length in extended header, and then + * checksum extended header and data. 
+ */ + if (len + optlen) { + ti->ti_len = sizeof (struct tcphdr) + optlen + len; + ti->ti_len = htons((u_short)ti->ti_len); + } + ti->ti_sum = in_cksum(m, sizeof (struct tcpiphdr) + (int)optlen + len); - if (h == 0) - return (0); - m = m_get(1); - if (m == 0) - return (0); - m->m_off = MMAXOFF - sizeof (struct th); - m->m_len = sizeof (struct th); - n = mtod(m, struct th *); - n->t_next = n->t_prev = 0; - n->t_x1 = 0; - n->t_pr = TCPROTO; - n->t_len = htons(sizeof (struct th) - sizeof (struct ip)); - n->t_s.s_addr = n_lhost.s_addr; - n->t_d.s_addr = h->h_addr.s_addr; - n->t_src = htons(tp->t_lport); - n->t_dst = htons(tp->t_fport); - n->t_seq = 0; - n->t_ackno = 0; - n->t_x2 = 0; - n->t_off = 5; - n->th_flags = 0; - n->t_win = 0; - n->t_sum = 0; - n->t_urp = 0; - return (n); -} + /* + * In transmit state, time the transmission and arrange for + * the retransmit. In persist state, reset persist time for + * next persist. + */ + if (tp->t_force == 0) { + /* + * Advance snd_nxt over sequence space of this segment. + */ + if (flags & (TH_SYN|TH_FIN)) + tp->snd_nxt++; + tp->snd_nxt += len; + if (SEQ_GT(tp->snd_nxt, tp->snd_max)) + tp->snd_max = tp->snd_nxt; -tcp_output(tp, flags, len, dat) - register struct tcb *tp; - register int flags; - int len; - struct mbuf *dat; -{ - register struct mbuf *m; - register struct th *t; - register struct ip *ip; - int i; -#ifdef TCPDEBUG - struct tcp_debug tdb; -#endif -COUNT(SEND_TCP); + /* + * Time this transmission if not a retransmission and + * not currently timing anything. 
+ */ + if (SEQ_GT(tp->snd_nxt, tp->snd_max) && tp->t_rtt == 0) { + tp->t_rtt = 1; + tp->t_rtseq = tp->snd_nxt - len; + } - if ((t = tp->t_ucb->uc_template) == 0) - return (0); - MGET(m, 0); - if (m == 0) - return (0); - m->m_off = MMAXOFF - sizeof(struct th); - m->m_len = sizeof (struct th); - m->m_next = dat; - if (flags & TH_SYN) - len--; - if (flags & TH_FIN) - len--; - bcopy((caddr_t)t, mtod(m, caddr_t), sizeof (struct th)); - t = mtod(m, struct th *); - if (tp->tc_flags&TC_SND_RST) { - flags &= ~TH_SYN; - flags |= TH_RST; - } - if (tp->tc_flags&TC_SYN_RCVD) - flags |= TH_ACK; - t->th_flags = flags; - if (flags & TH_URG) - t->t_urp = htons(tp->snd_urp); - t->t_win = - tp->t_ucb->uc_rhiwat - (tp->t_ucb->uc_rcc + tp->seqcnt); - if (tp->rcv_nxt + t->t_win > tp->rcv_adv) - tp->rcv_adv = tp->rcv_nxt + t->t_win; - if (len) - t->t_len = htons(len + TCPSIZE); - t->t_win = htons(t->t_win); -#ifdef TCPDEBUG - if ((tp->t_ucb->uc_flags & UDEBUG) || tcpconsdebug) { - t->t_seq = tp->snd_nxt; - t->t_ackno = tp->rcv_nxt; - tdb_setup(tp, t, INSEND, &tdb); - tdb_stuff(&tdb, -2); + /* + * Set retransmit timer if not currently set. + * Initial value for retransmit timer to tcp_beta*tp->t_srtt. + * Initialize shift counter which is used for exponential + * backoff of retransmit time. 
+ */ + if (tp->t_timer[TCPT_REXMT] == 0 && + tp->snd_nxt != tp->snd_una) { + TCPT_RANGESET(tp->t_timer[TCPT_REXMT], + tcp_beta * tp->t_srtt, TCPTV_MIN, TCPTV_MAX); + tp->t_rtt = 0; + tp->t_rxtshift = 0; + } + tp->t_timer[TCPT_PERSIST] = 0; + } else { + if (SEQ_GT(tp->snd_una+1, tp->snd_max)) + tp->snd_max = tp->snd_una+1; } -#endif - t->t_seq = htonl(tp->snd_nxt); - t->t_ackno = htonl(tp->rcv_nxt); - t->t_sum = cksum(m, len + sizeof(struct th)); - ip = (struct ip *)t; - ip->ip_v = IPVERSION; - ip->ip_hl = 5; - ip->ip_tos = 0; - ip->ip_len = len + sizeof(struct th); - ip->ip_id = ip_id++; - ip->ip_off = 0; - ip->ip_ttl = MAXTTL; - i = ip_send(ip); - return(i); -} -firstempty(tp) - register struct tcb *tp; -{ - register struct th *p, *q; -COUNT(FIRSTEMPTY); + /* + * Trace. + */ + if (so->so_options & SO_DEBUG) + tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0); - if ((p = tp->t_rcv_next) == (struct th *)tp || tp->rcv_nxt < p->t_seq) - return (tp->rcv_nxt); - while ((q = p->t_next) != (struct th *)tp && - (t_end(p) + 1) == q->t_seq) - p = q; - return (t_end(p) + 1); -} - -struct mbuf * -tcp_sndcopy(tp, start, end) - struct tcb *tp; - u_long start, end; -{ - register struct mbuf *m, *n, **np; - u_long off; - register int len; - int adj; - struct mbuf *top, *p; -COUNT(SND_COPY); + /* + * Fill in IP length and desired time to live and + * send to IP level. + */ + ((struct ip *)ti)->ip_len = sizeof (struct tcpiphdr) + optlen + len; + ((struct ip *)ti)->ip_ttl = TCP_TTL; + if (error = ip_output(m, tp->t_ipopt, (so->so_options & SO_DONTROUTE) ? 
+ &routetoif : &tp->t_inpcb->inp_route, 0)) + return (error); - if (start >= end) - return(NULL); - off = tp->snd_off; - m = tp->t_ucb->uc_sbuf; - while (m != NULL && start >= (off + m->m_len)) { - off += m->m_len; - m = m->m_next; - } - np = &top; - top = 0; - adj = start - off; - len = end - start; - while (m && len > 0) { - MGET(n, 1); - *np = n; - if (n == 0) - goto nospace; - n->m_len = MIN(len, m->m_len - adj); - if (m->m_off > MMAXOFF) { - p = mtod(m, struct mbuf *); - n->m_off = ((int)p - (int)n) + adj; - mprefcnt[mtopf(p)]++; - } else { - n->m_off = MMINOFF; - bcopy(mtod(m, caddr_t)+adj, mtod(n, caddr_t), - n->m_len); - } - len -= n->m_len; - adj = 0; - m = m->m_next; - /* SHOULD TRY PACKING INTO SMALL MBUFS HERE */ - np = &n->m_next; - } - /* SHOULD NEVER RUN OUT OF m WHEN LEN */ - if (len) - printf("snd_copy: m %x len %d\n", m, len); - return (top); -nospace: - printf("snd_copy: no space\n"); - m_freem(top); + /* + * Data sent (as far as we can tell). + * If this advertises a larger window than any other segment, + * then remember the size of the advertised window. + * Drop send for purpose of ACK requirements. + */ + if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) + tp->rcv_adv = tp->rcv_nxt + win; + tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); + if (sendalot && tp->t_force == 0) + goto again; return (0); } -tcp_enq(p, prev) - register struct th *p; - register struct th *prev; -{ - - p->t_prev = prev; - p->t_next = prev->t_next; - prev->t_next->t_prev = p; - prev->t_next = p; -} - -tcp_deq(p) - register struct th *p; +tcp_setpersist(tp) + register struct tcpcb *tp; { - p->t_prev->t_next = p->t_next; - p->t_next->t_prev = p->t_prev; + if (tp->t_timer[TCPT_REXMT]) + panic("tcp_output REXMT"); + /* + * Start/restart persistence timer. + */ + TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], + ((int)(tcp_beta * tp->t_srtt)) << tp->t_rxtshift, + TCPTV_PERSMIN, TCPTV_MAX); + tp->t_rxtshift++; + if (tp->t_rxtshift >= TCP_MAXRXTSHIFT) + tp->t_rxtshift = 0; }