X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/blobdiff_plain/2ff61f9de862796b4b10ad48f162b5d77847f1b3..05db08ca40dcf52f15be693f25a96a593409f690:/usr/src/sys/netinet/tcp_output.c diff --git a/usr/src/sys/netinet/tcp_output.c b/usr/src/sys/netinet/tcp_output.c index 3199ee8110..4d150a48a6 100644 --- a/usr/src/sys/netinet/tcp_output.c +++ b/usr/src/sys/netinet/tcp_output.c @@ -1,200 +1,425 @@ -/* tcp_output.c 4.17 81/11/24 */ - -#include "../h/param.h" -#include "../h/systm.h" -#include "../h/mbuf.h" -#include "../h/socket.h" -#include "../h/socketvar.h" -#include "../net/inet.h" -#include "../net/inet_pcb.h" -#include "../net/inet_systm.h" -#include "../net/imp.h" -#include "../net/ip.h" -#include "../net/ip_var.h" -#include "../net/tcp.h" -#include "../net/tcp_var.h" -#include "../net/tcp_fsm.h" -#include "/usr/include/errno.h" - /* - * Special routines to send control messages. + * Copyright (c) 1982, 1986 Regents of the University of California. + * All rights reserved. The Berkeley software License Agreement + * specifies the terms and conditions for redistribution. + * + * @(#)tcp_output.c 7.10 (Berkeley) %G% */ -tcp_sndctl(tp) - struct tcpcb *tp; -{ -COUNT(TCP_SNDCTL); - - if (tcp_send(tp)) - return (1); - tcp_sndnull(tp); - return (0); -} - -tcp_sndwin(tp) - struct tcpcb *tp; -{ - int ihave, hehas; -COUNT(TCP_SNDWIN); - if (tp->rcv_adv) { - register struct socket *so = tp->t_inpcb->inp_socket; +#include "param.h" +#include "systm.h" +#include "mbuf.h" +#include "protosw.h" +#include "socket.h" +#include "socketvar.h" +#include "errno.h" - ihave = so->so_rcv.sb_hiwat - - (so->so_rcv.sb_cc + tp->seqcnt); - hehas = tp->rcv_adv - tp->rcv_nxt; - if ((100*(ihave-hehas)/so->so_rcv.sb_hiwat) < 35) - return; - } - if (tcp_send(tp)) - return; - tcp_sndnull(tp); -} +#include "../net/route.h" -tcp_sndnull(tp) - register struct tcpcb *tp; -{ -COUNT(TCP_SNDNULL); +#include "in.h" +#include "in_pcb.h" +#include "in_systm.h" +#include "ip.h" +#include "ip_var.h" +#include "tcp.h" +#define TCPOUTFLAGS +#include "tcp_fsm.h" +#include "tcp_seq.h" +#include "tcp_timer.h" +#include "tcp_var.h" +#include "tcpip.h" +#include "tcp_debug.h" - (void) tcp_output(tp, 0, 0, (struct mbuf *)0); - tp->tc_flags &= ~TC_ACK_DUE; -} +/* + * Initial options. + */ +u_char tcp_initopt[4] = { TCPOPT_MAXSEG, 4, 0x0, 0x0, }; /* - * Tcp segment output routine. + * Tcp output routine: figure out what should be sent and send it. */ -tcp_send(tp) +tcp_output(tp) register struct tcpcb *tp; { - register unsigned long last, wind; register struct socket *so = tp->t_inpcb->inp_socket; - struct mbuf *m; - int flags = 0, forced, sent, len; - -COUNT(TCP_SEND); - tp->snd_lst = tp->snd_nxt; - forced = 0; - m = NULL; - if (tp->snd_nxt == tp->iss) { - flags |= TH_SYN; - tp->snd_lst++; - } - last = tp->snd_off; - for (m = so->so_snd.sb_mb; m != NULL; m = m->m_next) - last += m->m_len; - if (tp->snd_nxt > last) { - if ((tp->tc_flags&TC_SND_FIN) && - (tp->seq_fin == tp->iss || tp->snd_nxt <= tp->seq_fin)) { - - flags |= TH_FIN; - tp->seq_fin = tp->snd_lst++; - } - } else { - if (tp->tc_flags&TC_SYN_ACKED) { - wind = tp->snd_una + tp->snd_wnd; - tp->snd_lst = MIN(last, wind); - if ((len = tp->snd_lst - tp->snd_nxt) > 1024) - tp->snd_lst -= len - 1024; - if (tp->snd_lst >= wind) - tp->t_persist = T_PERS; + register int len, win; + struct mbuf *m0; + int off, flags, error; + register struct mbuf *m; + register struct tcpiphdr *ti; + u_char *opt; + unsigned optlen = 0; + int idle, sendalot; + + /* + * Determine length of data that should be transmitted, + * and flags that will be used. + * If there is some data or critical controls (SYN, RST) + * to send, then transmit; otherwise, investigate further. + */ + idle = (tp->snd_max == tp->snd_una); +again: + sendalot = 0; + off = tp->snd_nxt - tp->snd_una; + win = MIN(tp->snd_wnd, tp->snd_cwnd); + + /* + * If in persist timeout with window of 0, send 1 byte. + * Otherwise, if window is small but nonzero + * and timer expired, we will send what we can + * and go to transmit state. + */ + if (tp->t_force) { + if (win == 0) + win = 1; + else { + tp->t_timer[TCPT_PERSIST] = 0; + tp->t_rxtshift = 0; } - if ((tp->tc_flags&TC_FORCE_ONE) && (tp->snd_lst == wind)) { - tp->snd_lst = tp->snd_nxt + 1; - forced = 1; - } else if (tp->snd_nxt >= tp->snd_lst && (tp->tc_flags&TC_SND_FIN) == 0) + } + + len = MIN(so->so_snd.sb_cc, win) - off; + flags = tcp_outflags[tp->t_state]; + + if (len < 0) { + /* + * If FIN has been sent but not acked, + * but we haven't been called to retransmit, + * len will be -1; transmit if acking, otherwise no need. + * Otherwise, window shrank after we sent into it. + * If window shrank to 0, cancel pending retransmit + * and pull snd_nxt back to (closed) window. + * We will enter persist state below. + * If the window didn't close completely, + * just wait for an ACK. + */ + if (flags & TH_FIN) { + if (tp->t_flags & TF_ACKNOW) + len = 0; + else + return (0); + } else if (win == 0) { + tp->t_timer[TCPT_REXMT] = 0; + tp->snd_nxt = tp->snd_una; + len = 0; + } else return (0); - m = m_copy(so->so_snd.sb_mb, - (int)(MAX(tp->iss+1,tp->snd_nxt) - tp->snd_off), - (int)(tp->snd_lst - tp->snd_off)); - if (tp->snd_end > tp->iss && tp->snd_end <= tp->snd_lst) - flags |= TH_EOL; - if ((tp->tc_flags&TC_SND_FIN) && !forced && - tp->snd_lst == last && - (tp->seq_fin == tp->iss || tp->snd_nxt <= tp->seq_fin)) { - flags |= TH_FIN; - tp->seq_fin = tp->snd_lst++; - } } - if (tp->snd_nxt >= tp->snd_lst) - return (0); - if (tp->tc_flags & TC_SND_URG) - flags |= TH_URG; - sent = tcp_output(tp, flags, (int)(tp->snd_lst - tp->snd_nxt), m); - if (!forced) { - tp->t_rexmt = tp->t_xmtime; - tp->t_rexmt_val = tp->snd_lst; - if ((tp->tc_flags&TC_REXMT) == 0) { - tp->t_rexmttl = T_REXMTTL; - tp->t_rtl_val = tp->snd_lst; + if (len > tp->t_maxseg) { + len = tp->t_maxseg; + sendalot = 1; + } + if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) + flags &= ~TH_FIN; + win = sbspace(&so->so_rcv); + + + /* + * If our state indicates that FIN should be sent + * and we have not yet done so, or we're retransmitting the FIN, + * then we need to send. + */ + if (flags & TH_FIN && + ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) + goto send; + /* + * Send if we owe peer an ACK. + */ + if (tp->t_flags & TF_ACKNOW) + goto send; + if (flags & (TH_SYN|TH_RST)) + goto send; + if (SEQ_GT(tp->snd_up, tp->snd_una)) + goto send; + + /* + * Sender silly window avoidance. If connection is idle + * and can send all data, a maximum segment, + * at least a maximum default-size segment do it, + * or are forced, do it; otherwise don't bother. + * If peer's buffer is tiny, then send + * when window is at least half open. + * If retransmitting (possibly after persist timer forced us + * to send into a small window), then must resend. + */ + if (len) { + if (len == tp->t_maxseg) + goto send; + if ((idle || tp->t_flags & TF_NODELAY) && + len + off >= so->so_snd.sb_cc) + goto send; + if (tp->t_force) + goto send; + if (len >= tp->max_sndwnd / 2) + goto send; + if (SEQ_LT(tp->snd_nxt, tp->snd_max)) + goto send; + } + + /* + * Compare available window to amount of window + * known to peer (as advertised window less + * next expected input.) If the difference is 35% or more of the + * maximum possible window, then want to send a window update to peer. + */ + if (win > 0) { + int adv = win - (tp->rcv_adv - tp->rcv_nxt); + + if (100 * adv / so->so_rcv.sb_hiwat >= 35) + goto send; + if (adv >= 2 * tp->t_maxseg && so->so_rcv.sb_cc == 0) + goto send; + } + + /* + * TCP window updates are not reliable, rather a polling protocol + * using ``persist'' packets is used to insure receipt of window + * updates. The three ``states'' for the output side are: + * idle not doing retransmits or persists + * persisting to move a small or zero window + * (re)transmitting and thereby not persisting + * + * tp->t_timer[TCPT_PERSIST] + * is set when we are in persist state. + * tp->t_force + * is set when we are called to send a persist packet. + * tp->t_timer[TCPT_REXMT] + * is set when we are retransmitting + * The output side is idle when both timers are zero. + * + * If send window is too small, there is data to transmit, and no + * retransmit or persist is pending, then go to persist state. + * If nothing happens soon, send when timer expires: + * if window is nonzero, transmit what we can, + * otherwise force out a byte. + */ + if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && + tp->t_timer[TCPT_PERSIST] == 0) { + tp->t_rxtshift = 0; + tcp_setpersist(tp); + } + + /* + * No reason to send a segment, just return. + */ + return (0); + +send: + /* + * Grab a header mbuf, attaching a copy of data to + * be transmitted, and initialize the header from + * the template for sends on this connection. + */ + MGET(m, M_DONTWAIT, MT_HEADER); + if (m == NULL) + return (ENOBUFS); + m->m_off = MMAXOFF - sizeof (struct tcpiphdr); + m->m_len = sizeof (struct tcpiphdr); + if (len) { + if (tp->t_force && len == 1) + tcpstat.tcps_sndprobe++; + else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { + tcpstat.tcps_sndrexmitpack++; + tcpstat.tcps_sndrexmitbyte += len; + } else { + tcpstat.tcps_sndpack++; + tcpstat.tcps_sndbyte += len; + } + m->m_next = m_copy(so->so_snd.sb_mb, off, len); + if (m->m_next == 0) + len = 0; + } else if (tp->t_flags & TF_ACKNOW) + tcpstat.tcps_sndacks++; + else if (flags & (TH_SYN|TH_FIN|TH_RST)) + tcpstat.tcps_sndctrl++; + else if (SEQ_GT(tp->snd_up, tp->snd_una)) + tcpstat.tcps_sndurg++; + else + tcpstat.tcps_sndwinup++; + + ti = mtod(m, struct tcpiphdr *); + if (tp->t_template == 0) + panic("tcp_output"); + bcopy((caddr_t)tp->t_template, (caddr_t)ti, sizeof (struct tcpiphdr)); + + /* + * Fill in fields, remembering maximum advertised + * window for use in delaying messages about window sizes. + * If resending a FIN, be sure not to use a new sequence number. + */ + if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && + tp->snd_nxt == tp->snd_max) + tp->snd_nxt--; + ti->ti_seq = htonl(tp->snd_nxt); + ti->ti_ack = htonl(tp->rcv_nxt); + /* + * Before ESTABLISHED, force sending of initial options + * unless TCP set to not do any options. + */ + opt = NULL; + if (flags & TH_SYN && (tp->t_flags & TF_NOOPT) == 0) { + u_short mss; + + mss = MIN(so->so_rcv.sb_hiwat / 2, tcp_mss(tp)); + if (mss > IP_MSS - sizeof(struct tcpiphdr)) { + opt = tcp_initopt; + optlen = sizeof (tcp_initopt); + *(u_short *)(opt + 2) = htons(mss); } } - if (sent) - tp->snd_nxt = tp->snd_lst; - if ((tp->tc_flags&TC_SYN_ACKED) && - tp->snd_una > tp->t_xmt_val) { - tp->t_xmt = 0; - tp->t_xmt_val = tp->snd_lst; + if (opt) { + m0 = m->m_next; + m->m_next = m_get(M_DONTWAIT, MT_DATA); + if (m->m_next == 0) { + (void) m_free(m); + m_freem(m0); + return (ENOBUFS); + } + m->m_next->m_next = m0; + m0 = m->m_next; + m0->m_len = optlen; + bcopy((caddr_t)opt, mtod(m0, caddr_t), optlen); + opt = (u_char *)(mtod(m0, caddr_t) + optlen); + while (m0->m_len & 0x3) { + *opt++ = TCPOPT_EOL; + m0->m_len++; + } + optlen = m0->m_len; + ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2; } - tp->tc_flags &= ~(TC_ACK_DUE|TC_REXMT|TC_FORCE_ONE); - tp->snd_hi = MAX(tp->snd_nxt, tp->snd_hi); - return (1); + ti->ti_flags = flags; + /* + * Calculate receive window. Don't shrink window, + * but avoid silly window syndrome. + */ + if (win < so->so_rcv.sb_hiwat / 4 && win < tp->t_maxseg) + win = 0; + if (win < (int)(tp->rcv_adv - tp->rcv_nxt)) + win = (int)(tp->rcv_adv - tp->rcv_nxt); + ti->ti_win = htons((u_short)win); + if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { + ti->ti_urp = htons((u_short)(tp->snd_up - tp->snd_nxt)); + ti->ti_flags |= TH_URG; + } else + /* + * If no urgent pointer to send, then we pull + * the urgent pointer to the left edge of the send window + * so that it doesn't drift into the send window on sequence + * number wraparound. + */ + tp->snd_up = tp->snd_una; /* drag it along */ + /* + * If anything to send and we can send it all, set PUSH. + * (This will keep happy those implementations which only + * give data to the user when a buffer fills or a PUSH comes in.) + */ + if (len && off+len == so->so_snd.sb_cc) + ti->ti_flags |= TH_PUSH; + + /* + * Put TCP length in extended header, and then + * checksum extended header and data. + */ + if (len + optlen) + ti->ti_len = htons((u_short)(sizeof(struct tcphdr) + + optlen + len)); + ti->ti_sum = in_cksum(m, sizeof (struct tcpiphdr) + (int)optlen + len); + + /* + * In transmit state, time the transmission and arrange for + * the retransmit. In persist state, just set snd_max. + */ + if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { + tcp_seq startseq = tp->snd_nxt; + + /* + * Advance snd_nxt over sequence space of this segment. + */ + if (flags & TH_SYN) + tp->snd_nxt++; + if (flags & TH_FIN) { + tp->snd_nxt++; + tp->t_flags |= TF_SENTFIN; + } + tp->snd_nxt += len; + if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { + tp->snd_max = tp->snd_nxt; + /* + * Time this transmission if not a retransmission and + * not currently timing anything. + */ + if (tp->t_rtt == 0) { + tp->t_rtt = 1; + tp->t_rtseq = startseq; + tcpstat.tcps_segstimed++; + } + } + + /* + * Set retransmit timer if not currently set, + * and not doing an ack or a keep-alive probe. + * Initial value for retransmit timer is smoothed + * round-trip time + 2 * round-trip time variance. + * Initialize shift counter which is used for backoff + * of retransmit time. + */ + if (tp->t_timer[TCPT_REXMT] == 0 && + tp->snd_nxt != tp->snd_una) { + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + if (tp->t_timer[TCPT_PERSIST]) { + tp->t_timer[TCPT_PERSIST] = 0; + tp->t_rxtshift = 0; + } + } + } else + if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) + tp->snd_max = tp->snd_nxt + len; + + /* + * Trace. + */ + if (so->so_options & SO_DEBUG) + tcp_trace(TA_OUTPUT, tp->t_state, tp, ti, 0); + + /* + * Fill in IP length and desired time to live and + * send to IP level. + */ + ((struct ip *)ti)->ip_len = sizeof (struct tcpiphdr) + optlen + len; + ((struct ip *)ti)->ip_ttl = TCP_TTL; + error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route, + so->so_options & SO_DONTROUTE); + if (error) + return (error); + tcpstat.tcps_sndtotal++; + + /* + * Data sent (as far as we can tell). + * If this advertises a larger window than any other segment, + * then remember the size of the advertised window. + * Any pending ACK has now been sent. + */ + if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) + tp->rcv_adv = tp->rcv_nxt + win; + tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); + if (sendalot) + goto again; + return (0); } -tcp_output(tp, flags, len, dat) + +tcp_setpersist(tp) register struct tcpcb *tp; - register int flags; - int len; - struct mbuf *dat; { - register struct tcpiphdr *t; /* known to be r9 */ - register struct mbuf *m; - struct socket *so = tp->t_inpcb->inp_socket; - register struct ip *ip; -COUNT(TCP_OUTPUT); - - if ((t = tp->t_template) == 0) - return (0); - MGET(m, 0); - if (m == 0) - return (0); - m->m_off = MMAXOFF - sizeof(struct tcpiphdr); - m->m_len = sizeof (struct tcpiphdr); - m->m_next = dat; - if (flags & TH_SYN) - len--; - if (flags & TH_FIN) - len--; - if (len < 0) - panic("tcp_output"); - bcopy((caddr_t)t, mtod(m, caddr_t), sizeof (struct tcpiphdr)); - t = mtod(m, struct tcpiphdr *); - if (tp->tc_flags&TC_SND_RST) { - flags &= ~TH_SYN; - flags |= TH_RST; - } - if (tp->tc_flags&TC_SYN_RCVD) - flags |= TH_ACK; - t->ti_flags = flags; - if (flags & TH_URG) - t->ti_urp = htons((u_short)tp->snd_urp); /*XXX */ - t->ti_win = - so->so_rcv.sb_hiwat - - (so->so_rcv.sb_cc + tp->seqcnt); - if (tp->rcv_nxt + t->ti_win > tp->rcv_adv) - tp->rcv_adv = tp->rcv_nxt + t->ti_win; - if (len) - t->ti_len = htons((u_short)(len + sizeof (struct tcphdr))); - t->ti_win = htons(t->ti_win); - t->ti_seq = htonl(tp->snd_nxt); - t->ti_ackno = htonl(tp->rcv_nxt); - t->ti_sum = 0; /* gratuitous? */ - t->ti_sum = inet_cksum(m, sizeof (struct tcpiphdr) + len); - ip = (struct ip *)t; - ip->ip_v = IPVERSION; - ip->ip_hl = 5; - ip->ip_tos = 0; - ip->ip_len = len + sizeof(struct tcpiphdr); - ip->ip_id = ip_id++; - ip->ip_off = 0; - ip->ip_ttl = MAXTTL; - ip_send(ip); - return (1); + register t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; + + if (tp->t_timer[TCPT_REXMT]) + panic("tcp_output REXMT"); + /* + * Start/restart persistance timer. + */ + TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], + t * tcp_backoff[tp->t_rxtshift], + TCPTV_PERSMIN, TCPTV_PERSMAX); + if (tp->t_rxtshift < TCP_MAXRXTSHIFT) + tp->t_rxtshift++; }