* Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* from: @(#)tcp_output.c 7.22 (Berkeley) 8/31/90
#include "../net/route.h"
/*
 * External mbuf helper, defined outside this file.  It is called here with
 * the send buffer's mbuf chain, a byte offset, a length, and the amount of
 * leading space to leave for headers (max_linkhdr + hdrlen); the returned
 * pointer is checked against 0 by the caller.
 */
extern struct mbuf
*m_copypack();
/*
 * Initial TCP option bytes: option kind TCPOPT_MAXSEG, option length 4,
 * followed by two zero bytes.  sizeof (tcp_initopt) is what the output
 * path uses as the option length when a SYN carries options.
 */
u_char tcp_initopt[4] = { TCPOPT_MAXSEG, 4, 0, 0 };
* Tcp output routine: figure out what should be sent and send it.
register struct tcpcb
*tp
;
register struct socket
*so
= tp
->t_inpcb
->inp_socket
;
register struct tcpiphdr
*ti
;
* Determine length of data that should be transmitted,
* and flags that will be used.
* If there is some data or critical controls (SYN, RST)
* to send, then transmit; otherwise, investigate further.
idle
= (tp
->snd_max
== tp
->snd_una
);
if (idle
&& tp
->t_idle
>= tp
->t_rxtcur
)
* We have been idle for "a while" and no acks are
* expected to clock out any data we send --
* slow start to get ack "clock" running again.
tp
->snd_cwnd
= tp
->t_maxseg
;
off
= tp
->snd_nxt
- tp
->snd_una
;
win
= min(tp
->snd_wnd
, tp
->snd_cwnd
);
* If in persist timeout with window of 0, send 1 byte.
* Otherwise, if window is small but nonzero
* and timer expired, we will send what we can
* and go to transmit state.
tp
->t_timer
[TCPT_PERSIST
] = 0;
flags
= tcp_outflags
[tp
->t_state
];
len
= min(so
->so_snd
.sb_cc
, win
) - off
;
* If FIN has been sent but not acked,
* but we haven't been called to retransmit,
* len will be -1. Otherwise, window shrank
* after we sent into it. If window shrank to 0,
* cancel pending retransmit and pull snd_nxt
* back to (closed) window. We will enter persist
* state below. If the window didn't close completely,
tp
->t_timer
[TCPT_REXMT
] = 0;
tp
->snd_nxt
= tp
->snd_una
;
if (len
> tp
->t_maxseg
) {
if (SEQ_LT(tp
->snd_nxt
+ len
, tp
->snd_una
+ so
->so_snd
.sb_cc
))
win
= sbspace(&so
->so_rcv
);
* Sender silly window avoidance. If the connection is idle
* and we can send all the data, or can send a maximum segment,
* or at least a maximum default-size segment, or are forced,
* do it; otherwise don't bother.
* If peer's buffer is tiny, then send
* when window is at least half open.
* If retransmitting (possibly after persist timer forced us
* to send into a small window), then must resend.
if ((idle
|| tp
->t_flags
& TF_NODELAY
) &&
len
+ off
>= so
->so_snd
.sb_cc
)
if (len
>= tp
->max_sndwnd
/ 2)
if (SEQ_LT(tp
->snd_nxt
, tp
->snd_max
))
* Compare available window to amount of window
* known to peer (as advertised window less
* next expected input). If the difference is at least two
* max size segments, or at least 50% of the maximum possible
* window, then want to send a window update to peer.
long adv
= win
- (tp
->rcv_adv
- tp
->rcv_nxt
);
if (adv
>= (long) (2 * tp
->t_maxseg
))
if (2 * adv
>= (long) so
->so_rcv
.sb_hiwat
)
* Send if we owe peer an ACK.
if (tp
->t_flags
& TF_ACKNOW
)
if (flags
& (TH_SYN
|TH_RST
))
if (SEQ_GT(tp
->snd_up
, tp
->snd_una
))
* If our state indicates that FIN should be sent
* and we have not yet done so, or we're retransmitting the FIN,
((tp
->t_flags
& TF_SENTFIN
) == 0 || tp
->snd_nxt
== tp
->snd_una
))
* TCP window updates are not reliable, rather a polling protocol
* using ``persist'' packets is used to ensure receipt of window
* updates. The three ``states'' for the output side are:
* idle not doing retransmits or persists
* persisting to move a small or zero window
* (re)transmitting and thereby not persisting
* tp->t_timer[TCPT_PERSIST]
* is set when we are in persist state.
* tp->t_force
* is set when we are called to send a persist packet.
* tp->t_timer[TCPT_REXMT]
* is set when we are retransmitting
* The output side is idle when both timers are zero.
* If send window is too small, there is data to transmit, and no
* retransmit or persist is pending, then go to persist state.
* If nothing happens soon, send when timer expires:
* if window is nonzero, transmit what we can,
* otherwise force out a byte.
if (so
->so_snd
.sb_cc
&& tp
->t_timer
[TCPT_REXMT
] == 0 &&
tp
->t_timer
[TCPT_PERSIST
] == 0) {
* No reason to send a segment, just return.
* Before ESTABLISHED, force sending of initial options
* unless TCP set not to do any options.
* NOTE: we assume that the IP/TCP header plus TCP options
* always fit in a single mbuf, leaving room for a maximum link header, i.e.
* max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN
hdrlen
= sizeof (struct tcpiphdr
);
if (flags
& TH_SYN
&& (tp
->t_flags
& TF_NOOPT
) == 0) {
optlen
= sizeof (tcp_initopt
);
hdrlen
+= sizeof (tcp_initopt
);
*(u_short
*)(opt
+ 2) = htons((u_short
) tcp_mss(tp
, 0));
if (max_linkhdr
+ hdrlen
> MHLEN
)
* Grab a header mbuf, attaching a copy of data to
* be transmitted, and initialize the header from
* the template for sends on this connection.
if (tp
->t_force
&& len
== 1)
else if (SEQ_LT(tp
->snd_nxt
, tp
->snd_max
)) {
tcpstat
.tcps_sndrexmitpack
++;
tcpstat
.tcps_sndrexmitbyte
+= len
;
tcpstat
.tcps_sndbyte
+= len
;
if ((m
= m_copypack(so
->so_snd
.sb_mb
, off
,
(int)len
, max_linkhdr
+ hdrlen
)) == 0) {
* m_copypack left space for our hdr; use it.
MGETHDR(m
, M_DONTWAIT
, MT_HEADER
);
m
->m_data
+= max_linkhdr
;
if (len
<= MHLEN
- hdrlen
- max_linkhdr
) {
m_copydata(so
->so_snd
.sb_mb
, off
, (int) len
,
mtod(m
, caddr_t
) + hdrlen
);
m
->m_next
= m_copy(so
->so_snd
.sb_mb
, off
, (int) len
);
* If we're sending everything we've got, set PUSH.
* (This will keep happy those implementations which only
* give data to the user when a buffer fills or a PUSH comes in.)
if (off
+ len
== so
->so_snd
.sb_cc
)
if (tp
->t_flags
& TF_ACKNOW
)
else if (flags
& (TH_SYN
|TH_FIN
|TH_RST
))
else if (SEQ_GT(tp
->snd_up
, tp
->snd_una
))
MGETHDR(m
, M_DONTWAIT
, MT_HEADER
);
m
->m_data
+= max_linkhdr
;
m
->m_pkthdr
.rcvif
= (struct ifnet
*)0;
ti
= mtod(m
, struct tcpiphdr
*);
bcopy((caddr_t
)tp
->t_template
, (caddr_t
)ti
, sizeof (struct tcpiphdr
));
* Fill in fields, remembering maximum advertised
* window for use in delaying messages about window sizes.
* If resending a FIN, be sure not to use a new sequence number.
if (flags
& TH_FIN
&& tp
->t_flags
& TF_SENTFIN
&&
tp
->snd_nxt
== tp
->snd_max
)
ti
->ti_seq
= htonl(tp
->snd_nxt
);
ti
->ti_ack
= htonl(tp
->rcv_nxt
);
bcopy((caddr_t
)opt
, (caddr_t
)(ti
+ 1), optlen
);
ti
->ti_off
= (sizeof (struct tcphdr
) + optlen
) >> 2;
* Calculate receive window. Don't shrink window,
* but avoid silly window syndrome.
if (win
< (long)(so
->so_rcv
.sb_hiwat
/ 4) && win
< (long)tp
->t_maxseg
)
if (win
< (long)(tp
->rcv_adv
- tp
->rcv_nxt
))
win
= (long)(tp
->rcv_adv
- tp
->rcv_nxt
);
ti
->ti_win
= htons((u_short
)win
);
if (SEQ_GT(tp
->snd_up
, tp
->snd_nxt
)) {
ti
->ti_urp
= htons((u_short
)(tp
->snd_up
- tp
->snd_nxt
));
* If no urgent pointer to send, then we pull
* the urgent pointer to the left edge of the send window
* so that it doesn't drift into the send window on sequence
* number wraparound.
tp
->snd_up
= tp
->snd_una
; /* drag it along */
* Put TCP length in extended header, and then
* checksum extended header and data.
ti
->ti_len
= htons((u_short
)(sizeof (struct tcphdr
) +
ti
->ti_sum
= in_cksum(m
, (int)(hdrlen
+ len
));
* In transmit state, time the transmission and arrange for
* the retransmit. In persist state, just set snd_max.
if (tp
->t_force
== 0 || tp
->t_timer
[TCPT_PERSIST
] == 0) {
tcp_seq startseq
= tp
->snd_nxt
;
* Advance snd_nxt over sequence space of this segment.
if (flags
& (TH_SYN
|TH_FIN
)) {
tp
->t_flags
|= TF_SENTFIN
;
if (SEQ_GT(tp
->snd_nxt
, tp
->snd_max
)) {
tp
->snd_max
= tp
->snd_nxt
;
* Time this transmission if not a retransmission and
* not currently timing anything.
tcpstat
.tcps_segstimed
++;
* Set retransmit timer if not currently set,
* and not doing an ack or a keep-alive probe.
* Initial value for retransmit timer is smoothed
* round-trip time + 2 * round-trip time variance.
* Initialize shift counter which is used for backoff
if (tp
->t_timer
[TCPT_REXMT
] == 0 &&
tp
->snd_nxt
!= tp
->snd_una
) {
tp
->t_timer
[TCPT_REXMT
] = tp
->t_rxtcur
;
if (tp
->t_timer
[TCPT_PERSIST
]) {
tp
->t_timer
[TCPT_PERSIST
] = 0;
if (SEQ_GT(tp
->snd_nxt
+ len
, tp
->snd_max
))
tp
->snd_max
= tp
->snd_nxt
+ len
;
if (so
->so_options
& SO_DEBUG
)
tcp_trace(TA_OUTPUT
, tp
->t_state
, tp
, ti
, 0);
* Fill in IP length and desired time to live and
* send to IP level. There should be a better way
* to handle ttl and tos; we could keep them in
* the template, but need a way to checksum without them.
m
->m_pkthdr
.len
= hdrlen
+ len
;
((struct ip
*)ti
)->ip_len
= m
->m_pkthdr
.len
;
((struct ip
*)ti
)->ip_ttl
= tp
->t_inpcb
->inp_ip
.ip_ttl
; /* XXX */
((struct ip
*)ti
)->ip_tos
= tp
->t_inpcb
->inp_ip
.ip_tos
; /* XXX */
error
= ip_output(m
, tp
->t_inpcb
->inp_options
, &tp
->t_inpcb
->inp_route
,
so
->so_options
& SO_DONTROUTE
);
error
= ip_output(m
, (struct mbuf
*)0, &tp
->t_inpcb
->inp_route
,
so
->so_options
& SO_DONTROUTE
);
if ((error
== EHOSTUNREACH
|| error
== ENETDOWN
)
&& TCPS_HAVERCVDSYN(tp
->t_state
)) {
* Data sent (as far as we can tell).
* If this advertises a larger window than any other segment,
* then remember the size of the advertised window.
* Any pending ACK has now been sent.
if (win
> 0 && SEQ_GT(tp
->rcv_nxt
+win
, tp
->rcv_adv
))
tp
->rcv_adv
= tp
->rcv_nxt
+ win
;
tp
->t_flags
&= ~(TF_ACKNOW
|TF_DELACK
);
register struct tcpcb
*tp
;
register t
= ((tp
->t_srtt
>> 2) + tp
->t_rttvar
) >> 1;
if (tp
->t_timer
[TCPT_REXMT
])
panic("tcp_output REXMT");
* Start/restart persistence timer.
TCPT_RANGESET(tp
->t_timer
[TCPT_PERSIST
],
t
* tcp_backoff
[tp
->t_rxtshift
],
TCPTV_PERSMIN
, TCPTV_PERSMAX
);
if (tp
->t_rxtshift
< TCP_MAXRXTSHIFT
)