* Copyright (c) 1982 Regents of the University of California.
* All rights reserved. The Berkeley software License Agreement
* specifies the terms and conditions for redistribution.
* @(#)tcp_input.c 6.11 (Berkeley) %G%
#include "../net/route.h"
struct tcpiphdr tcp_saveti
;
struct tcpcb
*tcp_newtcpcb();
* TCP input routine, follows pages 65-76 of the
* protocol specification dated September, 1981 very closely.
register struct tcpiphdr
*ti
;
register struct tcpcb
*tp
= 0;
* Get IP and TCP header together in first mbuf.
* Note: IP leaves IP header in first mbuf.
ti
= mtod(m
, struct tcpiphdr
*);
if (((struct ip
*)ti
)->ip_hl
> (sizeof (struct ip
) >> 2))
ip_stripoptions((struct ip
*)ti
, (struct mbuf
*)0);
if (m
->m_off
> MMAXOFF
|| m
->m_len
< sizeof (struct tcpiphdr
)) {
if ((m
= m_pullup(m
, sizeof (struct tcpiphdr
))) == 0) {
ti
= mtod(m
, struct tcpiphdr
*);
* Checksum extended TCP header and data.
tlen
= ((struct ip
*)ti
)->ip_len
;
len
= sizeof (struct ip
) + tlen
;
ti
->ti_next
= ti
->ti_prev
= 0;
ti
->ti_len
= (u_short
)tlen
;
ti
->ti_len
= htons((u_short
)ti
->ti_len
);
if (ti
->ti_sum
= in_cksum(m
, len
)) {
printf("tcp sum: src %x\n", ti
->ti_src
);
* Check that TCP offset makes sense,
* pull out TCP options and adjust length.
if (off
< sizeof (struct tcphdr
) || off
> tlen
) {
printf("tcp off: src %x off %d\n", ti
->ti_src
, off
);
if (off
> sizeof (struct tcphdr
)) {
if ((m
= m_pullup(m
, sizeof (struct ip
) + off
)) == 0) {
ti
= mtod(m
, struct tcpiphdr
*);
om
= m_get(M_DONTWAIT
, MT_DATA
);
om
->m_len
= off
- sizeof (struct tcphdr
);
{ caddr_t op
= mtod(m
, caddr_t
) + sizeof (struct tcpiphdr
);
bcopy(op
, mtod(om
, caddr_t
), (unsigned)om
->m_len
);
(unsigned)(m
->m_len
-sizeof (struct tcpiphdr
)));
* Drop TCP and IP headers.
off
+= sizeof (struct ip
);
* Convert TCP protocol specific fields to host format.
ti
->ti_seq
= ntohl(ti
->ti_seq
);
ti
->ti_ack
= ntohl(ti
->ti_ack
);
ti
->ti_win
= ntohs(ti
->ti_win
);
ti
->ti_urp
= ntohs(ti
->ti_urp
);
* Locate pcb for segment.
(&tcb
, ti
->ti_src
, ti
->ti_sport
, ti
->ti_dst
, ti
->ti_dport
,
* If the state is CLOSED (i.e., TCB does not exist) then
* all data in the incoming segment is discarded.
if (so
->so_options
& SO_DEBUG
) {
if (so
->so_options
& SO_ACCEPTCONN
) {
* Mark socket as temporary until we're
* committed to keeping it. The code at
* ``drop'' and ``dropwithreset'' checks the
* flag dropsocket to see if the temporary
* socket created here should be discarded.
* We mark the socket as discardable until
* we're committed to it below in TCPS_LISTEN.
inp
= (struct inpcb
*)so
->so_pcb
;
inp
->inp_laddr
= ti
->ti_dst
;
inp
->inp_lport
= ti
->ti_dport
;
tp
->t_state
= TCPS_LISTEN
;
* Segment received on connection.
* Reset idle time and keep-alive timer.
tp
->t_timer
[TCPT_KEEP
] = TCPTV_KEEP
;
* Process options if not in LISTEN state,
* else do it below (after getting remote address).
if (om
&& tp
->t_state
!= TCPS_LISTEN
) {
tcp_dooptions(tp
, om
, ti
);
* Calculate amount of space in receive window,
* and then do TCP input processing.
tp
->rcv_wnd
= sbspace(&so
->so_rcv
);
* If the state is LISTEN then ignore segment if it contains an RST.
* If the segment contains an ACK then it is bad and send a RST.
* If it does not contain a SYN then it is not interesting; drop it.
* Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
* tp->iss, and send a segment:
* <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
* Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
* Fill in remote peer address fields if not previously specified.
* Enter SYN_RECEIVED state, and process any other fields of this
register struct sockaddr_in
*sin
;
if ((tiflags
& TH_SYN
) == 0)
am
= m_get(M_DONTWAIT
, MT_SONAME
);
am
->m_len
= sizeof (struct sockaddr_in
);
sin
= mtod(am
, struct sockaddr_in
*);
sin
->sin_family
= AF_INET
;
sin
->sin_addr
= ti
->ti_src
;
sin
->sin_port
= ti
->ti_sport
;
if (inp
->inp_laddr
.s_addr
== INADDR_ANY
)
inp
->inp_laddr
= ti
->ti_dst
;
if (in_pcbconnect(inp
, am
)) {
tp
->t_template
= tcp_template(tp
);
if (tp
->t_template
== 0) {
dropsocket
= 0; /* socket is already gone */
tcp_dooptions(tp
, om
, ti
);
tp
->iss
= tcp_iss
; tcp_iss
+= TCP_ISSINCR
/2;
tp
->t_state
= TCPS_SYN_RECEIVED
;
tp
->t_timer
[TCPT_KEEP
] = TCPTV_KEEP
;
dropsocket
= 0; /* committed to socket */
* If the state is SYN_SENT:
* if seg contains an ACK, but not for our SYN, drop the input.
* if seg contains a RST, then drop the connection.
* if seg does not contain SYN, then drop it.
* Otherwise this is an acceptable SYN segment
* initialize tp->rcv_nxt and tp->irs
* if seg contains ack then advance tp->snd_una
* if SYN has been acked change to ESTABLISHED else SYN_RCVD state
* arrange for segment to be acked (eventually)
* continue processing rest of data/controls, beginning with URG
if ((tiflags
& TH_ACK
) &&
/* this should be SEQ_LT; is SEQ_LEQ for BBN vax TCP only */
(SEQ_LT(ti
->ti_ack
, tp
->iss
) ||
SEQ_GT(ti
->ti_ack
, tp
->snd_max
)))
tp
= tcp_drop(tp
, ECONNREFUSED
);
if ((tiflags
& TH_SYN
) == 0)
tp
->snd_una
= ti
->ti_ack
;
if (SEQ_LT(tp
->snd_nxt
, tp
->snd_una
))
tp
->snd_nxt
= tp
->snd_una
;
tp
->t_timer
[TCPT_REXMT
] = 0;
tp
->t_flags
|= TF_ACKNOW
;
if (SEQ_GT(tp
->snd_una
, tp
->iss
)) {
tp
->t_state
= TCPS_ESTABLISHED
;
tp
->t_maxseg
= MIN(tp
->t_maxseg
, tcp_mss(tp
));
(void) tcp_reass(tp
, (struct tcpiphdr
*)0);
tp
->t_state
= TCPS_SYN_RECEIVED
;
* Advance ti->ti_seq to correspond to first data byte.
* If data, trim to stay within window,
* dropping FIN if necessary.
if (ti
->ti_len
> tp
->rcv_wnd
) {
todrop
= ti
->ti_len
- tp
->rcv_wnd
;
ti
->ti_len
= tp
->rcv_wnd
;
tp
->snd_wl1
= ti
->ti_seq
- 1;
* If data is received on a connection after the
* user processes are gone, then RST the other end.
if ((so
->so_state
& SS_NOFDREF
) && tp
->t_state
> TCPS_CLOSE_WAIT
&&
* States other than LISTEN or SYN_SENT.
* First check that at least some bytes of segment are within
* If window is closed can only take segments at
* window edge, and have to drop data and PUSH from
if (tp
->rcv_nxt
!= ti
->ti_seq
)
ti
->ti_flags
&= ~(TH_PUSH
|TH_FIN
);
* If segment begins before rcv_nxt, drop leading
* data (and SYN); if nothing left, just ack.
todrop
= tp
->rcv_nxt
- ti
->ti_seq
;
if (todrop
> ti
->ti_len
||
todrop
== ti
->ti_len
&& (tiflags
&TH_FIN
) == 0)
* If segment ends after window, drop trailing data
* (and PUSH and FIN); if nothing left, just ACK.
todrop
= (ti
->ti_seq
+ti
->ti_len
) - (tp
->rcv_nxt
+tp
->rcv_wnd
);
if (todrop
>= ti
->ti_len
)
ti
->ti_flags
&= ~(TH_PUSH
|TH_FIN
);
* If the RST bit is set examine the state:
* If passive open, return to LISTEN state.
* If active open, inform user that connection was refused.
* ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
* Inform user that connection was reset, and close tcb.
* CLOSING, LAST_ACK, TIME_WAIT STATES
if (tiflags
&TH_RST
) switch (tp
->t_state
) {
tp
= tcp_drop(tp
, ECONNREFUSED
);
tp
= tcp_drop(tp
, ECONNRESET
);
* If a SYN is in the window, then this is an
* error and we send an RST and drop the connection.
tp
= tcp_drop(tp
, ECONNRESET
);
* If the ACK bit is off we drop the segment and return.
if ((tiflags
& TH_ACK
) == 0)
* In SYN_RECEIVED state if the ack ACKs our SYN then enter
* ESTABLISHED state and continue processing, otherwise
if (SEQ_GT(tp
->snd_una
, ti
->ti_ack
) ||
SEQ_GT(ti
->ti_ack
, tp
->snd_max
))
tp
->snd_una
++; /* SYN acked */
if (SEQ_LT(tp
->snd_nxt
, tp
->snd_una
))
tp
->snd_nxt
= tp
->snd_una
;
tp
->t_timer
[TCPT_REXMT
] = 0;
tp
->t_state
= TCPS_ESTABLISHED
;
tp
->t_maxseg
= MIN(tp
->t_maxseg
, tcp_mss(tp
));
(void) tcp_reass(tp
, (struct tcpiphdr
*)0);
tp
->snd_wl1
= ti
->ti_seq
- 1;
* In ESTABLISHED state: drop duplicate ACKs; ACK out of range
* ACKs. If the ack is in the range
* tp->snd_una < ti->ti_ack <= tp->snd_max
* then advance tp->snd_una to ti->ti_ack and drop
* data from the retransmission queue. If this ACK reflects
* more up to date window information we update our window information.
#define ourfinisacked (acked > 0)
if (SEQ_LEQ(ti
->ti_ack
, tp
->snd_una
))
if (SEQ_GT(ti
->ti_ack
, tp
->snd_max
))
acked
= ti
->ti_ack
- tp
->snd_una
;
* If transmit timer is running and timed sequence
* number was acked, update smoothed round trip time.
if (tp
->t_rtt
&& SEQ_GT(ti
->ti_ack
, tp
->t_rtseq
)) {
(1 - tcp_alpha
) * tp
->t_rtt
;
if (ti
->ti_ack
== tp
->snd_max
)
tp
->t_timer
[TCPT_REXMT
] = 0;
TCPT_RANGESET(tp
->t_timer
[TCPT_REXMT
],
tcp_beta
* tp
->t_srtt
, TCPTV_MIN
, TCPTV_MAX
);
* When new data is acked, open the congestion window a bit.
tp
->snd_cwnd
= MIN(11 * tp
->snd_cwnd
/ 10, 65535);
if (acked
> so
->so_snd
.sb_cc
) {
tp
->snd_wnd
-= so
->so_snd
.sb_cc
;
sbdrop(&so
->so_snd
, so
->so_snd
.sb_cc
);
sbdrop(&so
->so_snd
, acked
);
if ((so
->so_snd
.sb_flags
& SB_WAIT
) || so
->so_snd
.sb_sel
)
tp
->snd_una
= ti
->ti_ack
;
if (SEQ_LT(tp
->snd_nxt
, tp
->snd_una
))
tp
->snd_nxt
= tp
->snd_una
;
* In FIN_WAIT_1 STATE in addition to the processing
* for the ESTABLISHED state if our FIN is now acknowledged
* If we can't receive any more
* data, then closing user can proceed.
if (so
->so_state
& SS_CANTRCVMORE
)
tp
->t_state
= TCPS_FIN_WAIT_2
;
* This is contrary to the specification,
* but if we haven't gotten our FIN in
* 5 minutes, it's not forthcoming.
tp->t_timer[TCPT_2MSL] = 5 * 60 * PR_SLOWHZ;
* MUST WORRY ABOUT ONE-WAY CONNECTIONS.
* In CLOSING STATE in addition to the processing for
* the ESTABLISHED state if the ACK acknowledges our FIN
* then enter the TIME-WAIT state, otherwise ignore
tp
->t_state
= TCPS_TIME_WAIT
;
tp
->t_timer
[TCPT_2MSL
] = 2 * TCPTV_MSL
;
* The only thing that can arrive in LAST_ACK state
* is an acknowledgment of our FIN. If our FIN is now
* acknowledged, delete the TCB, enter the closed state
* In TIME_WAIT state the only thing that should arrive
* is a retransmission of the remote FIN. Acknowledge
* it and restart the finack timer.
tp
->t_timer
[TCPT_2MSL
] = 2 * TCPTV_MSL
;
* Update window information.
if (SEQ_LT(tp
->snd_wl1
, ti
->ti_seq
) || tp
->snd_wl1
== ti
->ti_seq
&&
(SEQ_LT(tp
->snd_wl2
, ti
->ti_ack
) ||
tp
->snd_wl2
== ti
->ti_ack
&& ti
->ti_win
> tp
->snd_wnd
)) {
tp
->snd_wnd
= ti
->ti_win
;
tp
->snd_wl1
= ti
->ti_seq
;
tp
->snd_wl2
= ti
->ti_ack
;
* Process segments with URG.
if ((tiflags
& TH_URG
) && ti
->ti_urp
&&
TCPS_HAVERCVDFIN(tp
->t_state
) == 0) {
* This is a kludge, but if we accept
* random urgent pointers, we'll crash in
* soreceive. It's hard to imagine someone
* actually wanting to send this much urgent data.
if (ti
->ti_urp
+ (unsigned) so
->so_rcv
.sb_cc
> 32767) {
ti
->ti_urp
= 0; /* XXX */
tiflags
&= ~TH_URG
; /* XXX */
ti
->ti_flags
&= ~TH_URG
; /* XXX */
* If this segment advances the known urgent pointer,
* then mark the data stream. This should not happen
* in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
* a FIN has been received from the remote side.
* In these states we ignore the URG.
if (SEQ_GT(ti
->ti_seq
+ti
->ti_urp
, tp
->rcv_up
)) {
tp
->rcv_up
= ti
->ti_seq
+ ti
->ti_urp
;
so
->so_oobmark
= so
->so_rcv
.sb_cc
+
(tp
->rcv_up
- tp
->rcv_nxt
) - 1;
so
->so_state
|= SS_RCVATMARK
;
tp
->t_oobflags
&= ~TCPOOB_HAVEDATA
;
* Remove out of band data so it doesn't get presented to user.
* This can happen independent of advancing the URG pointer,
* but if two URG's are pending at once, some out-of-band
* data may creep in... ick.
if (ti
->ti_urp
<= ti
->ti_len
)
tcp_pulloutofband(so
, ti
);
* Process the segment text, merging it into the TCP sequencing queue,
* and arranging for acknowledgment of receipt if necessary.
* This process logically involves adjusting tp->rcv_wnd as data
* is presented to the user (this happens in tcp_usrreq.c,
* case PRU_RCVD). If a FIN has already been received on this
* connection then we just ignore the text.
if ((ti
->ti_len
|| (tiflags
&TH_FIN
)) &&
TCPS_HAVERCVDFIN(tp
->t_state
) == 0) {
tiflags
= tcp_reass(tp
, ti
);
tp
->t_flags
|= TF_DELACK
;
tp
->t_flags
|= TF_ACKNOW
;
* If FIN is received ACK the FIN and let the user know
* that the connection is closing.
if (TCPS_HAVERCVDFIN(tp
->t_state
) == 0) {
tp
->t_flags
|= TF_ACKNOW
;
* In SYN_RECEIVED and ESTABLISHED STATES
* enter the CLOSE_WAIT state.
tp
->t_state
= TCPS_CLOSE_WAIT
;
* If still in FIN_WAIT_1 STATE FIN has not been acked so
* enter the CLOSING state.
tp
->t_state
= TCPS_CLOSING
;
* In FIN_WAIT_2 state enter the TIME_WAIT state,
* starting the time-wait timer, turning off the other
tp
->t_state
= TCPS_TIME_WAIT
;
tp
->t_timer
[TCPT_2MSL
] = 2 * TCPTV_MSL
;
* In TIME_WAIT state restart the 2 MSL time_wait timer.
tp
->t_timer
[TCPT_2MSL
] = 2 * TCPTV_MSL
;
if (so
->so_options
& SO_DEBUG
)
tcp_trace(TA_INPUT
, ostate
, tp
, &tcp_saveti
, 0);
* Return any desired output.
* Generate an ACK dropping incoming segment if it occupies
* sequence space, where the ACK reflects our state.
tlen
== 0 && (tiflags
&(TH_SYN
|TH_FIN
)) == 0)
if (tp
->t_inpcb
->inp_socket
->so_options
& SO_DEBUG
)
tcp_trace(TA_RESPOND
, ostate
, tp
, &tcp_saveti
, 0);
tcp_respond(tp
, ti
, tp
->rcv_nxt
, tp
->snd_nxt
, TH_ACK
);
* Generate a RST, dropping incoming segment.
* Make ACK acceptable to originator of segment.
tcp_respond(tp
, ti
, (tcp_seq
)0, ti
->ti_ack
, TH_RST
);
tcp_respond(tp
, ti
, ti
->ti_seq
+ti
->ti_len
, (tcp_seq
)0,
/* destroy temporarily created socket */
* Drop space held by incoming segment and return.
if (tp
&& (tp
->t_inpcb
->inp_socket
->so_options
& SO_DEBUG
))
tcp_trace(TA_DROP
, ostate
, tp
, &tcp_saveti
, 0);
/* destroy temporarily created socket */
tcp_dooptions(tp
, om
, ti
)
for (; cnt
> 0; cnt
-= optlen
, cp
+= optlen
) {
if (!(ti
->ti_flags
& TH_SYN
))
tp
->t_maxseg
= *(u_short
*)(cp
+ 2);
tp
->t_maxseg
= ntohs((u_short
)tp
->t_maxseg
);
tp
->t_maxseg
= MIN(tp
->t_maxseg
, tcp_mss(tp
));
* Pull out of band byte out of a segment so
* it doesn't appear in the user's data queue.
* It is still reflected in the segment length for
tcp_pulloutofband(so
, ti
)
int cnt
= ti
->ti_urp
- 1;
char *cp
= mtod(m
, caddr_t
) + cnt
;
struct tcpcb
*tp
= sototcpcb(so
);
tp
->t_oobflags
|= TCPOOB_HAVEDATA
;
bcopy(cp
+1, cp
, (unsigned)(m
->m_len
- cnt
- 1));
panic("tcp_pulloutofband");
* Insert segment ti into reassembly queue of tcp with
* control block tp. Return TH_FIN if reassembly now includes
register struct tcpcb
*tp
;
register struct tcpiphdr
*ti
;
register struct tcpiphdr
*q
;
struct socket
*so
= tp
->t_inpcb
->inp_socket
;
* Call with ti==0 after becoming established to
* force pre-ESTABLISHED data up to user socket.
* Find a segment which begins after this one does.
for (q
= tp
->seg_next
; q
!= (struct tcpiphdr
*)tp
;
q
= (struct tcpiphdr
*)q
->ti_next
)
if (SEQ_GT(q
->ti_seq
, ti
->ti_seq
))
* If there is a preceding segment, it may provide some of
* our data already. If so, drop the data from the incoming
* segment. If it provides all of our data, drop us.
if ((struct tcpiphdr
*)q
->ti_prev
!= (struct tcpiphdr
*)tp
) {
q
= (struct tcpiphdr
*)q
->ti_prev
;
/* conversion to int (in i) handles seq wraparound */
i
= q
->ti_seq
+ q
->ti_len
- ti
->ti_seq
;
q
= (struct tcpiphdr
*)(q
->ti_next
);
* While we overlap succeeding segments trim them or,
* if they are completely covered, dequeue them.
while (q
!= (struct tcpiphdr
*)tp
) {
register int i
= (ti
->ti_seq
+ ti
->ti_len
) - q
->ti_seq
;
q
= (struct tcpiphdr
*)q
->ti_next
;
* Stick new segment in its place.
* Present data to user, advancing rcv_nxt through
* completed sequence space.
if (TCPS_HAVERCVDSYN(tp
->t_state
) == 0)
if (ti
== (struct tcpiphdr
*)tp
|| ti
->ti_seq
!= tp
->rcv_nxt
)
if (tp
->t_state
== TCPS_SYN_RECEIVED
&& ti
->ti_len
)
tp
->rcv_nxt
+= ti
->ti_len
;
flags
= ti
->ti_flags
& TH_FIN
;
ti
= (struct tcpiphdr
*)ti
->ti_next
;
if (so
->so_state
& SS_CANTRCVMORE
)
sbappend(&so
->so_rcv
, m
);
} while (ti
!= (struct tcpiphdr
*)tp
&& ti
->ti_seq
== tp
->rcv_nxt
);
* Determine a reasonable value for maxseg size.
* If the route is known, use one that can be handled
* on the given interface without forcing IP to fragment.
* If bigger than a page (CLSIZE), round down to nearest pagesize
* to utilize pagesize mbufs.
* If interface pointer is unavailable, or the destination isn't local,
* use a conservative size (512 or the default IP max size),
* as we can't discover anything about intervening gateways or networks.
* This is ugly, and doesn't belong at this level, but has to happen somehow.
register struct tcpcb
*tp
;
if ((ro
->ro_rt
== (struct rtentry
*)0) ||
(ifp
= ro
->ro_rt
->rt_ifp
) == (struct ifnet
*)0) {
/* No route yet, so try to acquire one */
if (inp
->inp_faddr
.s_addr
!= INADDR_ANY
) {
ro
->ro_dst
.sa_family
= AF_INET
;
((struct sockaddr_in
*) &ro
->ro_dst
)->sin_addr
=
if ((ro
->ro_rt
== 0) || (ifp
= ro
->ro_rt
->rt_ifp
) == 0)
mss
= ifp
->if_mtu
- sizeof(struct tcpiphdr
);
#if (CLBYTES & (CLBYTES - 1)) == 0
mss
= mss
/ CLBYTES
* CLBYTES
;
if (in_localaddr(tp
->t_inpcb
->inp_faddr
))
return (MIN(mss
, TCP_MSS
));