* Copyright (c) 1982, 1986, 1988 Regents of the University of California.
* Redistribution and use in source and binary forms are permitted
* provided that the above copyright notice and this paragraph are
* duplicated in all such forms and that any documentation,
* advertising materials, and other materials related to such
* distribution and use acknowledge that the software was developed
* by the University of California, Berkeley. The name of the
* University may not be used to endorse or promote products derived
* from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
* @(#)tcp_input.c 7.20 (Berkeley) %G%
#include "../net/route.h"
struct tcpiphdr tcp_saveti
;
struct tcpcb
*tcp_newtcpcb();
* Insert segment ti into reassembly queue of tcp with
* control block tp. Return TH_FIN if reassembly now includes
* a segment with FIN. The macro form does the common case inline
* (segment is the next to be received on an established connection,
* and the queue is empty), avoiding linkage into and removal
* from the queue and repetition of various conversions.
* Set DELACK for segments received in order, but ack immediately
* when segments are out of order (so fast retransmit can work).
#define TCP_REASS(tp, ti, m, so, flags) { \
if ((ti)->ti_seq == (tp)->rcv_nxt && \
(tp)->seg_next == (struct tcpiphdr *)(tp) && \
(tp)->t_state == TCPS_ESTABLISHED) { \
tp->t_flags |= TF_DELACK; \
(tp)->rcv_nxt += (ti)->ti_len; \
flags = (ti)->ti_flags & TH_FIN; \
tcpstat.tcps_rcvbyte += (ti)->ti_len;\
sbappend(&(so)->so_rcv, (m)); \
(flags) = tcp_reass((tp), (ti)); \
tp->t_flags |= TF_ACKNOW; \
register struct tcpcb
*tp
;
register struct tcpiphdr
*ti
;
register struct tcpiphdr
*q
;
struct socket
*so
= tp
->t_inpcb
->inp_socket
;
* Call with ti==0 after becoming established to
* force pre-ESTABLISHED data up to user socket.
* Find a segment which begins after this one does.
for (q
= tp
->seg_next
; q
!= (struct tcpiphdr
*)tp
;
q
= (struct tcpiphdr
*)q
->ti_next
)
if (SEQ_GT(q
->ti_seq
, ti
->ti_seq
))
* If there is a preceding segment, it may provide some of
* our data already. If so, drop the data from the incoming
* segment. If it provides all of our data, drop us.
if ((struct tcpiphdr
*)q
->ti_prev
!= (struct tcpiphdr
*)tp
) {
q
= (struct tcpiphdr
*)q
->ti_prev
;
/* conversion to int (in i) handles seq wraparound */
i
= q
->ti_seq
+ q
->ti_len
- ti
->ti_seq
;
tcpstat
.tcps_rcvduppack
++;
tcpstat
.tcps_rcvdupbyte
+= ti
->ti_len
;
q
= (struct tcpiphdr
*)(q
->ti_next
);
tcpstat
.tcps_rcvoopack
++;
tcpstat
.tcps_rcvoobyte
+= ti
->ti_len
;
* While we overlap succeeding segments trim them or,
* if they are completely covered, dequeue them.
while (q
!= (struct tcpiphdr
*)tp
) {
register int i
= (ti
->ti_seq
+ ti
->ti_len
) - q
->ti_seq
;
q
= (struct tcpiphdr
*)q
->ti_next
;
* Stick new segment in its place.
* Present data to user, advancing rcv_nxt through
* completed sequence space.
if (TCPS_HAVERCVDSYN(tp
->t_state
) == 0)
if (ti
== (struct tcpiphdr
*)tp
|| ti
->ti_seq
!= tp
->rcv_nxt
)
if (tp
->t_state
== TCPS_SYN_RECEIVED
&& ti
->ti_len
)
tp
->rcv_nxt
+= ti
->ti_len
;
flags
= ti
->ti_flags
& TH_FIN
;
ti
= (struct tcpiphdr
*)ti
->ti_next
;
if (so
->so_state
& SS_CANTRCVMORE
)
sbappend(&so
->so_rcv
, m
);
} while (ti
!= (struct tcpiphdr
*)tp
&& ti
->ti_seq
== tp
->rcv_nxt
);
* TCP input routine, follows pages 65-76 of the
* protocol specification dated September, 1981 very closely.
register struct tcpiphdr
*ti
;
register struct tcpcb
*tp
= 0;
int todrop
, acked
, ourfinisacked
, needoutput
= 0;
* Get IP and TCP header together in first mbuf.
* Note: IP leaves IP header in first mbuf.
ti
= mtod(m
, struct tcpiphdr
*);
if (iphlen
> sizeof (struct ip
))
ip_stripoptions(m
, (struct mbuf
*)0);
if (m
->m_flags
& M_EXT
|| m
->m_len
< sizeof (struct tcpiphdr
)) {
if ((m
= m_pullup(m
, sizeof (struct tcpiphdr
))) == 0) {
ti
= mtod(m
, struct tcpiphdr
*);
* Checksum extended TCP header and data.
tlen
= ((struct ip
*)ti
)->ip_len
;
len
= sizeof (struct ip
) + tlen
;
ti
->ti_next
= ti
->ti_prev
= 0;
ti
->ti_len
= (u_short
)tlen
;
ti
->ti_len
= htons((u_short
)ti
->ti_len
);
if (ti
->ti_sum
= in_cksum(m
, len
)) {
printf("tcp sum: src %x\n", ti
->ti_src
);
tcpstat
.tcps_rcvbadsum
++;
* Check that TCP offset makes sense,
* pull out TCP options and adjust length.
if (off
< sizeof (struct tcphdr
) || off
> tlen
) {
printf("tcp off: src %x off %d\n", ti
->ti_src
, off
);
tcpstat
.tcps_rcvbadoff
++;
if (off
> sizeof (struct tcphdr
)) {
if (m
->m_len
< sizeof(struct ip
) + off
) {
if ((m
= m_pullup(m
, sizeof (struct ip
) + off
)) == 0) {
ti
= mtod(m
, struct tcpiphdr
*);
om
= m_get(M_DONTWAIT
, MT_DATA
);
om
->m_len
= off
- sizeof (struct tcphdr
);
{ caddr_t op
= mtod(m
, caddr_t
) + sizeof (struct tcpiphdr
);
bcopy(op
, mtod(om
, caddr_t
), (unsigned)om
->m_len
);
m
->m_pkthdr
.len
-= om
->m_len
;
(unsigned)(m
->m_len
-sizeof (struct tcpiphdr
)));
* Drop TCP and IP headers; TCP options were dropped above.
m
->m_data
+= sizeof(struct tcpiphdr
);
m
->m_len
-= sizeof(struct tcpiphdr
);
m
->m_pkthdr
.len
-= sizeof(struct tcpiphdr
);
* Convert TCP protocol specific fields to host format.
ti
->ti_seq
= ntohl(ti
->ti_seq
);
ti
->ti_ack
= ntohl(ti
->ti_ack
);
ti
->ti_win
= ntohs(ti
->ti_win
);
ti
->ti_urp
= ntohs(ti
->ti_urp
);
* Locate pcb for segment.
(&tcb
, ti
->ti_src
, ti
->ti_sport
, ti
->ti_dst
, ti
->ti_dport
,
* If the state is CLOSED (i.e., TCB does not exist) then
* all data in the incoming segment is discarded.
* If the TCB exists but is in CLOSED state, it is embryonic,
* but should either do a listen or a connect soon.
if (tp
->t_state
== TCPS_CLOSED
)
if (so
->so_options
& SO_DEBUG
) {
if (so
->so_options
& SO_ACCEPTCONN
) {
* Mark socket as temporary until we're
* committed to keeping it. The code at
* ``drop'' and ``dropwithreset'' checks the
* flag dropsocket to see if the temporary
* socket created here should be discarded.
* We mark the socket as discardable until
* we're committed to it below in TCPS_LISTEN.
inp
= (struct inpcb
*)so
->so_pcb
;
inp
->inp_laddr
= ti
->ti_dst
;
inp
->inp_lport
= ti
->ti_dport
;
inp
->inp_options
= ip_srcroute();
tp
->t_state
= TCPS_LISTEN
;
* Segment received on connection.
* Reset idle time and keep-alive timer.
tp
->t_timer
[TCPT_KEEP
] = tcp_keepidle
;
* Process options if not in LISTEN state,
* else do it below (after getting remote address).
if (om
&& tp
->t_state
!= TCPS_LISTEN
) {
tcp_dooptions(tp
, om
, ti
);
* Calculate amount of space in receive window,
* and then do TCP input processing.
* Receive window is amount of space in rcv queue,
* but not less than advertised window.
win
= sbspace(&so
->so_rcv
);
tp
->rcv_wnd
= max(win
, (int)(tp
->rcv_adv
- tp
->rcv_nxt
));
* If the state is LISTEN then ignore segment if it contains an RST.
* If the segment contains an ACK then it is bad and send a RST.
* If it does not contain a SYN then it is not interesting; drop it.
* Don't bother responding if the destination was a broadcast.
* Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
* tp->iss, and send a segment:
* <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
* Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
* Fill in remote peer address fields if not previously specified.
* Enter SYN_RECEIVED state, and process any other fields of this
register struct sockaddr_in
*sin
;
if ((tiflags
& TH_SYN
) == 0)
if (m
->m_flags
& M_BCAST
)
am
= m_get(M_DONTWAIT
, MT_SONAME
);
am
->m_len
= sizeof (struct sockaddr_in
);
sin
= mtod(am
, struct sockaddr_in
*);
sin
->sin_family
= AF_INET
;
sin
->sin_addr
= ti
->ti_src
;
sin
->sin_port
= ti
->ti_sport
;
if (inp
->inp_laddr
.s_addr
== INADDR_ANY
)
inp
->inp_laddr
= ti
->ti_dst
;
if (in_pcbconnect(inp
, am
)) {
tp
->t_template
= tcp_template(tp
);
if (tp
->t_template
== 0) {
tp
= tcp_drop(tp
, ENOBUFS
);
dropsocket
= 0; /* socket is already gone */
tcp_dooptions(tp
, om
, ti
);
tcp_iss
+= TCP_ISSINCR
/2;
tp
->t_flags
|= TF_ACKNOW
;
tp
->t_state
= TCPS_SYN_RECEIVED
;
tp
->t_timer
[TCPT_KEEP
] = TCPTV_KEEP_INIT
;
dropsocket
= 0; /* committed to socket */
* If the state is SYN_SENT:
* if seg contains an ACK, but not for our SYN, drop the input.
* if seg contains a RST, then drop the connection.
* if seg does not contain SYN, then drop it.
* Otherwise this is an acceptable SYN segment
* initialize tp->rcv_nxt and tp->irs
* if seg contains ack then advance tp->snd_una
* if SYN has been acked change to ESTABLISHED else SYN_RCVD state
* arrange for segment to be acked (eventually)
* continue processing rest of data/controls, beginning with URG
if ((tiflags
& TH_ACK
) &&
(SEQ_LEQ(ti
->ti_ack
, tp
->iss
) ||
SEQ_GT(ti
->ti_ack
, tp
->snd_max
)))
tp
= tcp_drop(tp
, ECONNREFUSED
);
if ((tiflags
& TH_SYN
) == 0)
tp
->snd_una
= ti
->ti_ack
;
if (SEQ_LT(tp
->snd_nxt
, tp
->snd_una
))
tp
->snd_nxt
= tp
->snd_una
;
tp
->t_timer
[TCPT_REXMT
] = 0;
tp
->t_flags
|= TF_ACKNOW
;
if (tiflags
& TH_ACK
&& SEQ_GT(tp
->snd_una
, tp
->iss
)) {
tp
->t_state
= TCPS_ESTABLISHED
;
tp
->t_maxseg
= min(tp
->t_maxseg
, tcp_mss(tp
));
(void) tcp_reass(tp
, (struct tcpiphdr
*)0);
* if we didn't have to retransmit the SYN,
* use its rtt as our initial srtt & rtt var.
tp
->t_srtt
= tp
->t_rtt
<< 3;
tp
->t_rttvar
= tp
->t_rtt
<< 1;
TCPT_RANGESET(tp
->t_rxtcur
,
((tp
->t_srtt
>> 2) + tp
->t_rttvar
) >> 1,
TCPTV_MIN
, TCPTV_REXMTMAX
);
tp
->t_state
= TCPS_SYN_RECEIVED
;
* Advance ti->ti_seq to correspond to first data byte.
* If data, trim to stay within window,
* dropping FIN if necessary.
if (ti
->ti_len
> tp
->rcv_wnd
) {
todrop
= ti
->ti_len
- tp
->rcv_wnd
;
/* XXX work around 4.2 m_adj bug */
/* skip tcp/ip header in first mbuf */
m_adj(m
->m_next
, -todrop
);
ti
->ti_len
= tp
->rcv_wnd
;
tcpstat
.tcps_rcvpackafterwin
++;
tcpstat
.tcps_rcvbyteafterwin
+= todrop
;
tp
->snd_wl1
= ti
->ti_seq
- 1;
* States other than LISTEN or SYN_SENT.
* First check that at least some bytes of segment are within
* receive window. If segment begins before rcv_nxt,
* drop leading data (and SYN); if nothing left, just ack.
todrop
= tp
->rcv_nxt
- ti
->ti_seq
;
if (todrop
> ti
->ti_len
||
todrop
== ti
->ti_len
&& (tiflags
&TH_FIN
) == 0) {
tcpstat
.tcps_rcvduppack
++;
tcpstat
.tcps_rcvdupbyte
+= ti
->ti_len
;
* If segment is just one to the left of the window,
* check two special cases:
* 1. Don't toss RST in response to 4.2-style keepalive.
* 2. If the only thing to drop is a FIN, we can drop
* it, but check the ACK or we will get into FIN
* wars if our FINs crossed (both CLOSING).
* In either case, send ACK to resynchronize,
* but keep on processing for RST or ACK.
if ((tiflags
& TH_FIN
&& todrop
== ti
->ti_len
+ 1)
|| (tiflags
& TH_RST
&& ti
->ti_seq
== tp
->rcv_nxt
- 1)
tp
->t_flags
|= TF_ACKNOW
;
tcpstat
.tcps_rcvpartduppack
++;
tcpstat
.tcps_rcvpartdupbyte
+= todrop
;
* If new data are received on a connection after the
* user processes are gone, then RST the other end.
if ((so
->so_state
& SS_NOFDREF
) &&
tp
->t_state
> TCPS_CLOSE_WAIT
&& ti
->ti_len
) {
tcpstat
.tcps_rcvafterclose
++;
* If segment ends after window, drop trailing data
* (and PUSH and FIN); if nothing left, just ACK.
todrop
= (ti
->ti_seq
+ti
->ti_len
) - (tp
->rcv_nxt
+tp
->rcv_wnd
);
tcpstat
.tcps_rcvpackafterwin
++;
if (todrop
>= ti
->ti_len
) {
tcpstat
.tcps_rcvbyteafterwin
+= ti
->ti_len
;
* If a new connection request is received
* while in TIME_WAIT, drop the old connection
* and start over if the sequence numbers
* are above the previous ones.
tp
->t_state
== TCPS_TIME_WAIT
&&
SEQ_GT(ti
->ti_seq
, tp
->rcv_nxt
)) {
iss
= tp
->rcv_nxt
+ TCP_ISSINCR
;
* If window is closed can only take segments at
* window edge, and have to drop data and PUSH from
* incoming segments. Continue processing, but
* remember to ack. Otherwise, drop segment
if (tp
->rcv_wnd
== 0 && ti
->ti_seq
== tp
->rcv_nxt
) {
tp
->t_flags
|= TF_ACKNOW
;
tcpstat
.tcps_rcvwinprobe
++;
tcpstat
.tcps_rcvbyteafterwin
+= todrop
;
/* XXX work around m_adj bug */
/* skip tcp/ip header in first mbuf */
m_adj(m
->m_next
, -todrop
);
tiflags
&= ~(TH_PUSH
|TH_FIN
);
* If the RST bit is set examine the state:
* If passive open, return to LISTEN state.
* If active open, inform user that connection was refused.
* ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
* Inform user that connection was reset, and close tcb.
* CLOSING, LAST_ACK, TIME_WAIT STATES
if (tiflags
&TH_RST
) switch (tp
->t_state
) {
so
->so_error
= ECONNREFUSED
;
so
->so_error
= ECONNRESET
;
tp
->t_state
= TCPS_CLOSED
;
* If a SYN is in the window, then this is an
* error and we send an RST and drop the connection.
tp
= tcp_drop(tp
, ECONNRESET
);
* If the ACK bit is off we drop the segment and return.
if ((tiflags
& TH_ACK
) == 0)
* In SYN_RECEIVED state if the ack ACKs our SYN then enter
* ESTABLISHED state and continue processing, otherwise
if (SEQ_GT(tp
->snd_una
, ti
->ti_ack
) ||
SEQ_GT(ti
->ti_ack
, tp
->snd_max
))
tp
->t_state
= TCPS_ESTABLISHED
;
tp
->t_maxseg
= min(tp
->t_maxseg
, tcp_mss(tp
));
(void) tcp_reass(tp
, (struct tcpiphdr
*)0);
tp
->snd_wl1
= ti
->ti_seq
- 1;
* In ESTABLISHED state: drop duplicate ACKs; ACK out of range
* ACKs. If the ack is in the range
* tp->snd_una < ti->ti_ack <= tp->snd_max
* then advance tp->snd_una to ti->ti_ack and drop
* data from the retransmission queue. If this ACK reflects
* more up to date window information we update our window information.
if (SEQ_LEQ(ti
->ti_ack
, tp
->snd_una
)) {
if (ti
->ti_len
== 0 && ti
->ti_win
== tp
->snd_wnd
) {
tcpstat
.tcps_rcvdupack
++;
* If we have outstanding data (not a
* window probe), this is a completely
* duplicate ack (ie, window info didn't
* change), the ack is the biggest we've
* seen and we've seen exactly our rexmt
* threshold of them, assume a packet
* has been dropped and retransmit it.
* Kludge snd_nxt & the congestion
* window so we send only this one
* packet. If this packet fills the
* only hole in the receiver's seq.
* space, the next real ack will fully
* open our window. This means we
* have to do the usual slow-start to
* not overwhelm an intermediate gateway
* with a burst of packets. Leave
* here with the congestion window set
* to allow 2 packets on the next real
* ack and the exp-to-linear thresh
* set for half the current window
* size (since we know we're losing at
* the current window size).
if (tp
->t_timer
[TCPT_REXMT
] == 0 ||
ti
->ti_ack
!= tp
->snd_una
)
else if (++tp
->t_dupacks
== tcprexmtthresh
) {
tcp_seq onxt
= tp
->snd_nxt
;
min(tp
->snd_wnd
, tp
->snd_cwnd
) / 2 /
tp
->snd_ssthresh
= win
* tp
->t_maxseg
;
tp
->t_timer
[TCPT_REXMT
] = 0;
tp
->snd_nxt
= ti
->ti_ack
;
tp
->snd_cwnd
= tp
->t_maxseg
;
if (SEQ_GT(onxt
, tp
->snd_nxt
))
if (SEQ_GT(ti
->ti_ack
, tp
->snd_max
)) {
tcpstat
.tcps_rcvacktoomuch
++;
acked
= ti
->ti_ack
- tp
->snd_una
;
tcpstat
.tcps_rcvackpack
++;
tcpstat
.tcps_rcvackbyte
+= acked
;
* If transmit timer is running and timed sequence
* number was acked, update smoothed round trip time.
* Since we now have an rtt measurement, cancel the
* timer backoff (cf., Phil Karn's retransmit alg.).
* Recompute the initial retransmit timer.
if (tp
->t_rtt
&& SEQ_GT(ti
->ti_ack
, tp
->t_rtseq
)) {
tcpstat
.tcps_rttupdated
++;
* srtt is stored as fixed point with 3 bits
* after the binary point (i.e., scaled by 8).
* The following magic is equivalent
* to the smoothing algorithm in rfc793
* (srtt = rtt/8 + srtt*7/8 in fixed point).
* Adjust t_rtt to origin 0.
delta
= tp
->t_rtt
- 1 - (tp
->t_srtt
>> 3);
if ((tp
->t_srtt
+= delta
) <= 0)
* We accumulate a smoothed rtt variance
* (actually, a smoothed mean difference),
* then set the retransmit timer to smoothed
* rtt + 2 times the smoothed variance.
* rttvar is stored as fixed point
* with 2 bits after the binary point
* (scaled by 4). The following is equivalent
* to rfc793 smoothing with an alpha of .75
* (rttvar = rttvar*3/4 + |delta| / 4).
* This replaces rfc793's wired-in beta.
delta
-= (tp
->t_rttvar
>> 2);
if ((tp
->t_rttvar
+= delta
) <= 0)
* No rtt measurement yet - use the
* unsmoothed rtt. Set the variance
* to half the rtt (so our first
* retransmit happens at 2*rtt)
tp
->t_srtt
= tp
->t_rtt
<< 3;
tp
->t_rttvar
= tp
->t_rtt
<< 1;
TCPT_RANGESET(tp
->t_rxtcur
,
((tp
->t_srtt
>> 2) + tp
->t_rttvar
) >> 1,
TCPTV_MIN
, TCPTV_REXMTMAX
);
* If all outstanding data is acked, stop retransmit
* timer and remember to restart (more output or persist).
* If there is more data to be acked, restart retransmit
* timer, using current (possibly backed-off) value.
if (ti
->ti_ack
== tp
->snd_max
) {
tp
->t_timer
[TCPT_REXMT
] = 0;
} else if (tp
->t_timer
[TCPT_PERSIST
] == 0)
tp
->t_timer
[TCPT_REXMT
] = tp
->t_rxtcur
;
* When new data is acked, open the congestion window.
* If the window gives us less than ssthresh packets
* in flight, open exponentially (maxseg per packet).
* Otherwise open linearly (maxseg per window,
* or maxseg^2 / cwnd per packet).
u_int incr
= tp
->t_maxseg
;
if (tp
->snd_cwnd
> tp
->snd_ssthresh
)
incr
= max(incr
* incr
/ tp
->snd_cwnd
, 1);
tp
->snd_cwnd
= min(tp
->snd_cwnd
+ incr
, USHRT_MAX
); /* XXX */
if (acked
> so
->so_snd
.sb_cc
) {
tp
->snd_wnd
-= so
->so_snd
.sb_cc
;
sbdrop(&so
->so_snd
, (int)so
->so_snd
.sb_cc
);
sbdrop(&so
->so_snd
, acked
);
tp
->snd_una
= ti
->ti_ack
;
if (SEQ_LT(tp
->snd_nxt
, tp
->snd_una
))
tp
->snd_nxt
= tp
->snd_una
;
* In FIN_WAIT_1 STATE in addition to the processing
* for the ESTABLISHED state if our FIN is now acknowledged
* If we can't receive any more
* data, then closing user can proceed.
* Starting the timer is contrary to the
* specification, but if we don't get a FIN
if (so
->so_state
& SS_CANTRCVMORE
) {
tp
->t_timer
[TCPT_2MSL
] = tcp_maxidle
;
tp
->t_state
= TCPS_FIN_WAIT_2
;
* In CLOSING STATE in addition to the processing for
* the ESTABLISHED state if the ACK acknowledges our FIN
* then enter the TIME-WAIT state, otherwise ignore
tp
->t_state
= TCPS_TIME_WAIT
;
tp
->t_timer
[TCPT_2MSL
] = 2 * TCPTV_MSL
;
* In LAST_ACK, we may still be waiting for data to drain
* and/or to be acked, as well as for the ack of our FIN.
* If our FIN is now acknowledged, delete the TCB,
* enter the closed state and return.
* In TIME_WAIT state the only thing that should arrive
* is a retransmission of the remote FIN. Acknowledge
* it and restart the finack timer.
tp
->t_timer
[TCPT_2MSL
] = 2 * TCPTV_MSL
;
* Update window information.
* Don't look at window if no ACK: TAC's send garbage on first SYN.
if ((tiflags
& TH_ACK
) &&
(SEQ_LT(tp
->snd_wl1
, ti
->ti_seq
) || tp
->snd_wl1
== ti
->ti_seq
&&
(SEQ_LT(tp
->snd_wl2
, ti
->ti_ack
) ||
tp
->snd_wl2
== ti
->ti_ack
&& ti
->ti_win
> tp
->snd_wnd
))) {
/* keep track of pure window updates */
tp
->snd_wl2
== ti
->ti_ack
&& ti
->ti_win
> tp
->snd_wnd
)
tcpstat
.tcps_rcvwinupd
++;
tp
->snd_wnd
= ti
->ti_win
;
tp
->snd_wl1
= ti
->ti_seq
;
tp
->snd_wl2
= ti
->ti_ack
;
if (tp
->snd_wnd
> tp
->max_sndwnd
)
tp
->max_sndwnd
= tp
->snd_wnd
;
* Process segments with URG.
if ((tiflags
& TH_URG
) && ti
->ti_urp
&&
TCPS_HAVERCVDFIN(tp
->t_state
) == 0) {
* This is a kludge, but if we receive and accept
* random urgent pointers, we'll crash in
* soreceive. It's hard to imagine someone
* actually wanting to send this much urgent data.
if (ti
->ti_urp
+ so
->so_rcv
.sb_cc
> SB_MAX
) {
ti
->ti_urp
= 0; /* XXX */
tiflags
&= ~TH_URG
; /* XXX */
* If this segment advances the known urgent pointer,
* then mark the data stream. This should not happen
* in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
* a FIN has been received from the remote side.
* In these states we ignore the URG.
* According to RFC961 (Assigned Protocols),
* the urgent pointer points to the last octet
* of urgent data. We continue, however,
* to consider it to indicate the first octet
* of data past the urgent section
* as the original spec states.
if (SEQ_GT(ti
->ti_seq
+ti
->ti_urp
, tp
->rcv_up
)) {
tp
->rcv_up
= ti
->ti_seq
+ ti
->ti_urp
;
so
->so_oobmark
= so
->so_rcv
.sb_cc
+
(tp
->rcv_up
- tp
->rcv_nxt
) - 1;
so
->so_state
|= SS_RCVATMARK
;
tp
->t_oobflags
&= ~(TCPOOB_HAVEDATA
| TCPOOB_HADDATA
);
* Remove out of band data so it doesn't get presented to user.
* This can happen independent of advancing the URG pointer,
* but if two URG's are pending at once, some out-of-band
* data may creep in... ick.
if (ti
->ti_urp
<= ti
->ti_len
&& (so
->so_options
& SO_OOBINLINE
) == 0
tcp_pulloutofband(so
, ti
);
* If no out of band data is expected,
* pull receive urgent pointer along
* with the receive window.
if (SEQ_GT(tp
->rcv_nxt
, tp
->rcv_up
))
tp
->rcv_up
= tp
->rcv_nxt
;
* Process the segment text, merging it into the TCP sequencing queue,
* and arranging for acknowledgment of receipt if necessary.
* This process logically involves adjusting tp->rcv_wnd as data
* is presented to the user (this happens in tcp_usrreq.c,
* case PRU_RCVD). If a FIN has already been received on this
* connection then we just ignore the text.
if ((ti
->ti_len
|| (tiflags
&TH_FIN
)) &&
TCPS_HAVERCVDFIN(tp
->t_state
) == 0) {
TCP_REASS(tp
, ti
, m
, so
, tiflags
);
* Note the amount of data that peer has sent into
* our window, in order to estimate the sender's
len
= so
->so_rcv
.sb_hiwat
- (tp
->rcv_adv
- tp
->rcv_nxt
);
* If FIN is received ACK the FIN and let the user know
* that the connection is closing.
if (TCPS_HAVERCVDFIN(tp
->t_state
) == 0) {
tp
->t_flags
|= TF_ACKNOW
;
* In SYN_RECEIVED and ESTABLISHED STATES
* enter the CLOSE_WAIT state.
tp
->t_state
= TCPS_CLOSE_WAIT
;
* If still in FIN_WAIT_1 STATE FIN has not been acked so
* enter the CLOSING state.
tp
->t_state
= TCPS_CLOSING
;
* In FIN_WAIT_2 state enter the TIME_WAIT state,
* starting the time-wait timer, turning off the other
tp
->t_state
= TCPS_TIME_WAIT
;
tp
->t_timer
[TCPT_2MSL
] = 2 * TCPTV_MSL
;
* In TIME_WAIT state restart the 2 MSL time_wait timer.
tp
->t_timer
[TCPT_2MSL
] = 2 * TCPTV_MSL
;
if (so
->so_options
& SO_DEBUG
)
tcp_trace(TA_INPUT
, ostate
, tp
, &tcp_saveti
, 0);
* Return any desired output.
if (needoutput
|| (tp
->t_flags
& TF_ACKNOW
))
* Generate an ACK dropping incoming segment if it occupies
* sequence space, where the ACK reflects our state.
tp
->t_flags
|= TF_ACKNOW
;
* Generate a RST, dropping incoming segment.
* Make ACK acceptable to originator of segment.
* Don't bother to respond if destination was broadcast.
if ((tiflags
& TH_RST
) || m
->m_flags
& M_BCAST
)
tcp_respond(tp
, ti
, m
, (tcp_seq
)0, ti
->ti_ack
, TH_RST
);
tcp_respond(tp
, ti
, m
, ti
->ti_seq
+ti
->ti_len
, (tcp_seq
)0,
/* destroy temporarily created socket */
* Drop space held by incoming segment and return.
if (tp
&& (tp
->t_inpcb
->inp_socket
->so_options
& SO_DEBUG
))
tcp_trace(TA_DROP
, ostate
, tp
, &tcp_saveti
, 0);
/* destroy temporarily created socket */
tcp_dooptions(tp
, om
, ti
)
for (; cnt
> 0; cnt
-= optlen
, cp
+= optlen
) {
if (!(ti
->ti_flags
& TH_SYN
))
tp
->t_maxseg
= *(u_short
*)(cp
+ 2);
tp
->t_maxseg
= ntohs((u_short
)tp
->t_maxseg
);
tp
->t_maxseg
= min(tp
->t_maxseg
, tcp_mss(tp
));
* Pull out of band byte out of a segment so
* it doesn't appear in the user's data queue.
* It is still reflected in the segment length for
tcp_pulloutofband(so
, ti
)
int cnt
= ti
->ti_urp
- 1;
char *cp
= mtod(m
, caddr_t
) + cnt
;
struct tcpcb
*tp
= sototcpcb(so
);
tp
->t_oobflags
|= TCPOOB_HAVEDATA
;
bcopy(cp
+1, cp
, (unsigned)(m
->m_len
- cnt
- 1));
panic("tcp_pulloutofband");
* Determine a reasonable value for maxseg size.
* If the route is known, use one that can be handled
* on the given interface without forcing IP to fragment.
* If bigger than an mbuf cluster (MCLBYTES), round down to nearest size
* to utilize large mbufs.
* If interface pointer is unavailable, or the destination isn't local,
* use a conservative size (512 or the default IP max size, but no more
* than the mtu of the interface through which we route),
* as we can't discover anything about intervening gateways or networks.
* We also initialize the congestion/slow start window to be a single
* segment if the destination isn't local; this information should
* probably all be saved with the routing entry at the transport level.
* This is ugly, and doesn't belong at this level, but has to happen somehow.
register struct tcpcb
*tp
;
if ((ro
->ro_rt
== (struct rtentry
*)0) ||
(ifp
= ro
->ro_rt
->rt_ifp
) == (struct ifnet
*)0) {
/* No route yet, so try to acquire one */
if (inp
->inp_faddr
.s_addr
!= INADDR_ANY
) {
ro
->ro_dst
.sa_family
= AF_INET
;
((struct sockaddr_in
*) &ro
->ro_dst
)->sin_addr
=
if ((ro
->ro_rt
== 0) || (ifp
= ro
->ro_rt
->rt_ifp
) == 0)
mss
= ifp
->if_mtu
- sizeof(struct tcpiphdr
);
#if (MCLBYTES & (MCLBYTES - 1)) == 0
mss
= mss
/ MCLBYTES
* MCLBYTES
;
if (in_localaddr(inp
->inp_faddr
))
/* XXX this belongs in netinet/in.c */
register u_long i
= ntohl(in
.s_addr
);
register struct ifnet
*ifp
;
register struct sockaddr_in
*sin
;
for (ifp
= ifnet
; ifp
; ifp
= ifp
->if_next
) {
if (ifp
->if_addr
.sa_family
!= AF_INET
)
sin
= (struct sockaddr_in
*)&ifp
->if_addr
;
if ((sin
->sin_addr
.s_addr
& mask
) == i
)