* Copyright (c) 1982, 1986, 1988, 1990, 1993
* The Regents of the University of California. All rights reserved.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93
#include <sys/socketvar.h>
#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
/*
 * Tunable TCP defaults.  These are ordinary globals so that they can be
 * patched or set.
 */

/* Default segment size; copied into t_maxseg of a new control block. */
int tcp_mssdflt = TCP_MSS;

/*
 * Default round-trip time, computed as TCPTV_SRTTDFLT / PR_SLOWHZ.  It is
 * multiplied by PR_SLOWHZ again when the initial t_rttvar is derived.
 */
int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;

/*
 * Cached input pcb, defined elsewhere.  The close path compares the pcb
 * being closed against it.
 */
extern struct inpcb *tcp_last_inpcb;
tcb
.inp_next
= tcb
.inp_prev
= &tcb
;
if (max_protohdr
< sizeof(struct tcpiphdr
))
max_protohdr
= sizeof(struct tcpiphdr
);
if (max_linkhdr
+ sizeof(struct tcpiphdr
) > MHLEN
)
* Create template to be used to send tcp packets on a connection.
* Call after host entry created, allocates an mbuf and fills
* in a skeletal tcp/ip header, minimizing the amount of work
* necessary when the connection is used.
register struct inpcb
*inp
= tp
->t_inpcb
;
register struct tcpiphdr
*n
;
if ((n
= tp
->t_template
) == 0) {
m
= m_get(M_DONTWAIT
, MT_HEADER
);
m
->m_len
= sizeof (struct tcpiphdr
);
n
= mtod(m
, struct tcpiphdr
*);
n
->ti_next
= n
->ti_prev
= 0;
n
->ti_len
= htons(sizeof (struct tcpiphdr
) - sizeof (struct ip
));
n
->ti_src
= inp
->inp_laddr
;
n
->ti_dst
= inp
->inp_faddr
;
n
->ti_sport
= inp
->inp_lport
;
n
->ti_dport
= inp
->inp_fport
;
* Send a single message to the TCP at address specified by
* the given TCP/IP header. If m == 0, then we make a copy
* of the tcpiphdr at ti and send directly to the addressed host.
* This is used to force keep alive messages out using the TCP
* template for a connection tp->t_template. If flags are given
* then we send a message back to the TCP which originated the
* segment ti, and discard the mbuf containing it and any other
* attached mbufs.
* In any case the ack and sequence number of the transmitted
* segment are as specified by the parameters.
tcp_respond(tp
, ti
, m
, ack
, seq
, flags
)
register struct tcpiphdr
*ti
;
win
= sbspace(&tp
->t_inpcb
->inp_socket
->so_rcv
);
ro
= &tp
->t_inpcb
->inp_route
;
m
= m_gethdr(M_DONTWAIT
, MT_HEADER
);
m
->m_data
+= max_linkhdr
;
*mtod(m
, struct tcpiphdr
*) = *ti
;
ti
= mtod(m
, struct tcpiphdr
*);
m
->m_len
= sizeof (struct tcpiphdr
);
/*
 * xchg(a, b, type): exchange the values of the lvalues a and b through a
 * temporary of the given type.
 *
 * The body is wrapped in do { } while (0) so that "xchg(...);" is exactly
 * one statement and remains valid in an unbraced if/else.  The temporary
 * has a name unlikely to collide with an argument, so an operand that is
 * itself called "t" is no longer shadowed.  a and b are each evaluated
 * twice and must therefore be free of side effects.
 */
#define xchg(a, b, type) \
	do { type xchg_tmp_ = (a); (a) = (b); (b) = xchg_tmp_; } while (0)
xchg(ti
->ti_dst
.s_addr
, ti
->ti_src
.s_addr
, u_long
);
xchg(ti
->ti_dport
, ti
->ti_sport
, u_short
);
ti
->ti_len
= htons((u_short
)(sizeof (struct tcphdr
) + tlen
));
tlen
+= sizeof (struct tcpiphdr
);
m
->m_pkthdr
.rcvif
= (struct ifnet
*) 0;
ti
->ti_next
= ti
->ti_prev
= 0;
ti
->ti_off
= sizeof (struct tcphdr
) >> 2;
ti
->ti_win
= htons((u_short
) (win
>> tp
->rcv_scale
));
ti
->ti_win
= htons((u_short
)win
);
ti
->ti_sum
= in_cksum(m
, tlen
);
((struct ip
*)ti
)->ip_len
= tlen
;
((struct ip
*)ti
)->ip_ttl
= ip_defttl
;
(void) ip_output(m
, NULL
, ro
, 0, NULL
);
* Create a new TCP control block, making an
* empty reassembly queue and hooking it to the argument
* protocol control block.
register struct tcpcb
*tp
;
tp
= malloc(sizeof(*tp
), M_PCB
, M_NOWAIT
);
return ((struct tcpcb
*)0);
bzero((char *) tp
, sizeof(struct tcpcb
));
tp
->seg_next
= tp
->seg_prev
= (struct tcpiphdr
*)tp
;
tp
->t_maxseg
= tcp_mssdflt
;
tp
->t_flags
= tcp_do_rfc1323
? (TF_REQ_SCALE
|TF_REQ_TSTMP
) : 0;
* Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
* rtt estimate. Set rttvar so that srtt + 2 * rttvar gives
* reasonable initial retransmit time.
tp
->t_srtt
= TCPTV_SRTTBASE
;
tp
->t_rttvar
= tcp_rttdflt
* PR_SLOWHZ
<< 2;
tp
->t_rttmin
= TCPTV_MIN
;
TCPT_RANGESET(tp
->t_rxtcur
,
((TCPTV_SRTTBASE
>> 2) + (TCPTV_SRTTDFLT
<< 2)) >> 1,
TCPTV_MIN
, TCPTV_REXMTMAX
);
tp
->snd_cwnd
= TCP_MAXWIN
<< TCP_MAX_WINSHIFT
;
tp
->snd_ssthresh
= TCP_MAXWIN
<< TCP_MAX_WINSHIFT
;
inp
->inp_ip
.ip_ttl
= ip_defttl
;
inp
->inp_ppcb
= (caddr_t
)tp
;
* Drop a TCP connection, reporting
* the specified error. If connection is synchronized,
* then send a RST to peer.
register struct tcpcb
*tp
;
struct socket
*so
= tp
->t_inpcb
->inp_socket
;
if (TCPS_HAVERCVDSYN(tp
->t_state
)) {
tp
->t_state
= TCPS_CLOSED
;
tcpstat
.tcps_conndrops
++;
if (errno
== ETIMEDOUT
&& tp
->t_softerror
)
* Close a TCP control block:
* discard all space held by the tcp
* discard internet protocol block
register struct tcpcb
*tp
;
register struct tcpiphdr
*t
;
struct inpcb
*inp
= tp
->t_inpcb
;
struct socket
*so
= inp
->inp_socket
;
register struct rtentry
*rt
;
* If we sent enough data to get some meaningful characteristics,
* save them in the routing entry. 'Enough' is arbitrarily
* defined as the sendpipesize (default 4K) * 16. This would
* give us 16 rtt samples assuming we only get one sample per
* window (the usual case on a long haul net). 16 samples is
* enough for the srtt filter to converge to within 5% of the correct
* value; fewer samples and we could save a very bogus rtt.
* Don't update the default route's characteristics and don't
* update anything that the user "locked".
if (SEQ_LT(tp
->iss
+ so
->so_snd
.sb_hiwat
* 16, tp
->snd_max
) &&
(rt
= inp
->inp_route
.ro_rt
) &&
((struct sockaddr_in
*)rt_key(rt
))->sin_addr
.s_addr
!= INADDR_ANY
) {
if ((rt
->rt_rmx
.rmx_locks
& RTV_RTT
) == 0) {
(RTM_RTTUNIT
/ (PR_SLOWHZ
* TCP_RTT_SCALE
));
if (rt
->rt_rmx
.rmx_rtt
&& i
)
* filter this update to half the old & half
* the new values, converting scale.
* See route.h and tcp_var.h for a
* description of the scaling constants.
(rt
->rt_rmx
.rmx_rtt
+ i
) / 2;
if ((rt
->rt_rmx
.rmx_locks
& RTV_RTTVAR
) == 0) {
(RTM_RTTUNIT
/ (PR_SLOWHZ
* TCP_RTTVAR_SCALE
));
if (rt
->rt_rmx
.rmx_rttvar
&& i
)
(rt
->rt_rmx
.rmx_rttvar
+ i
) / 2;
rt
->rt_rmx
.rmx_rttvar
= i
;
* update the pipelimit (ssthresh) if it has been updated
* already or if a pipesize was specified & the threshold
* got below half the pipesize. I.e., wait for bad news
* before we start updating, then update on both good
* and bad news.
if ((rt
->rt_rmx
.rmx_locks
& RTV_SSTHRESH
) == 0 &&
(i
= tp
->snd_ssthresh
) && rt
->rt_rmx
.rmx_ssthresh
||
i
< (rt
->rt_rmx
.rmx_sendpipe
/ 2)) {
* convert the limit from user data bytes to
* packets then to packet data bytes.
i
= (i
+ tp
->t_maxseg
/ 2) / tp
->t_maxseg
;
i
*= (u_long
)(tp
->t_maxseg
+ sizeof (struct tcpiphdr
));
if (rt
->rt_rmx
.rmx_ssthresh
)
rt
->rt_rmx
.rmx_ssthresh
=
(rt
->rt_rmx
.rmx_ssthresh
+ i
) / 2;
rt
->rt_rmx
.rmx_ssthresh
= i
;
/* free the reassembly queue, if any */
while (t
!= (struct tcpiphdr
*)tp
) {
t
= (struct tcpiphdr
*)t
->ti_next
;
m
= REASS_MBUF((struct tcpiphdr
*)t
->ti_prev
);
(void) m_free(dtom(tp
->t_template
));
/* clobber input pcb cache if we're closing the cached connection */
if (inp
== tcp_last_inpcb
)
return ((struct tcpcb
*)0);
* Notify a tcp user of an asynchronous error;
* store error as soft error, but wake up user
* (for now, won't do anything until can select for soft error).
register struct tcpcb
*tp
= (struct tcpcb
*)inp
->inp_ppcb
;
register struct socket
*so
= inp
->inp_socket
;
* Ignore some errors if we are hooked up.
* If connection hasn't completed, has retransmitted several times,
* and receives a second error, give up now. This is better
* than waiting a long time to establish a connection that
* can never complete.
if (tp
->t_state
== TCPS_ESTABLISHED
&&
(error
== EHOSTUNREACH
|| error
== ENETUNREACH
||
} else if (tp
->t_state
< TCPS_ESTABLISHED
&& tp
->t_rxtshift
> 3 &&
wakeup((caddr_t
) &so
->so_timeo
);
tcp_ctlinput(cmd
, sa
, ip
)
register struct tcphdr
*th
;
extern struct in_addr zeroin_addr
;
extern u_char inetctlerrmap
[];
void (*notify
) __P((struct inpcb
*, int)) = tcp_notify
;
else if (!PRC_IS_REDIRECT(cmd
) &&
((unsigned)cmd
> PRC_NCMDS
|| inetctlerrmap
[cmd
] == 0))
th
= (struct tcphdr
*)((caddr_t
)ip
+ (ip
->ip_hl
<< 2));
in_pcbnotify(&tcb
, sa
, th
->th_dport
, ip
->ip_src
, th
->th_sport
,
in_pcbnotify(&tcb
, sa
, 0, zeroin_addr
, 0, cmd
, notify
);
* When a source quench is received, close congestion window
* to one segment. We will gradually open it again as we proceed.
struct tcpcb
*tp
= intotcpcb(inp
);
tp
->snd_cwnd
= tp
->t_maxseg
;