* Copyright (c) 1982, 1986 Regents of the University of California.
* Redistribution and use in source and binary forms are permitted
* provided that the above copyright notice and this paragraph are
* duplicated in all such forms and that any documentation,
* advertising materials, and other materials related to such
* distribution and use acknowledge that the software was developed
* by the University of California, Berkeley. The name of the
* University may not be used to endorse or promote products derived
* from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
* @(#)ip_input.c 7.10 (Berkeley) %G%
#include "../net/route.h"
/*
 * Dispatch table indexed by IP protocol number.  Each entry is an index
 * into inetsw: initialization stores `pr - inetsw` here, and the input
 * path calls inetsw[ip_protox[ip->ip_p]].pr_input.
 */
u_char ip_protox
[IPPROTO_MAX
];
/* Queue-length limit; copied into ipintrq.ifq_maxlen at initialization. */
int ipqmaxlen
= IFQ_MAXLEN
;
/* Head of the interface-address list that is walked through ia_next. */
struct in_ifaddr
*in_ifaddr
; /* first inet address */
* We need to save the IP options in case a protocol wants to respond
* to an incoming packet over the same route if the packet got here
* using IP source routing. This allows connection establishment and
* maintenance when the remote end is on a network that is not known
char nop
; /* one NOP to align */
char srcopt
[IPOPT_OFFSET
+ 1]; /* OPTVAL, OLEN and OFFSET */
struct in_addr route
[MAX_IPOPTLEN
];
* IP initialization: fill in IP protocol switch table.
* All protocols not implemented in kernel go to raw IP protocol handler.
register struct protosw
*pr
;
pr
= pffindproto(PF_INET
, IPPROTO_RAW
, SOCK_RAW
);
for (i
= 0; i
< IPPROTO_MAX
; i
++)
ip_protox
[i
] = pr
- inetsw
;
for (pr
= inetdomain
.dom_protosw
;
pr
< inetdomain
.dom_protoswNPROTOSW
; pr
++)
if (pr
->pr_domain
->dom_family
== PF_INET
&&
pr
->pr_protocol
&& pr
->pr_protocol
!= IPPROTO_RAW
)
ip_protox
[pr
->pr_protocol
] = pr
- inetsw
;
ipq
.next
= ipq
.prev
= &ipq
;
ip_id
= time
.tv_sec
& 0xffff;
ipintrq
.ifq_maxlen
= ipqmaxlen
;
/*
 * Scratch AF_INET socket address.  Option processing fills in sin_addr
 * and then passes it to ifa_ifwithaddr() / ip_rtaddr() for lookups.
 */
struct sockaddr_in ipaddr
= { AF_INET
};
/*
 * Route kept between packets.  The visible code compares the wanted
 * destination against ro_dst and calls RTFREE on ro_rt when they differ.
 * NOTE(review): the re-lookup itself is not visible here -- confirm.
 */
struct route ipforward_rt
;
* Ip input routine. Checksum and byte swap header. If fragmented
* try to reassemble. If complete and fragment queue exists, discard.
* Process options. Pass to next level.
register struct in_ifaddr
*ia
;
* Get next datagram off input queue and get IP header
IF_DEQUEUEIF(&ipintrq
, m
, ifp
);
* If no IP addresses have been set yet but the interfaces
* are receiving, can't do anything with incoming packets yet.
if ((m
->m_off
> MMAXOFF
|| m
->m_len
< sizeof (struct ip
)) &&
(m
= m_pullup(m
, sizeof (struct ip
))) == 0) {
ip
= mtod(m
, struct ip
*);
if (hlen
< sizeof(struct ip
)) { /* minimum header length */
if ((m
= m_pullup(m
, hlen
)) == 0) {
ip
= mtod(m
, struct ip
*);
if (ip
->ip_sum
= in_cksum(m
, hlen
)) {
* Convert fields to host representation.
ip
->ip_len
= ntohs((u_short
)ip
->ip_len
);
ip
->ip_id
= ntohs(ip
->ip_id
);
ip
->ip_off
= ntohs((u_short
)ip
->ip_off
);
* Check that the amount of data in the buffers
* is at least as much as the IP header would have us expect.
* Trim mbufs if longer than we expect.
* Drop packet if shorter than we expect.
i
= -(u_short
)ip
->ip_len
;
* Process options and, if not destined for us,
* ship it on. ip_dooptions returns 1 when an
* error was detected (causing an icmp message
* to be sent and the original packet to be freed).
ip_nhops
= 0; /* for source routed packets */
if (hlen
> sizeof (struct ip
) && ip_dooptions(ip
, ifp
))
* Check our list of addresses, to see if the packet is for us.
for (ia
= in_ifaddr
; ia
; ia
= ia
->ia_next
) {
#define satosin(sa) ((struct sockaddr_in *)(sa))
if (IA_SIN(ia
)->sin_addr
.s_addr
== ip
->ip_dst
.s_addr
)
#ifdef DIRECTED_BROADCAST
(ia
->ia_ifp
->if_flags
& IFF_BROADCAST
)) {
if (satosin(&ia
->ia_broadaddr
)->sin_addr
.s_addr
==
if (ip
->ip_dst
.s_addr
== ia
->ia_netbroadcast
.s_addr
)
* Look for all-0's host part (old broadcast addr),
* either for subnet or net.
t
= ntohl(ip
->ip_dst
.s_addr
);
if (ip
->ip_dst
.s_addr
== (u_long
)INADDR_BROADCAST
)
if (ip
->ip_dst
.s_addr
== INADDR_ANY
)
* Not for us; forward if possible and desirable.
* If offset or IP_MF are set, must reassemble.
* Otherwise, nothing need be done.
* (We could look in the reassembly queue to see
* if the packet was previously fragmented,
* but it's not worth the time; just let them time out.)
if (ip
->ip_off
&~ IP_DF
) {
* Look for queue of fragments
for (fp
= ipq
.next
; fp
!= &ipq
; fp
= fp
->next
)
if (ip
->ip_id
== fp
->ipq_id
&&
ip
->ip_src
.s_addr
== fp
->ipq_src
.s_addr
&&
ip
->ip_dst
.s_addr
== fp
->ipq_dst
.s_addr
&&
* Adjust ip_len to not reflect header,
* set ip_mff if more fragments are expected,
* convert offset of this to bytes.
((struct ipasfrag
*)ip
)->ipf_mff
= 0;
((struct ipasfrag
*)ip
)->ipf_mff
= 1;
* If datagram marked as having more fragments
* or if this is not the first fragment,
* attempt reassembly; if it succeeds, proceed.
if (((struct ipasfrag
*)ip
)->ipf_mff
|| ip
->ip_off
) {
ip
= ip_reass((struct ipasfrag
*)ip
, fp
);
* Switch out to protocol's input routine.
(*inetsw
[ip_protox
[ip
->ip_p
]].pr_input
)(m
, ifp
);
* Take incoming datagram fragment and try to
* reassemble it into whole datagram. If a chain for
* reassembly of this datagram already exists, then it
* is given as fp; otherwise have to make a chain.
register struct ipasfrag
*ip
;
register struct mbuf
*m
= dtom(ip
);
register struct ipasfrag
*q
;
int hlen
= ip
->ip_hl
<< 2;
* Presence of header sizes in mbufs
* would confuse code below.
* If first fragment to arrive, create a reassembly queue.
if ((t
= m_get(M_DONTWAIT
, MT_FTABLE
)) == NULL
)
fp
= mtod(t
, struct ipq
*);
fp
->ipq_next
= fp
->ipq_prev
= (struct ipasfrag
*)fp
;
fp
->ipq_src
= ((struct ip
*)ip
)->ip_src
;
fp
->ipq_dst
= ((struct ip
*)ip
)->ip_dst
;
q
= (struct ipasfrag
*)fp
;
* Find a segment which begins after this one does.
for (q
= fp
->ipq_next
; q
!= (struct ipasfrag
*)fp
; q
= q
->ipf_next
)
if (q
->ip_off
> ip
->ip_off
)
* If there is a preceding segment, it may provide some of
* our data already. If so, drop the data from the incoming
* segment. If it provides all of our data, drop us.
if (q
->ipf_prev
!= (struct ipasfrag
*)fp
) {
i
= q
->ipf_prev
->ip_off
+ q
->ipf_prev
->ip_len
- ip
->ip_off
;
* While we overlap succeeding segments trim them or,
* if they are completely covered, dequeue them.
while (q
!= (struct ipasfrag
*)fp
&& ip
->ip_off
+ ip
->ip_len
> q
->ip_off
) {
i
= (ip
->ip_off
+ ip
->ip_len
) - q
->ip_off
;
m_freem(dtom(q
->ipf_prev
));
* Stick new segment in its place;
* check for complete reassembly.
for (q
= fp
->ipq_next
; q
!= (struct ipasfrag
*)fp
; q
= q
->ipf_next
) {
if (q
->ipf_prev
->ipf_mff
)
* Reassembly is complete; concatenate fragments.
while (q
!= (struct ipasfrag
*)fp
) {
* Create header for new ip packet by
* modifying header of first packet;
* dequeue and discard fragment reassembly header.
((struct ip
*)ip
)->ip_src
= fp
->ipq_src
;
((struct ip
*)ip
)->ip_dst
= fp
->ipq_dst
;
m
->m_len
+= (ip
->ip_hl
<< 2);
m
->m_off
-= (ip
->ip_hl
<< 2);
return ((struct ip
*)ip
);
ipstat
.ips_fragdropped
++;
* Free a fragment reassembly header and all
register struct ipasfrag
*q
, *p
;
for (q
= fp
->ipq_next
; q
!= (struct ipasfrag
*)fp
; q
= p
) {
* Put an ip fragment on a reassembly chain.
* Like insque, but pointers in middle of structure.
register struct ipasfrag
*p
, *prev
;
p
->ipf_next
= prev
->ipf_next
;
prev
->ipf_next
->ipf_prev
= p
;
* To ip_enq as remque is to insque.
register struct ipasfrag
*p
;
p
->ipf_prev
->ipf_next
= p
->ipf_next
;
p
->ipf_next
->ipf_prev
= p
->ipf_prev
;
* if a timer expires on a reassembly
if (fp
->prev
->ipq_ttl
== 0) {
ipstat
.ips_fragtimeout
++;
* Drain off all datagram fragments.
while (ipq
.next
!= &ipq
) {
ipstat
.ips_fragdropped
++;
/* Defined elsewhere; called as ifptoia(ifp) to get an in_ifaddr for an interface. */
extern struct in_ifaddr
*ifptoia();
/*
 * Forward declaration: called with an in_addr (next destination) and
 * yields the in_ifaddr of the outgoing interface, or 0 when no route
 * is available.
 */
struct in_ifaddr
*ip_rtaddr();
* Do option processing on a datagram,
* possibly discarding it if bad options
int opt
, optlen
, cnt
, off
, code
, type
= ICMP_PARAMPROB
;
register struct ip_timestamp
*ipt
;
register struct in_ifaddr
*ia
;
cnt
= (ip
->ip_hl
<< 2) - sizeof (struct ip
);
for (; cnt
> 0; cnt
-= optlen
, cp
+= optlen
) {
if (optlen
<= 0 || optlen
> cnt
) {
code
= &cp
[IPOPT_OLEN
] - (u_char
*)ip
;
* Source routing with record.
* Find interface with current destination address.
* If none on this machine then drop if strictly routed,
* or do nothing if loosely routed.
* Record interface address and bring up next address
* component. If strictly routed make sure next
* address on directly accessible net.
if ((off
= cp
[IPOPT_OFFSET
]) < IPOPT_MINOFF
) {
code
= &cp
[IPOPT_OFFSET
] - (u_char
*)ip
;
ipaddr
.sin_addr
= ip
->ip_dst
;
ia
= (struct in_ifaddr
*)
ifa_ifwithaddr((struct sockaddr
*)&ipaddr
);
code
= ICMP_UNREACH_SRCFAIL
;
* Loose routing, and not at next destination
* yet; nothing to do except forward.
if (off
> optlen
- sizeof(struct in_addr
)) {
* End of source route. Should be for us.
save_rte(cp
, ip
->ip_src
);
* locate outgoing interface
bcopy((caddr_t
)(cp
+ off
), (caddr_t
)&ipaddr
.sin_addr
,
sizeof(ipaddr
.sin_addr
));
if ((opt
== IPOPT_SSRR
&&
in_iaonnetof(in_netof(ipaddr
.sin_addr
)) == 0) ||
(ia
= ip_rtaddr(ipaddr
.sin_addr
)) == 0) {
code
= ICMP_UNREACH_SRCFAIL
;
ip
->ip_dst
= ipaddr
.sin_addr
;
bcopy((caddr_t
)&(IA_SIN(ia
)->sin_addr
),
(caddr_t
)(cp
+ off
), sizeof(struct in_addr
));
cp
[IPOPT_OFFSET
] += sizeof(struct in_addr
);
if ((off
= cp
[IPOPT_OFFSET
]) < IPOPT_MINOFF
) {
code
= &cp
[IPOPT_OFFSET
] - (u_char
*)ip
;
* If no space remains, ignore.
if (off
> optlen
- sizeof(struct in_addr
))
bcopy((caddr_t
)(&ip
->ip_dst
), (caddr_t
)&ipaddr
.sin_addr
,
sizeof(ipaddr
.sin_addr
));
* locate outgoing interface
if ((ia
= ip_rtaddr(ipaddr
.sin_addr
)) == 0) {
code
= ICMP_UNREACH_HOST
;
bcopy((caddr_t
)&(IA_SIN(ia
)->sin_addr
),
(caddr_t
)(cp
+ off
), sizeof(struct in_addr
));
cp
[IPOPT_OFFSET
] += sizeof(struct in_addr
);
code
= cp
- (u_char
*)ip
;
ipt
= (struct ip_timestamp
*)cp
;
if (ipt
->ipt_ptr
> ipt
->ipt_len
- sizeof (long)) {
if (++ipt
->ipt_oflw
== 0)
sin
= (struct in_addr
*)(cp
+ ipt
->ipt_ptr
- 1);
if (ipt
->ipt_ptr
+ sizeof(n_time
) +
sizeof(struct in_addr
) > ipt
->ipt_len
)
bcopy((caddr_t
)&IA_SIN(ia
)->sin_addr
,
(caddr_t
)sin
, sizeof(struct in_addr
));
ipt
->ipt_ptr
+= sizeof(struct in_addr
);
if (ipt
->ipt_ptr
+ sizeof(n_time
) +
sizeof(struct in_addr
) > ipt
->ipt_len
)
bcopy((caddr_t
)sin
, (caddr_t
)&ipaddr
.sin_addr
,
if (ifa_ifwithaddr((struct sockaddr
*)&ipaddr
) == 0)
ipt
->ipt_ptr
+= sizeof(struct in_addr
);
bcopy((caddr_t
)&ntime
, (caddr_t
)cp
+ ipt
->ipt_ptr
- 1,
ipt
->ipt_ptr
+= sizeof(n_time
);
icmp_error(ip
, type
, code
, ifp
);
* Given address of next destination (final or next hop),
* return internet address info of interface to be used to get there.
register struct sockaddr_in
*sin
;
register struct in_ifaddr
*ia
;
sin
= (struct sockaddr_in
*) &ipforward_rt
.ro_dst
;
if (ipforward_rt
.ro_rt
== 0 || dst
.s_addr
!= sin
->sin_addr
.s_addr
) {
if (ipforward_rt
.ro_rt
) {
RTFREE(ipforward_rt
.ro_rt
);
sin
->sin_family
= AF_INET
;
if (ipforward_rt
.ro_rt
== 0)
return ((struct in_ifaddr
*)0);
* Find address associated with outgoing interface.
for (ia
= in_ifaddr
; ia
; ia
= ia
->ia_next
)
if (ia
->ia_ifp
== ipforward_rt
.ro_rt
->rt_ifp
)
* Save incoming source route for use in replies,
* to be picked up later by ip_srcroute if the receiver is interested.
olen
= option
[IPOPT_OLEN
];
if (olen
> sizeof(ip_srcrt
) - 1) {
printf("save_rte: olen %d\n", olen
);
bcopy((caddr_t
)option
, (caddr_t
)ip_srcrt
.srcopt
, olen
);
ip_nhops
= (olen
- IPOPT_OFFSET
- 1) / sizeof(struct in_addr
);
ip_srcrt
.route
[ip_nhops
++] = dst
;
* Retrieve incoming source route for use in replies,
* in the same form used by setsockopt.
* The first hop is placed before the options, will be removed later.
register struct in_addr
*p
, *q
;
return ((struct mbuf
*)0);
m
= m_get(M_DONTWAIT
, MT_SOOPTS
);
return ((struct mbuf
*)0);
m
->m_len
= ip_nhops
* sizeof(struct in_addr
) + IPOPT_OFFSET
+ 1 + 1;
* First save first hop for return route
p
= &ip_srcrt
.route
[ip_nhops
- 1];
*(mtod(m
, struct in_addr
*)) = *p
--;
* Copy option fields and padding (nop) to mbuf.
ip_srcrt
.nop
= IPOPT_NOP
;
bcopy((caddr_t
)&ip_srcrt
, mtod(m
, caddr_t
) + sizeof(struct in_addr
),
q
= (struct in_addr
*)(mtod(m
, caddr_t
) +
sizeof(struct in_addr
) + IPOPT_OFFSET
+ 1 + 1);
* Record return path as an IP source route,
* reversing the path (pointers are now aligned).
while (p
>= ip_srcrt
.route
)
* Strip out IP options, at higher
* level protocol in the kernel.
* Second argument is buffer to which options
* will be moved, and return value is their length.
ip_stripoptions(ip
, mopt
)
olen
= (ip
->ip_hl
<<2) - sizeof (struct ip
);
opts
= (caddr_t
)(ip
+ 1);
bcopy(opts
, mtod(mopt
, caddr_t
), (unsigned)olen
);
i
= m
->m_len
- (sizeof (struct ip
) + olen
);
bcopy(opts
+ olen
, opts
, (unsigned)i
);
ip
->ip_hl
= sizeof(struct ip
) >> 2;
u_char inetctlerrmap
[PRC_NCMDS
] = {
0, 0, EHOSTDOWN
, EHOSTUNREACH
,
ENETUNREACH
, EHOSTUNREACH
, ECONNREFUSED
, ECONNREFUSED
,
EMSGSIZE
, EHOSTUNREACH
, 0, 0,
/* Compile-time default for ipsendredirects. */
#define IPSENDREDIRECTS 1
/*
 * Forwarding switch.  When this is zero, or in_interfaces <= 1, the
 * forwarding code counts the packet in ipstat.ips_cantforward.
 */
int ipforwarding
= IPFORWARDING
;
/* Defined elsewhere; compared against 1 in the forwarding check. */
extern int in_interfaces
;
/* Non-zero is one of the conditions required before a redirect is considered. */
int ipsendredirects
= IPSENDREDIRECTS
;
* Forward a packet. If some error occurs return the sender
* an icmp packet. Note we can't always generate a meaningful
* icmp message because icmp doesn't have a large enough repertoire
* If not forwarding (possibly because we have only a single external
* network), just drop the packet. This could be confusing if ipforwarding
* was zero but some routing protocol was advancing us as a gateway
* to somewhere. However, we must let the routing protocol deal with that.
register int error
, type
= 0, code
;
register struct sockaddr_in
*sin
;
printf("forward: src %x dst %x ttl %x\n", ip
->ip_src
,
ip
->ip_id
= htons(ip
->ip_id
);
if (ipforwarding
== 0 || in_interfaces
<= 1) {
ipstat
.ips_cantforward
++;
type
= ICMP_UNREACH
, code
= ICMP_UNREACH_NET
;
if (ip
->ip_ttl
<= IPTTLDEC
) {
type
= ICMP_TIMXCEED
, code
= ICMP_TIMXCEED_INTRANS
;
* Save at most 64 bytes of the packet in case
* we need to generate an ICMP message to the src.
mcopy
= m_copy(dtom(ip
), 0, imin((int)ip
->ip_len
, 64));
sin
= (struct sockaddr_in
*)&ipforward_rt
.ro_dst
;
if (ipforward_rt
.ro_rt
== 0 ||
ip
->ip_dst
.s_addr
!= sin
->sin_addr
.s_addr
) {
if (ipforward_rt
.ro_rt
) {
RTFREE(ipforward_rt
.ro_rt
);
sin
->sin_family
= AF_INET
;
sin
->sin_addr
= ip
->ip_dst
;
* If forwarding packet using same interface that it came in on,
* perhaps should send a redirect to sender to shortcut a hop.
* Only send redirect if source is sending directly to us,
* and if packet was not source routed (or has any options).
* Also, don't send redirect if forwarding using a default route
* or a route modified by a redirect.
#define satosin(sa) ((struct sockaddr_in *)(sa))
if (ipforward_rt
.ro_rt
&& ipforward_rt
.ro_rt
->rt_ifp
== ifp
&&
(ipforward_rt
.ro_rt
->rt_flags
& RTF_DYNAMIC
) == 0 &&
satosin(&ipforward_rt
.ro_rt
->rt_dst
)->sin_addr
.s_addr
!= 0 &&
ipsendredirects
&& ip
->ip_hl
== (sizeof(struct ip
) >> 2)) {
u_long src
= ntohl(ip
->ip_src
.s_addr
);
u_long dst
= ntohl(ip
->ip_dst
.s_addr
);
if ((ia
= ifptoia(ifp
)) &&
(src
& ia
->ia_subnetmask
) == ia
->ia_subnet
) {
if (ipforward_rt
.ro_rt
->rt_flags
& RTF_GATEWAY
)
dest
= satosin(&ipforward_rt
.ro_rt
->rt_gateway
)->sin_addr
;
* If the destination is reached by a route to host,
* is on a subnet of a local net, or is directly
* on the attached net (!), use host redirect.
* (We may be the correct first hop for other subnets.)
code
= ICMP_REDIRECT_NET
;
if ((ipforward_rt
.ro_rt
->rt_flags
& RTF_HOST
) ||
(ipforward_rt
.ro_rt
->rt_flags
& RTF_GATEWAY
) == 0)
code
= ICMP_REDIRECT_HOST
;
else for (ia
= in_ifaddr
; ia
= ia
->ia_next
; )
if ((dst
& ia
->ia_netmask
) == ia
->ia_net
) {
if (ia
->ia_subnetmask
!= ia
->ia_netmask
)
code
= ICMP_REDIRECT_HOST
;
printf("redirect (%d) to %x\n", code
, dest
);
error
= ip_output(dtom(ip
), (struct mbuf
*)0, &ipforward_rt
,
ipstat
.ips_cantforward
++;
ipstat
.ips_redirectsent
++;
ip
= mtod(mcopy
, struct ip
*);
case 0: /* forwarded, but need redirect */
if (in_localaddr(ip
->ip_dst
))
code
= ICMP_UNREACH_HOST
;
code
= ICMP_UNREACH_NEEDFRAG
;
code
= ICMP_UNREACH_PORT
;
type
= ICMP_SOURCEQUENCH
;
code
= ICMP_UNREACH_HOST
;
icmp_error(ip
, type
, code
, ifp
, dest
);