fixes for range locking
[unix-history] / usr / src / sys / deprecated / bbnnet / ip_output.c
#ifdef RCSIDENT
static char rcsident[] = "$Header: ip_output.c,v 1.28 85/07/31 09:32:09 walsh Exp $";
#endif
#include "../h/param.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/mbuf.h"
#include "../h/socket.h"
#include "../h/socketvar.h"
#include "../h/protosw.h"
#include "../h/domain.h"
#include "../h/ioctl.h"
#include "../h/syslog.h"
#include "../net/if.h"
#include "../net/route.h"
#include "../bbnnet/in.h"
#include "../bbnnet/net.h"
#include "../bbnnet/in_pcb.h"
#include "../bbnnet/in_var.h"
#include "../bbnnet/ip.h"
#include "../bbnnet/icmp.h"
/*
* If you're going to a specific host or via a gateway, the routing
* entry gateway field holds the best way to get there. Otherwise,
* the routing entry tells you how to get onto that net -- it has
* the net address portion of our local host:
*
* On bbn-labs-b:
*
* rt_dst rt_gateway flags
* il0 => 0x00000b80 0x2010b80 UP
* imp0 => 0x00000008 0x2000708 UP
* loop => 0x0000007f 0x100007f UP
*
* So you can see that the rt_gateway is our local address, and the
* rt_dst may be the net number of the media. If it's a route
* to a net, the other guy is on this net and you want to route the
* packet to him anyway.
*
* gateway 0 0x1000b80 UP, RTF_GATEWAY
*/
/*
 * IF_SEND(ifp, mp, rt, retval): pass mbuf chain mp to the output routine
 * of interface ifp and leave the result in retval.
 *
 *   - interface not IFF_UP:           chain is freed, retval = ENETUNREACH
 *   - route is RTF_GATEWAY/RTF_HOST:  next hop is the route's rt_gateway
 *   - otherwise:                      next hop is the packet's own ip_dst
 *
 * Expands to a brace-enclosed block holding a static sockaddr_in, and
 * evaluates its arguments more than once.
 */
#define IF_SEND(ifp, mp, rt, retval) \
{ \
    static struct sockaddr_in direct_dst = {AF_INET}; \
 \
    if ((ifp)->if_flags & IFF_UP) { \
	if ((rt)->rt_flags & (RTF_GATEWAY|RTF_HOST)) { \
	    retval = (*(ifp)->if_output)(ifp, mp, &(rt)->rt_gateway); \
	} else { \
	    direct_dst.sin_addr.s_addr = \
		(mtod(mp, struct ip *))->ip_dst.s_addr; \
	    retval = (*(ifp)->if_output)(ifp, mp, \
		(struct sockaddr *) &direct_dst); \
	} \
    } else { \
	/* goes with PRC_IFDOWN in in.c */ \
	m_freem(mp); \
	retval = ENETUNREACH; \
    } \
}
/*
 * Function form of IF_SEND: send chain mp out interface ifp using
 * routing entry rt and return whatever status IF_SEND produced.
 * ip_frag calls this once per fragment.
 */
if_send(ifp, mp, rt)
register struct ifnet *ifp;
register struct mbuf *mp;
register struct rtentry *rt;
{
    int status;

    IF_SEND(ifp, mp, rt, status);
    return (status);
}
/*
 * Find a route to this destination.  Given the source and destination
 * addresses, pick the routing entry to send through (its gateway field
 * is either the destination itself or a gateway).
 * Taken mostly from rtalloc; expanded to route according to
 * both ends of the connection.
 *
 * Up to three passes are made over the routing hash tables:
 *   1. host routes whose rt_dst equals dst,
 *   2. net routes whose rt_dst is on dst's network,
 *   3. the wildcard gateway, by convention network 0.
 * In every pass an entry is only a candidate if it is RTF_UP, its
 * interface is IFF_UP, it is an AF_INET route, and its rt_gateway is on
 * the same network as src.  Of the candidates found in a pass, the one
 * with the lowest rt_use wins (first found wins ties).
 *
 * Returns the chosen entry with rt_refcnt incremented, or NULL after
 * counting rtstat.rts_unreach when all passes come up empty.
 */
struct rtentry *ip_route(src, dst)
struct in_addr *src;
struct in_addr *dst;
{
    register struct rtentry *rt;
    register struct mbuf *m;
    register unsigned hash;
    net_t snet, dnet;
    int hostpass;
    struct rtentry *best;
    struct mbuf **table;
    static struct in_addr wildcard;

    /* get network parts of src and dest addresses */
    snet = iptonet(*src);
    dnet = iptonet(*dst);

    /* first pass: host routes */
    best = NULL;
    hash = HOSTHASH(dst->s_addr);
    table = rthost;
    hostpass = TRUE;

    for (;;)
    {
	for (m = table[hash % RTHASHSIZ]; m != NULL; m = m->m_next)
	{
	    rt = mtod(m, struct rtentry *);

	    /* usable entry in the right hash chain? */
	    if (rt->rt_hash != hash ||
		! (rt->rt_flags & RTF_UP) ||
		! (rt->rt_ifp->if_flags & IFF_UP) ||
		rt->rt_dst.sa_family != AF_INET)
		continue;

	    /* packets go out an interface with our local IP address */
	    if (iptonet(((struct sockaddr_in *)&(rt->rt_gateway))->sin_addr)
		!= snet)
		continue;

	    /* does this route get us there? */
	    if (hostpass)
	    {
		if (((struct sockaddr_in *)&(rt->rt_dst))->sin_addr.s_addr !=
		    dst->s_addr)
		    continue;
	    }
	    else
	    {
		/*
		 * iptonet == 0 => smart gateway (route to anywhere)
		 * iptonet != 0 => gateway to another net (route to net)
		 */
		if (iptonet(((struct sockaddr_in *)&(rt->rt_dst))->sin_addr)
		    != dnet)
		    continue;
	    }

	    /* and try to share load across gateways */
	    if (best == NULL || rt->rt_use < best->rt_use)
		best = rt;
	}

	if (best != NULL)
	    break;

	if (hostpass)
	{
	    /* second pass: routes to the destination's network */
	    hostpass = FALSE;
	    hash = NETHASH(*dst);
	    table = rtnet;
	    continue;
	}

	if (dst != &wildcard)
	{
	    /*
	     * Third pass: check for wildcard gateway,
	     * by convention network 0.
	     */
	    hash = 0;
	    dst = &wildcard;
	    dnet = 0;
	    continue;
	}

	rtstat.rts_unreach++;
	return (NULL);
    }

    best->rt_refcnt++;
    if (dst == &wildcard)
	rtstat.rts_wildcard++;
    return (best);
}
/*
* Ip_send is called from the higher protocol layer (TCP/RDP/UDP) and is passed
* an mbuf chain containing a packet to send to the local network. The first
* mbuf contains the protocol header and an IP header which is partially
* filled in. After determining a route (outgoing interface + first hop) for
* the packet, it is fragmented (if necessary) and sent to the local net
* through the local net send routine.
*
* For non-raw output, caller should have stuffed:
* ip protocol type, type of service, source addr, destin addr
*
* ip_tos is left to caller so that people using raw sockets can do whatever
* they please. (They don't have an inpcb in which to store such info.)
*
* The asis argument is TRUE for raw output and the gateway (packet forwarding)
* code. It indicates that the IP header is fully constructed.
*
* Errors at the IP layer and below occur synchronously, and can be reported
* back via subroutine return values. Higher level protocols should remember
* that if they do things asynchronous to a system call (ie., packet
* retransmission) that they should post error back to user via advise_user()
* so that user gets error next time he rendezvous with the kernel.
*/
ip_send(inp, mp, len, asis)
struct inpcb *inp;
register struct mbuf *mp;
register int len;
int asis;
{
register struct ip *p;
register struct ifnet *ifp;
register struct rtentry *rt;
register int hlen;
int free_route = FALSE;
int retval;
p = mtod(mp, struct ip *); /* -> ip header */
/*
* Find route for datagram if one has not been assigned.
*/
if ((rt = inp->inp_route.ro_rt) == NULL)
{
if ((rt = ip_route(&p->ip_src, &p->ip_dst)) == NULL)
{
if (asis || (p->ip_src.s_addr == INADDR_ANY))
{
/*
* asis: forwarding a packet not sourced by us
* eg., by raw interface and user level repeater process
* INADDR_ANY: sending icmp packet for which
* we're trying to avoid routing twice.
*/
struct route tmproute;
struct sockaddr_in *sin;
bzero ((caddr_t) &tmproute, sizeof(tmproute));
sin = (struct sockaddr_in *) &tmproute.ro_dst;
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = p->ip_dst.s_addr;
rtalloc (&tmproute);
rt = tmproute.ro_rt;
if (rt && (p->ip_src.s_addr == INADDR_ANY))
p->ip_src = IA_INADDR(in_iafromif(rt->rt_ifp));
}
if (rt == NULL)
{
m_freem(mp);
return(ENETUNREACH);
}
}
free_route = TRUE;
}
ifp = rt->rt_ifp;
/*
* Copy ip source route to header. Know asis must be FALSE, if do.
*/
if (inp->inp_optlen > 0)
{
char *q;
if (mp->m_off - inp->inp_optlen >= MMINOFF)
{
struct in_addr *ipa;
mp->m_off -= inp->inp_optlen;
mp->m_len += inp->inp_optlen;
q = (char *) p;
p = (struct ip *) (q - inp->inp_optlen);
bcopy(q, (caddr_t)p, sizeof(struct ip));
bcopy(inp->inp_options, (caddr_t)(p+1), (unsigned)inp->inp_optlen);
/*
* And replate eventual destination with first hop.
* Eventual destination is in source route just
* copied in.
*/
ipa = (struct in_addr *) (&inp->inp_options[0]);
p->ip_dst = ipa[inp->inp_optlen/sizeof(struct in_addr)];
}
else
log(LOG_INFO, "ip_send: optlen %d inpcb 0x%x\n",
(int)inp->inp_optlen, inp);
}
/*
* fill in ip header fields
*/
if (asis)
{
/*
* RAW OUTPUT. Must get len, hlen, off from packet header.
* Byte swap is ugly (since we must swap back below), but
* necessary in case we must fragment.
*/
hlen = p->ip_hl << IP_HLSHIFT;
len = ntohs(p->ip_len);
p->ip_off = ntohs(p->ip_off);
}
else
{
static u_short next_ip_id; /* some day RDP may want to force for rxmit */
hlen = sizeof(struct ip) + inp->inp_optlen;
len += hlen;
p->ip_v = IPVERSION;
p->ip_hl = hlen >> IP_HLSHIFT;
p->ip_off = 0;
p->ip_ttl = MAXTTL; /* ### should come from route */
p->ip_id = htons(next_ip_id++);
}
/*
* let ip_frag do the send if needed, otherwise do it directly.
*/
/* for testing IP reassembly code */
#ifdef FORCE_FRAG
#define MTU(ifp) (((ifp)->if_mtu >> FORCE_FRAG) & (~3))
#else
#define MTU(ifp) (ifp)->if_mtu
#endif
if (len > MTU(ifp))
{
p->ip_len = len;
retval = ip_frag(p, ifp, rt, hlen);
}
else
{
/*
* complete header, byte swap, and send to local net
*/
p->ip_len = htons((u_short)len);
p->ip_off = htons(p->ip_off);
/*
* No reason not to have kernel checksum, even for raw packets.
*/
p->ip_sum = 0;
p->ip_sum = in_cksum(dtom(p), hlen);
IF_SEND (ifp, mp, rt, retval);
}
rt->rt_use ++; /* Yet another IP packet sent away */
if (free_route)
{
struct socket *so;
if ((so = inp->inp_socket) &&
(so->so_proto->pr_flags & PR_CONNREQUIRED))
/*
* Found a new route after old one pinged out.
*/
inp->inp_route.ro_rt = rt;
else
rtfree(rt);
}
return(retval);
}
/*
* Ip_frag is called with a packet with a completed ip header
* (except for checksum). It fragments the packet, inserts the
* IP checksum, and calls the appropriate local net output routine
* to send it to the net.
*
* Previously, when there was only one kind of mbuf, it tried to
* reduce space requirements by recycling the chain to be fragmented.
* Preserving this approach is overly complicated, and should mbufs
* change again, cause problems. Therefore, have switched to copying
* the chain to be fragmented.
*/
ip_frag(p, ifp, rt, hlen)
register struct ip *p;
struct ifnet *ifp;
struct rtentry *rt;
register int hlen;
{
register struct mbuf *m; /* original chunk */
register struct mbuf *mhdr; /* fragment */
register struct ip *fip; /* the fragment IP header */
int off; /* offset into entire IP datagram */
int here; /* offset into this chunk of it */
register int len; /* length of data in this chunk */
int flags; /* of this chunk to fragment */
int max; /* max data length in a fragment */
int fdlen; /* actual fragment data length */
int error;
m = dtom(p);
if (p->ip_off & ip_df)
{ /* can't fragment */
m_freem(m);
return(EMSGSIZE);
}
max = MTU(ifp) - hlen; /* max data length in frag */
len = p->ip_len - hlen; /* data length */
/*
* this only needs to be this complicated if we are handed
* an already-fragmented packet
*/
flags = p->ip_off&(ip_mf|ip_df); /* save old flags */
p->ip_off &= ~flags; /* take them out of ip_off */
off = p->ip_off << IP_OFFSHIFT; /* fragment offset */
here = hlen;
error = 0;
while (len > 0)
{
/*
* Allocate mbuf for fragment IP header
*/
mhdr = m_get(M_DONTWAIT, MT_HEADER);
if (mhdr == NULL)
{
error = ENOBUFS;
break;
}
/*
* get copy of data for fragment
*/
if (len < max)
fdlen = len;
else
fdlen = max & (~7); /* 7 == 2^IP_OFFSHIFT -1 */
mhdr->m_next = m_copy(m, here, fdlen);
if (mhdr->m_next == NULL)
{
m_free(mhdr);
error = ENOBUFS;
break;
}
/*
* build the header for this fragment and ship it off.
*/
mhdr->m_len = hlen;
mhdr->m_off = MMAXOFF - hlen;
fip = mtod(mhdr, struct ip *);
bcopy((caddr_t)p, (caddr_t)fip, (unsigned)hlen);
fip->ip_off = off >> IP_OFFSHIFT;
if (fdlen >= len)
/* it's the last fragment */
fip->ip_off |= flags;
else
fip->ip_off |= ip_mf;
fip->ip_off = htons((u_short)fip->ip_off);
fip->ip_len = htons((u_short)fdlen + hlen);
fip->ip_sum = 0;
fip->ip_sum = in_cksum(mhdr, hlen);
if (error = if_send (ifp, mhdr, rt))
break;
/*
* and get ready for next pass through the loop
*/
len -= fdlen;
off += fdlen;
here += fdlen;
}
m_freem(m);
return (error);
}
/*
* Current connection should use a new path.
*/
struct rtentry *ip_reroute(inp)
register struct inpcb *inp;
{
register struct route *ro = &inp->inp_route;
rtfree(ro->ro_rt);
return(ro->ro_rt = ip_route(&inp->inp_laddr, &inp->inp_faddr));
}
/*
 * A gateway has gone down.  Change route used by all connections currently
 * using it.
 *
 * Walks the internet domain's protocol switch and calls the ctlinput
 * routine of every non-raw protocol that has one, passing PRC_GWDOWN
 * and the gateway's address.
 */
ip_gdown(addr)
u_long addr;
{
    register struct protosw *psw;

    psw = inetdomain.dom_protosw;
    while (psw < inetdomain.dom_protoswNPROTOSW)
    {
	if (psw->pr_type != SOCK_RAW && psw->pr_ctlinput)
	    (*(psw->pr_ctlinput)) (PRC_GWDOWN, addr);
	psw++;
    }
}
/*
 * Called from protocol ctlinput routine.  This way, IP/ICMP don't need to know
 * about protocol's head of inpcbs... for all the protocols.
 *
 * head is the head of a circular list of inpcbs (may be NULL, in which
 * case nothing is done); addr is the address of the gateway that went
 * down.  Every inpcb whose cached route is a gateway route through addr
 * is given a new route, and its socket is advised of ENETUNREACH when
 * none can be found.
 */
in_gdown (head, addr)
register struct inpcb *head;
u_long addr;
{
    register struct inpcb *inp;
    register struct rtentry *rt;

    if (head == NULL)
	return;

    for (inp = head->inp_next; inp != head; inp = inp->inp_next)
    {
	/* only connections routed through the dead gateway matter */
	rt = inp->inp_route.ro_rt;
	if (rt == NULL)
	    continue;
	if (! (rt->rt_flags & RTF_GATEWAY))
	    continue;
	if (((struct sockaddr_in *) &rt->rt_gateway)->sin_addr.s_addr != addr)
	    continue;

	/*
	 * Don't remove route permanently, since want to catch
	 * the gateway when it reboots:
	 *	-- rtrequest (SIOCDELRT, rt) --
	 *
	 * Make sure rtfree() does not remove the route mbuf by
	 * incrementing the reference count here, and decrementing
	 * when timeout on reinstatement goes off.  Cannot call
	 * rtfree with zero reference count when have not done
	 * SIOCDELRT.
	 *
	 * The RTF_UP test means this is done once per route, by the
	 * first connection found using it.
	 */
	if (rt->rt_flags & RTF_UP)
	{
	    rt->rt_flags &= ~RTF_UP;
	    rt->rt_flags |= RTF_REINSTATE;
	    rt->irt_gdown = RT_REINSTATE;
	    rt->rt_refcnt ++;
	}

	if (ip_reroute(inp) == NULL)
	    advise_user(inp->inp_socket, ENETUNREACH);
    }
}