X-Git-Url: https://git.subgeniuskitty.com/unix-history/.git/blobdiff_plain/39674d5f4e66e750df77c36159b66e1554511bb5..b532c831831a2b8eee5609d72d747afe00056338:/usr/src/sys/netinet/ip_input.c diff --git a/usr/src/sys/netinet/ip_input.c b/usr/src/sys/netinet/ip_input.c index b591e24992..e352420dd0 100644 --- a/usr/src/sys/netinet/ip_input.c +++ b/usr/src/sys/netinet/ip_input.c @@ -1,23 +1,56 @@ -/* ip_input.c 1.42 82/04/25 */ - -#include "../h/param.h" -#include "../h/systm.h" -#include "../h/clock.h" -#include "../h/mbuf.h" -#include "../h/protosw.h" -#include "../h/socket.h" -#include "../net/in.h" -#include "../net/in_systm.h" +/* + * Copyright (c) 1982, 1986 Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms are permitted + * provided that this notice is preserved and that due credit is given + * to the University of California at Berkeley. The name of the University + * may not be used to endorse or promote products derived from this + * software without specific prior written permission. This software + * is provided ``as is'' without express or implied warranty. + * + * @(#)ip_input.c 7.9 (Berkeley) %G% + */ + +#include "param.h" +#include "systm.h" +#include "mbuf.h" +#include "domain.h" +#include "protosw.h" +#include "socket.h" +#include "errno.h" +#include "time.h" +#include "kernel.h" + #include "../net/if.h" -#include "../net/ip.h" /* belongs before in.h */ -#include "../net/ip_var.h" -#include "../net/ip_icmp.h" -#include "../net/tcp.h" -#include +#include "../net/route.h" + +#include "in.h" +#include "in_pcb.h" +#include "in_systm.h" +#include "in_var.h" +#include "ip.h" +#include "ip_var.h" +#include "ip_icmp.h" +#include "tcp.h" u_char ip_protox[IPPROTO_MAX]; int ipqmaxlen = IFQ_MAXLEN; -struct ifnet *ifinet; /* first inet interface */ +struct in_ifaddr *in_ifaddr; /* first inet address */ + +/* + * We need to save the IP options in case a protocol wants to respond + * to an incoming packet over the same route if the packet got here + * using IP source routing. This allows connection establishment and + * maintenance when the remote end is on a network that is not known + * to us. + */ +int ip_nhops = 0; +static struct ip_srcrt { + char nop; /* one NOP to align */ + char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */ + struct in_addr route[MAX_IPOPTLEN]; +} ip_srcrt; /* * IP initialization: fill in IP protocol switch table. @@ -28,25 +61,25 @@ ip_init() register struct protosw *pr; register int i; -COUNT(IP_INIT); - pr = pffindproto(PF_INET, IPPROTO_RAW); + pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == 0) panic("ip_init"); for (i = 0; i < IPPROTO_MAX; i++) - ip_protox[i] = pr - protosw; - for (pr = protosw; pr <= protoswLAST; pr++) - if (pr->pr_family == PF_INET && + ip_protox[i] = pr - inetsw; + for (pr = inetdomain.dom_protosw; + pr < inetdomain.dom_protoswNPROTOSW; pr++) + if (pr->pr_domain->dom_family == PF_INET && pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) - ip_protox[pr->pr_protocol] = pr - protosw; + ip_protox[pr->pr_protocol] = pr - inetsw; ipq.next = ipq.prev = &ipq; - ip_id = time & 0xffff; + ip_id = time.tv_sec & 0xffff; ipintrq.ifq_maxlen = ipqmaxlen; - ifinet = if_ifwithaf(AF_INET); } u_char ipcksum = 1; struct ip *ip_reass(); struct sockaddr_in ipaddr = { AF_INET }; +struct route ipforward_rt; /* * Ip input routine. Checksum and byte swap header. If fragmented @@ -57,46 +90,64 @@ ipintr() { register struct ip *ip; register struct mbuf *m; - struct mbuf *m0, *mopt; + struct mbuf *m0; register int i; register struct ipq *fp; + register struct in_ifaddr *ia; + struct ifnet *ifp; int hlen, s; -COUNT(IPINTR); next: /* * Get next datagram off input queue and get IP header * in first mbuf. */ s = splimp(); - IF_DEQUEUE(&ipintrq, m); + IF_DEQUEUEIF(&ipintrq, m, ifp); splx(s); if (m == 0) return; + /* + * If no IP addresses have been set yet but the interfaces + * are receiving, can't do anything with incoming packets yet. + */ + if (in_ifaddr == NULL) + goto bad; + ipstat.ips_total++; if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) && - (m = m_pullup(m, sizeof (struct ip))) == 0) - return; + (m = m_pullup(m, sizeof (struct ip))) == 0) { + ipstat.ips_toosmall++; + goto next; + } ip = mtod(m, struct ip *); - if ((hlen = ip->ip_hl << 2) > m->m_len) { - if ((m = m_pullup(m, hlen)) == 0) - return; + hlen = ip->ip_hl << 2; + if (hlen < sizeof(struct ip)) { /* minimum header length */ + ipstat.ips_badhlen++; + goto bad; + } + if (hlen > m->m_len) { + if ((m = m_pullup(m, hlen)) == 0) { + ipstat.ips_badhlen++; + goto next; + } ip = mtod(m, struct ip *); } if (ipcksum) if (ip->ip_sum = in_cksum(m, hlen)) { - printf("ip_sum %x\n", ip->ip_sum); /* XXX */ ipstat.ips_badsum++; goto bad; } -#if vax /* * Convert fields to host representation. */ ip->ip_len = ntohs((u_short)ip->ip_len); + if (ip->ip_len < hlen) { + ipstat.ips_badlen++; + goto bad; + } ip->ip_id = ntohs(ip->ip_id); ip->ip_off = ntohs((u_short)ip->ip_off); -#endif /* * Check that the amount of data in the buffers @@ -104,7 +155,7 @@ next: * Trim mbufs if longer than we expect. * Drop packet if shorter than we expect. */ - i = -ip->ip_len; + i = -(u_short)ip->ip_len; m0 = m; for (;;) { i += m->m_len; @@ -115,6 +166,7 @@ next: if (i != 0) { if (i < 0) { ipstat.ips_tooshort++; + m = m0; goto bad; } if (i <= m->m_len) @@ -128,76 +180,108 @@ next: * Process options and, if not destined for us, * ship it on. ip_dooptions returns 1 when an * error was detected (causing an icmp message - * to be sent). + * to be sent and the original packet to be freed). */ - if (hlen > sizeof (struct ip) && ip_dooptions(ip)) + ip_nhops = 0; /* for source routed packets */ + if (hlen > sizeof (struct ip) && ip_dooptions(ip, ifp)) goto next; /* - * Fast check on the first internet - * interface in the list. + * Check our list of addresses, to see if the packet is for us. */ - if (ifinet) { - struct sockaddr_in *sin; + for (ia = in_ifaddr; ia; ia = ia->ia_next) { +#define satosin(sa) ((struct sockaddr_in *)(sa)) - sin = (struct sockaddr_in *)&ifinet->if_addr; - if (sin->sin_addr.s_addr == ip->ip_dst.s_addr) - goto ours; - sin = (struct sockaddr_in *)&ifinet->if_broadaddr; - if ((ifinet->if_flags & IFF_BROADCAST) && - sin->sin_addr.s_addr == ip->ip_dst.s_addr) + if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr) goto ours; + if ( +#ifdef DIRECTED_BROADCAST + ia->ia_ifp == ifp && +#endif + (ia->ia_ifp->if_flags & IFF_BROADCAST)) { + u_long t; + + if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == + ip->ip_dst.s_addr) + goto ours; + if (ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr) + goto ours; + /* + * Look for all-0's host part (old broadcast addr), + * either for subnet or net. + */ + t = ntohl(ip->ip_dst.s_addr); + if (t == ia->ia_subnet) + goto ours; + if (t == ia->ia_net) + goto ours; + } } - ipaddr.sin_addr = ip->ip_dst; - if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) { - ip_forward(ip); - goto next; - } + if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) + goto ours; + if (ip->ip_dst.s_addr == INADDR_ANY) + goto ours; -ours: /* - * Look for queue of fragments - * of this datagram. + * Not for us; forward if possible and desirable. */ - for (fp = ipq.next; fp != &ipq; fp = fp->next) - if (ip->ip_id == fp->ipq_id && - ip->ip_src.s_addr == fp->ipq_src.s_addr && - ip->ip_dst.s_addr == fp->ipq_dst.s_addr && - ip->ip_p == fp->ipq_p) - goto found; - fp = 0; -found: + ip_forward(ip, ifp); + goto next; +ours: /* - * Adjust ip_len to not reflect header, - * set ip_mff if more fragments are expected, - * convert offset of this to bytes. + * If offset or IP_MF are set, must reassemble. + * Otherwise, nothing need be done. + * (We could look in the reassembly queue to see + * if the packet was previously fragmented, + * but it's not worth the time; just let them time out.) */ - ip->ip_len -= hlen; - ((struct ipasfrag *)ip)->ipf_mff = 0; - if (ip->ip_off & IP_MF) - ((struct ipasfrag *)ip)->ipf_mff = 1; - ip->ip_off <<= 3; + if (ip->ip_off &~ IP_DF) { + /* + * Look for queue of fragments + * of this datagram. + */ + for (fp = ipq.next; fp != &ipq; fp = fp->next) + if (ip->ip_id == fp->ipq_id && + ip->ip_src.s_addr == fp->ipq_src.s_addr && + ip->ip_dst.s_addr == fp->ipq_dst.s_addr && + ip->ip_p == fp->ipq_p) + goto found; + fp = 0; +found: - /* - * If datagram marked as having more fragments - * or if this is not the first fragment, - * attempt reassembly; if it succeeds, proceed. - */ - if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) { - ip = ip_reass((struct ipasfrag *)ip, fp); - if (ip == 0) - goto next; - hlen = ip->ip_hl << 2; - m = dtom(ip); + /* + * Adjust ip_len to not reflect header, + * set ip_mff if more fragments are expected, + * convert offset of this to bytes. + */ + ip->ip_len -= hlen; + ((struct ipasfrag *)ip)->ipf_mff = 0; + if (ip->ip_off & IP_MF) + ((struct ipasfrag *)ip)->ipf_mff = 1; + ip->ip_off <<= 3; + + /* + * If datagram marked as having more fragments + * or if this is not the first fragment, + * attempt reassembly; if it succeeds, proceed. + */ + if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) { + ipstat.ips_fragments++; + ip = ip_reass((struct ipasfrag *)ip, fp); + if (ip == 0) + goto next; + m = dtom(ip); + } else + if (fp) + ip_freef(fp); } else - if (fp) - (void) ip_freef(fp); + ip->ip_len -= hlen; /* * Switch out to protocol's input routine. */ - (*protosw[ip_protox[ip->ip_p]].pr_input)(m); + (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, ifp); goto next; bad: m_freem(m); @@ -220,7 +304,6 @@ ip_reass(ip, fp) struct mbuf *t; int hlen = ip->ip_hl << 2; int i, next; -COUNT(IP_REASS); /* * Presence of header sizes in mbufs @@ -233,9 +316,8 @@ COUNT(IP_REASS); * If first fragment to arrive, create a reassembly queue. */ if (fp == 0) { - if ((t = m_get(M_WAIT)) == NULL) + if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL) goto dropfrag; - t->m_off = MMINOFF; fp = mtod(t, struct ipq *); insque(fp, &ipq); fp->ipq_ttl = IPFRAGTTL; @@ -331,11 +413,12 @@ insert: remque(fp); (void) m_free(dtom(fp)); m = dtom(ip); - m->m_len += sizeof (struct ipasfrag); - m->m_off -= sizeof (struct ipasfrag); + m->m_len += (ip->ip_hl << 2); + m->m_off -= (ip->ip_hl << 2); return ((struct ip *)ip); dropfrag: + ipstat.ips_fragdropped++; m_freem(m); return (0); } @@ -344,21 +427,18 @@ dropfrag: * Free a fragment reassembly header and all * associated datagrams. */ -struct ipq * ip_freef(fp) struct ipq *fp; { - register struct ipasfrag *q; - struct mbuf *m; -COUNT(IP_FREEF); + register struct ipasfrag *q, *p; - for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) + for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = p) { + p = q->ipf_next; + ip_deq(q); m_freem(dtom(q)); - m = dtom(fp); - fp = fp->next; - remque(fp->prev); - (void) m_free(m); - return (fp); + } + remque(fp); + (void) m_free(dtom(fp)); } /* @@ -369,7 +449,6 @@ ip_enq(p, prev) register struct ipasfrag *p, *prev; { -COUNT(IP_ENQ); p->ipf_prev = prev; p->ipf_next = prev->ipf_next; prev->ipf_next->ipf_prev = p; @@ -383,7 +462,6 @@ ip_deq(p) register struct ipasfrag *p; { -COUNT(IP_DEQ); p->ipf_prev->ipf_next = p->ipf_next; p->ipf_next->ipf_prev = p->ipf_prev; } @@ -398,17 +476,19 @@ ip_slowtimo() register struct ipq *fp; int s = splnet(); -COUNT(IP_SLOWTIMO); fp = ipq.next; if (fp == 0) { splx(s); return; } - while (fp != &ipq) - if (--fp->ipq_ttl == 0) - fp = ip_freef(fp); - else - fp = fp->next; + while (fp != &ipq) { + --fp->ipq_ttl; + fp = fp->next; + if (fp->prev->ipq_ttl == 0) { + ipstat.ips_fragtimeout++; + ip_freef(fp->prev); + } + } splx(s); } @@ -418,37 +498,46 @@ COUNT(IP_SLOWTIMO); ip_drain() { -COUNT(IP_DRAIN); - while (ipq.next != &ipq) - (void) ip_freef(ipq.next); + while (ipq.next != &ipq) { + ipstat.ips_fragdropped++; + ip_freef(ipq.next); + } } +extern struct in_ifaddr *ifptoia(); +struct in_ifaddr *ip_rtaddr(); + /* * Do option processing on a datagram, * possibly discarding it if bad options * are encountered. */ -ip_dooptions(ip) - struct ip *ip; +ip_dooptions(ip, ifp) + register struct ip *ip; + struct ifnet *ifp; { register u_char *cp; - int opt, optlen, cnt, code, type; - struct in_addr *sin; + int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB; register struct ip_timestamp *ipt; - register struct ifnet *ifp; - struct in_addr t; + register struct in_ifaddr *ia; + struct in_addr *sin; + n_time ntime; -COUNT(IP_DOOPTIONS); cp = (u_char *)(ip + 1); cnt = (ip->ip_hl << 2) - sizeof (struct ip); for (; cnt > 0; cnt -= optlen, cp += optlen) { - opt = cp[0]; + opt = cp[IPOPT_OPTVAL]; if (opt == IPOPT_EOL) break; if (opt == IPOPT_NOP) optlen = 1; - else - optlen = cp[1]; + else { + optlen = cp[IPOPT_OLEN]; + if (optlen <= 0 || optlen > cnt) { + code = &cp[IPOPT_OLEN] - (u_char *)ip; + goto bad; + } + } switch (opt) { default: @@ -464,30 +553,80 @@ COUNT(IP_DOOPTIONS); * address on directly accessible net. */ case IPOPT_LSRR: - if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1)) - break; - sin = (struct in_addr *)(cp + cp[2]); - ipaddr.sin_addr = *sin; - ifp = if_ifwithaddr((struct sockaddr *)&ipaddr); - type = ICMP_UNREACH, code = ICMP_UNREACH_SRCFAIL; - if (ifp == 0) { - if (opt == IPOPT_SSRR) + case IPOPT_SSRR: + if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) { + code = &cp[IPOPT_OFFSET] - (u_char *)ip; + goto bad; + } + ipaddr.sin_addr = ip->ip_dst; + ia = (struct in_ifaddr *) + ifa_ifwithaddr((struct sockaddr *)&ipaddr); + if (ia == 0) { + if (opt == IPOPT_SSRR) { + type = ICMP_UNREACH; + code = ICMP_UNREACH_SRCFAIL; goto bad; + } + /* + * Loose routing, and not at next destination + * yet; nothing to do except forward. + */ + break; + } + off--; /* 0 origin */ + if (off > optlen - sizeof(struct in_addr)) { + /* + * End of source route. Should be for us. + */ + save_rte(cp, ip->ip_src); break; } - t = ip->ip_dst; ip->ip_dst = *sin; *sin = t; - cp[2] += 4; - if (cp[2] > optlen - (sizeof (long) - 1)) + /* + * locate outgoing interface + */ + bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr, + sizeof(ipaddr.sin_addr)); + if ((opt == IPOPT_SSRR && + in_iaonnetof(in_netof(ipaddr.sin_addr)) == 0) || + (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) { + type = ICMP_UNREACH; + code = ICMP_UNREACH_SRCFAIL; + goto bad; + } + ip->ip_dst = ipaddr.sin_addr; + bcopy((caddr_t)&(IA_SIN(ia)->sin_addr), + (caddr_t)(cp + off), sizeof(struct in_addr)); + cp[IPOPT_OFFSET] += sizeof(struct in_addr); + break; + + case IPOPT_RR: + if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) { + code = &cp[IPOPT_OFFSET] - (u_char *)ip; + goto bad; + } + /* + * If no space remains, ignore. + */ + off--; /* 0 origin */ + if (off > optlen - sizeof(struct in_addr)) break; - ip->ip_dst = sin[1]; - if (opt == IPOPT_SSRR && - if_ifonnetof(ip->ip_dst.s_net) == 0) + bcopy((caddr_t)(&ip->ip_dst), (caddr_t)&ipaddr.sin_addr, + sizeof(ipaddr.sin_addr)); + /* + * locate outgoing interface + */ + if ((ia = ip_rtaddr(ipaddr.sin_addr)) == 0) { + type = ICMP_UNREACH; + code = ICMP_UNREACH_HOST; goto bad; + } + bcopy((caddr_t)&(IA_SIN(ia)->sin_addr), + (caddr_t)(cp + off), sizeof(struct in_addr)); + cp[IPOPT_OFFSET] += sizeof(struct in_addr); break; case IPOPT_TS: code = cp - (u_char *)ip; - type = ICMP_PARAMPROB; ipt = (struct ip_timestamp *)cp; if (ipt->ipt_len < 5) goto bad; @@ -496,42 +635,145 @@ COUNT(IP_DOOPTIONS); goto bad; break; } - sin = (struct in_addr *)(cp+cp[2]); + sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1); switch (ipt->ipt_flg) { case IPOPT_TS_TSONLY: break; case IPOPT_TS_TSANDADDR: - if (ipt->ipt_ptr + 8 > ipt->ipt_len) + if (ipt->ipt_ptr + sizeof(n_time) + + sizeof(struct in_addr) > ipt->ipt_len) goto bad; - if (ifinet == 0) - goto bad; /* ??? */ - *sin++ = ((struct sockaddr_in *)&ifinet->if_addr)->sin_addr; + ia = ifptoia(ifp); + bcopy((caddr_t)&IA_SIN(ia)->sin_addr, + (caddr_t)sin, sizeof(struct in_addr)); + ipt->ipt_ptr += sizeof(struct in_addr); break; case IPOPT_TS_PRESPEC: - ipaddr.sin_addr = *sin; - if (!if_ifwithaddr((struct sockaddr *)&ipaddr)) - continue; - if (ipt->ipt_ptr + 8 > ipt->ipt_len) + if (ipt->ipt_ptr + sizeof(n_time) + + sizeof(struct in_addr) > ipt->ipt_len) goto bad; - ipt->ipt_ptr += 4; + bcopy((caddr_t)sin, (caddr_t)&ipaddr.sin_addr, + sizeof(struct in_addr)); + if (ifa_ifwithaddr((struct sockaddr *)&ipaddr) == 0) + continue; + ipt->ipt_ptr += sizeof(struct in_addr); break; default: goto bad; } - *(n_time *)sin = iptime(); - ipt->ipt_ptr += 4; + ntime = iptime(); + bcopy((caddr_t)&ntime, (caddr_t)cp + ipt->ipt_ptr - 1, + sizeof(n_time)); + ipt->ipt_ptr += sizeof(n_time); } } return (0); bad: - icmp_error(ip, type, code); + icmp_error(ip, type, code, ifp); return (1); } +/* + * Given address of next destination (final or next hop), + * return internet address info of interface to be used to get there. + */ +struct in_ifaddr * +ip_rtaddr(dst) + struct in_addr dst; +{ + register struct sockaddr_in *sin; + register struct in_ifaddr *ia; + + sin = (struct sockaddr_in *) &ipforward_rt.ro_dst; + + if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) { + if (ipforward_rt.ro_rt) { + RTFREE(ipforward_rt.ro_rt); + ipforward_rt.ro_rt = 0; + } + sin->sin_family = AF_INET; + sin->sin_addr = dst; + + rtalloc(&ipforward_rt); + } + if (ipforward_rt.ro_rt == 0) + return ((struct in_ifaddr *)0); + /* + * Find address associated with outgoing interface. + */ + for (ia = in_ifaddr; ia; ia = ia->ia_next) + if (ia->ia_ifp == ipforward_rt.ro_rt->rt_ifp) + break; + return (ia); +} + +/* + * Save incoming source route for use in replies, + * to be picked up later by ip_srcroute if the receiver is interested. + */ +save_rte(option, dst) + u_char *option; + struct in_addr dst; +{ + unsigned olen; + extern ipprintfs; + + olen = option[IPOPT_OLEN]; + if (olen > sizeof(ip_srcrt) - 1) { + if (ipprintfs) + printf("save_rte: olen %d\n", olen); + return; + } + bcopy((caddr_t)option, (caddr_t)ip_srcrt.srcopt, olen); + ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr); + ip_srcrt.route[ip_nhops++] = dst; +} + +/* + * Retrieve incoming source route for use in replies, + * in the same form used by setsockopt. + * The first hop is placed before the options, will be removed later. + */ +struct mbuf * +ip_srcroute() +{ + register struct in_addr *p, *q; + register struct mbuf *m; + + if (ip_nhops == 0) + return ((struct mbuf *)0); + m = m_get(M_DONTWAIT, MT_SOOPTS); + if (m == 0) + return ((struct mbuf *)0); + m->m_len = ip_nhops * sizeof(struct in_addr) + IPOPT_OFFSET + 1 + 1; + + /* + * First save first hop for return route + */ + p = &ip_srcrt.route[ip_nhops - 1]; + *(mtod(m, struct in_addr *)) = *p--; + + /* + * Copy option fields and padding (nop) to mbuf. + */ + ip_srcrt.nop = IPOPT_NOP; + bcopy((caddr_t)&ip_srcrt, mtod(m, caddr_t) + sizeof(struct in_addr), + IPOPT_OFFSET + 1 + 1); + q = (struct in_addr *)(mtod(m, caddr_t) + + sizeof(struct in_addr) + IPOPT_OFFSET + 1 + 1); + /* + * Record return path as an IP source route, + * reversing the path (pointers are now aligned). + */ + while (p >= ip_srcrt.route) + *q++ = *p--; + return (m); +} + /* * Strip out IP options, at higher * level protocol in the kernel. @@ -544,109 +786,195 @@ ip_stripoptions(ip, mopt) { register int i; register struct mbuf *m; + register caddr_t opts; int olen; -COUNT(IP_STRIPOPTIONS); olen = (ip->ip_hl<<2) - sizeof (struct ip); m = dtom(ip); - ip++; + opts = (caddr_t)(ip + 1); if (mopt) { mopt->m_len = olen; mopt->m_off = MMINOFF; - bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen); + bcopy(opts, mtod(mopt, caddr_t), (unsigned)olen); } i = m->m_len - (sizeof (struct ip) + olen); - bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i); + bcopy(opts + olen, opts, (unsigned)i); m->m_len -= olen; + ip->ip_hl = sizeof(struct ip) >> 2; } -u_char inetctlerrmap[] = { - ECONNABORTED, ECONNABORTED, 0, 0, - 0, -#ifdef notdef - EHOSTUNREACH, EHOSTDOWN, ENETUNREACH, EHOSTUNREACH, -#else - ENETUNREACH, ENETUNREACH, ENETUNREACH, ENETUNREACH, -#endif - ECONNREFUSED, ECONNREFUSED, EMSGSIZE, 0, - 0, 0, 0, 0 +u_char inetctlerrmap[PRC_NCMDS] = { + 0, 0, 0, 0, + 0, 0, EHOSTDOWN, EHOSTUNREACH, + ENETUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, + EMSGSIZE, EHOSTUNREACH, 0, 0, + 0, 0, 0, 0, + ENOPROTOOPT }; -ip_ctlinput(cmd, arg) - int cmd; - caddr_t arg; -{ - struct in_addr *sin; - int tcp_abort(), udp_abort(); - extern struct inpcb tcb, udb; - - if (cmd < 0 || cmd > PRC_NCMDS) - return; - if (inetctlerrmap[cmd] == 0) - return; /* XXX */ - if (cmd == PRC_IFDOWN) - sin = &((struct sockaddr_in *)arg)->sin_addr; - else if (cmd == PRC_HOSTDEAD || cmd == PRC_HOSTUNREACH) - sin = (struct in_addr *)arg; - else - sin = &((struct icmp *)arg)->icmp_ip.ip_dst; - in_pcbnotify(&tcb, sin, inetctlerrmap[cmd], tcp_abort); - in_pcbnotify(&udb, sin, inetctlerrmap[cmd], udp_abort); -} - +#ifndef IPFORWARDING +#define IPFORWARDING 1 +#endif +#ifndef IPSENDREDIRECTS +#define IPSENDREDIRECTS 1 +#endif int ipprintfs = 0; -int ipforwarding = 1; +int ipforwarding = IPFORWARDING; +extern int in_interfaces; +int ipsendredirects = IPSENDREDIRECTS; + /* * Forward a packet. If some error occurs return the sender - * and icmp packet. Note we can't always generate a meaningful - * icmp message because icmp doesn't have a large enough repetoire + * an icmp packet. Note we can't always generate a meaningful + * icmp message because icmp doesn't have a large enough repertoire * of codes and types. + * + * If not forwarding (possibly because we have only a single external + * network), just drop the packet. This could be confusing if ipforwarding + * was zero but some routing protocol was advancing us as a gateway + * to somewhere. However, we must let the routing protocol deal with that. */ -ip_forward(ip) +ip_forward(ip, ifp) register struct ip *ip; + struct ifnet *ifp; { - register int error, type, code; - struct mbuf *mopt; + register int error, type = 0, code; + register struct sockaddr_in *sin; + struct mbuf *mcopy; + struct in_addr dest; + dest.s_addr = 0; if (ipprintfs) printf("forward: src %x dst %x ttl %x\n", ip->ip_src, ip->ip_dst, ip->ip_ttl); - if (ipforwarding == 0) { - /* can't tell difference between net and host */ + ip->ip_id = htons(ip->ip_id); + if (ipforwarding == 0 || in_interfaces <= 1) { + ipstat.ips_cantforward++; +#ifdef GATEWAY type = ICMP_UNREACH, code = ICMP_UNREACH_NET; goto sendicmp; +#else + m_freem(dtom(ip)); + return; +#endif } - if (ip->ip_ttl < IPTTLDEC) { + if (ip->ip_ttl <= IPTTLDEC) { type = ICMP_TIMXCEED, code = ICMP_TIMXCEED_INTRANS; goto sendicmp; } ip->ip_ttl -= IPTTLDEC; - mopt = m_get(M_DONTWAIT); - if (mopt == 0) { - m_freem(dtom(ip)); - return; - } - ip_stripoptions(ip, mopt); - /* last 0 here means no directed broadcast */ - if ((error = ip_output(dtom(ip), mopt, 0, 0)) == 0) - return; -#ifdef notdef /* - * Want to generate a message, but lower - * layers assume they can free up a message - * in the event of an error. This causes - * the call to icmp_error to work on ``freed'' - * mbuf's, and worse. + * Save at most 64 bytes of the packet in case + * we need to generate an ICMP message to the src. + */ + mcopy = m_copy(dtom(ip), 0, imin((int)ip->ip_len, 64)); + + sin = (struct sockaddr_in *)&ipforward_rt.ro_dst; + if (ipforward_rt.ro_rt == 0 || + ip->ip_dst.s_addr != sin->sin_addr.s_addr) { + if (ipforward_rt.ro_rt) { + RTFREE(ipforward_rt.ro_rt); + ipforward_rt.ro_rt = 0; + } + sin->sin_family = AF_INET; + sin->sin_addr = ip->ip_dst; + + rtalloc(&ipforward_rt); + } + /* + * If forwarding packet using same interface that it came in on, + * perhaps should send a redirect to sender to shortcut a hop. + * Only send redirect if source is sending directly to us, + * and if packet was not source routed (or has any options). + * Also, don't send redirect if forwarding using a default route + * or a route modfied by a redirect. */ - type = ICMP_UNREACH, code = 0; /* need ``undefined'' */ - if (error == ENETUNREACH || error == ENETDOWN) - code = ICMP_UNREACH_NET; - else if (error == EMSGSIZE) +#define satosin(sa) ((struct sockaddr_in *)(sa)) + if (ipforward_rt.ro_rt && ipforward_rt.ro_rt->rt_ifp == ifp && + (ipforward_rt.ro_rt->rt_flags & RTF_DYNAMIC) == 0 && + satosin(&ipforward_rt.ro_rt->rt_dst)->sin_addr.s_addr != 0 && + ipsendredirects && ip->ip_hl == (sizeof(struct ip) >> 2)) { + struct in_ifaddr *ia; + u_long src = ntohl(ip->ip_src.s_addr); + u_long dst = ntohl(ip->ip_dst.s_addr); + + if ((ia = ifptoia(ifp)) && + (src & ia->ia_subnetmask) == ia->ia_subnet) { + if (ipforward_rt.ro_rt->rt_flags & RTF_GATEWAY) + dest = satosin(&ipforward_rt.ro_rt->rt_gateway)->sin_addr; + else + dest = ip->ip_dst; + /* + * If the destination is reached by a route to host, + * is on a subnet of a local net, or is directly + * on the attached net (!), use host redirect. + * (We may be the correct first hop for other subnets.) + */ + type = ICMP_REDIRECT; + code = ICMP_REDIRECT_NET; + if ((ipforward_rt.ro_rt->rt_flags & RTF_HOST) || + (ipforward_rt.ro_rt->rt_flags & RTF_GATEWAY) == 0) + code = ICMP_REDIRECT_HOST; + else for (ia = in_ifaddr; ia = ia->ia_next; ) + if ((dst & ia->ia_netmask) == ia->ia_net) { + if (ia->ia_subnetmask != ia->ia_netmask) + code = ICMP_REDIRECT_HOST; + break; + } + if (ipprintfs) + printf("redirect (%d) to %x\n", code, dest); + } + } + + error = ip_output(dtom(ip), (struct mbuf *)0, &ipforward_rt, + IP_FORWARDING); + if (error) + ipstat.ips_cantforward++; + else if (type) + ipstat.ips_redirectsent++; + else { + if (mcopy) + m_freem(mcopy); + ipstat.ips_forward++; + return; + } + if (mcopy == NULL) + return; + ip = mtod(mcopy, struct ip *); + type = ICMP_UNREACH; + switch (error) { + + case 0: /* forwarded, but need redirect */ + type = ICMP_REDIRECT; + /* code set above */ + break; + + case ENETUNREACH: + case ENETDOWN: + if (in_localaddr(ip->ip_dst)) + code = ICMP_UNREACH_HOST; + else + code = ICMP_UNREACH_NET; + break; + + case EMSGSIZE: code = ICMP_UNREACH_NEEDFRAG; -#else - return; -#endif + break; + + case EPERM: + code = ICMP_UNREACH_PORT; + break; + + case ENOBUFS: + type = ICMP_SOURCEQUENCH; + break; + + case EHOSTDOWN: + case EHOSTUNREACH: + code = ICMP_UNREACH_HOST; + break; + } sendicmp: - icmp_error(ip, type, code); + icmp_error(ip, type, code, ifp, dest); }