collect more statistics; add sanity check to avoid bad icmp packets
[unix-history] / usr / src / sys / netinet / ip_input.c
index cf55af6..37f7c27 100644 (file)
-/* ip_input.c 1.4 81/10/18 */
+/*     ip_input.c      1.65    83/02/23        */
+
 #include "../h/param.h"
 #include "../h/param.h"
-#include "../bbnnet/net.h"
-#include "../bbnnet/tcp.h"
-#include "../bbnnet/ip.h"
-#include "../bbnnet/ucb.h"
 #include "../h/systm.h"
 #include "../h/systm.h"
+#include "../h/mbuf.h"
+#include "../h/domain.h"
+#include "../h/protosw.h"
+#include "../h/socket.h"
+#include "../h/errno.h"
+#include "../h/time.h"
+#include "../h/kernel.h"
+
+#include "../net/if.h"
+#include "../net/route.h"
+
+#include "../netinet/in.h"
+#include "../netinet/in_pcb.h"
+#include "../netinet/in_systm.h"
+#include "../netinet/ip.h"
+#include "../netinet/ip_var.h"
+#include "../netinet/ip_icmp.h"
+#include "../netinet/tcp.h"
+
+u_char ip_protox[IPPROTO_MAX];
+int    ipqmaxlen = IFQ_MAXLEN;
+struct ifnet *ifinet;                  /* first inet interface */
+
+/*
+ * IP initialization: fill in IP protocol switch table.
+ * All protocols not implemented in kernel go to raw IP protocol handler.
+ *
+ * Every ip_protox[] slot is first pointed at the raw IP switch entry;
+ * slots for protocols that have their own PF_INET entry are then
+ * overridden.  Also empties the fragment reassembly queue, seeds ip_id
+ * from the clock, sizes the IP input queue and records the result of
+ * if_ifwithaf(AF_INET) in ifinet.  Panics if no raw IP entry exists.
+ */
+ip_init()
+{
+       register struct protosw *pr;
+       register int i;
+
+       pr = pffindproto(PF_INET, IPPROTO_RAW);
+       if (pr == 0)
+               panic("ip_init");
+       for (i = 0; i < IPPROTO_MAX; i++)
+               ip_protox[i] = pr - inetsw;
+       /*
+        * dom_protoswNPROTOSW is the end marker of the switch table
+        * (one past the last entry), so it must not be examined itself.
+        */
+       for (pr = inetdomain.dom_protosw;
+           pr < inetdomain.dom_protoswNPROTOSW; pr++)
+               if (pr->pr_family == PF_INET &&
+                   pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
+                       ip_protox[pr->pr_protocol] = pr - inetsw;
+       ipq.next = ipq.prev = &ipq;
+       ip_id = time.tv_sec & 0xffff;
+       ipintrq.ifq_maxlen = ipqmaxlen;
+       ifinet = if_ifwithaf(AF_INET);
+}
 
 
-int nosum = 0;
+u_char ipcksum = 1;
+struct ip *ip_reass();
+struct sockaddr_in ipaddr = { AF_INET };
 
 
-ip_input(mp)
-struct mbuf *mp;
+/*
+ * Ip input routine.  Checksum and byte swap header.  If fragmented
+ * try to reassemble.  If complete and fragment queue exists, discard.
+ * Process options.  Pass to next level.
+ */
+ipintr()
 {
 {
-       register struct ip *ip, *q;
-       register struct ipq *fp;
+       register struct ip *ip;
        register struct mbuf *m;
        register struct mbuf *m;
-       register i;
-       struct mbuf *n;
-       int hlen;
-       struct ip *p, *savq;
-       struct ipq *ip_findf();
-
-COUNT(IP_INPUT);
-       ip = (struct ip *)((int)mp + mp->m_off);        /* ->ip hdr */
-
-       /* make sure header does not overflow mbuf */
+       struct mbuf *m0;
+       register int i;
+       register struct ipq *fp;
+       int hlen, s;
 
 
-       if ((hlen = ip->ip_hl << 2) > mp->m_len) {
-               printf("ip header overflow\n");
-               m_freem(mp);
+next:
+       /*
+        * Get next datagram off input queue and get IP header
+        * in first mbuf.
+        */
+       s = splimp();
+       IF_DEQUEUE(&ipintrq, m);
+       splx(s);
+       if (m == 0)
                return;
                return;
+       if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
+           (m = m_pullup(m, sizeof (struct ip))) == 0) {
+               ipstat.ips_toosmall++;
+               goto next;
        }
        }
-
-       i = (unsigned short)ip->ip_sum;
-       ip->ip_sum = 0;
-
-       if (i != (unsigned short)cksum(mp, hlen)) {     /* verify checksum */
-               netstat.ip_badsum++;
-               if (!nosum) {
-                       m_freem(mp);
-                       return;
+       ip = mtod(m, struct ip *);
+       if ((hlen = ip->ip_hl << 2) > m->m_len) {
+               if ((m = m_pullup(m, hlen)) == 0) {
+                       ipstat.ips_badhlen++;
+                       goto next;
                }
                }
+               ip = mtod(m, struct ip *);
        }
        }
-       ip_bswap(ip);
-       fp = netcb.n_ip_head ? ip_findf(ip) : 0;
+       if (ipcksum)
+               if (ip->ip_sum = in_cksum(m, hlen)) {
+                       ipstat.ips_badsum++;
+                       goto bad;
+               }
 
        /*
 
        /*
-        * adjust message length to remove any padding
+        * Convert fields to host representation.
         */
         */
-       for (i=0, m=mp; m != NULL; m = m->m_next) {
-               i += m->m_len;
-               n = m;
+       ip->ip_len = ntohs((u_short)ip->ip_len);
+       if (ip->ip_len < hlen) {
+               ipstat.ips_badlen++;
+               goto bad;
        }
        }
-       i -= ip->ip_len;
-
-       if (i != 0)
-               if (i > (int)n->m_len)
-                       m_adj(mp, -i);
-               else
-                       n->m_len -= i;
+       ip->ip_id = ntohs(ip->ip_id);
+       ip->ip_off = ntohs((u_short)ip->ip_off);
 
 
-       ip->ip_len -= hlen;     /* length of data */
-       ip->ip_mff = ((ip->ip_off & ip_mf) ? TRUE : FALSE);
-       ip->ip_off <<= 3;
-       if (ip->ip_mff || ip->ip_off)
-               goto fragged;
-       if (fp != NULL) {
-               q = fp->iqx.ip_next;
-               while (q != (struct ip *)fp) {
-                       m_freem(dtom(q));
-                       q = q->ip_next;
-               }
-               ip_freef(fp);           /* free header */
+       /*
+        * Check that the amount of data in the buffers
+        * is at least as much as the IP header would have us expect.
+        * Trim mbufs if longer than we expect.
+        * Drop packet if shorter than we expect.
+        */
+       i = -ip->ip_len;
+       m0 = m;
+       for (;;) {
+               i += m->m_len;
+               if (m->m_next == 0)
+                       break;
+               m = m->m_next;
        }
        }
-       if (hlen > sizeof (struct ip))
-               ip_opt(ip, hlen);
-       switch (ip->ip_p) {
-
-       case TCPROTO:
-               tcp_input(mp);
-               break;
-
-       default:
-               raw_input(mp, ip->ip_p, UIP);
-               break;
+       /*
+        * i is now the surplus (positive) or shortfall (negative) of
+        * buffered data relative to ip_len.  m is the last mbuf of the
+        * chain; m0 is still the first.
+        */
+       if (i != 0) {
+               if (i < 0) {
+                       ipstat.ips_tooshort++;
+                       /*
+                        * The "bad" exit frees the chain starting at m.
+                        * Rewind to the head so the leading mbufs are
+                        * released as well instead of being leaked.
+                        */
+                       m = m0;
+                       goto bad;
+               }
+               /* Trim in place when the tail mbuf holds all the excess. */
+               if (i <= m->m_len)
+                       m->m_len -= i;
+               else
+                       m_adj(m0, -i);
        }
        }
-       return;
+       m = m0;
 
 
-fragged:
-       /* -> msg buf beyond ip hdr if not first fragment */
+       /*
+        * Process options and, if not destined for us,
+        * ship it on.  ip_dooptions returns 1 when an
+        * error was detected (causing an icmp message
+        * to be sent).
+        */
+       if (hlen > sizeof (struct ip) && ip_dooptions(ip))
+               goto next;
 
 
-       if (ip->ip_off != 0) {
-               mp->m_off += hlen;
-               mp->m_len -= hlen;
+       /*
+        * Fast check on the first internet
+        * interface in the list.
+        */
+       if (ifinet) {
+               struct sockaddr_in *sin;
+
+               sin = (struct sockaddr_in *)&ifinet->if_addr;
+               if (sin->sin_addr.s_addr == ip->ip_dst.s_addr)
+                       goto ours;
+               sin = (struct sockaddr_in *)&ifinet->if_broadaddr;
+               if ((ifinet->if_flags & IFF_BROADCAST) &&
+                   sin->sin_addr.s_addr == ip->ip_dst.s_addr)
+                       goto ours;
        }
        }
-
-       if (fp == NULL) {               /* first fragment of datagram in */
-
-       /* set up reass.q header: enq it, set up as head of frag
-          chain, set a timer value, and move in ip header */
-
-               if ((m = m_get(1)) == NULL) {   /* allocate an mbuf */
-                       m_freem(mp);
-                       return;
-               }
-
-               fp = (struct ipq *)((int)m + MHEAD);
-               fp->iqx.ip_next = fp->iqx.ip_prev = (struct ip *)fp;
-               bcopy(ip, &fp->iqh, min(MLEN-28, hlen));
-               fp->iqh.ip_ttl = MAXTTL;
-               fp->iq_next = NULL;
-               fp->iq_prev = netcb.n_ip_tail;
-               if (netcb.n_ip_head != NULL)
-                       netcb.n_ip_tail->iq_next = fp;
-               else
-                       netcb.n_ip_head = fp;
-               netcb.n_ip_tail = fp;
+/* BEGIN GROT */
+#include "nd.h"
+#if NND > 0
+       /*
+        * Diskless machines don't initially know
+        * their address, so take packets from them
+        * if we're acting as a network disk server.
+        */
+       if (ip->ip_dst.s_addr == INADDR_ANY &&
+           (in_netof(ip->ip_src) == INADDR_ANY &&
+            in_lnaof(ip->ip_src) != INADDR_ANY))
+               goto ours;
+#endif
+/* END GROT */
+       ipaddr.sin_addr = ip->ip_dst;
+       if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) {
+               ip_forward(ip);
+               goto next;
        }
 
        }
 
-       /***********************************************************
-       *                                                          *
-       *              merge fragment into reass.q                 *
-       *    algorithm:   match  start  and  end  bytes  of new    *
-       *    fragment  with  fragments  on  the  queue.   if   no  *
-       *    overlaps  are  found,  add  new  frag. to the queue.  *
-       *    otherwise, adjust start and end of new frag.  so  no  *
-       *    overlap   and   add  remainder  to  queue.   if  any  *
-       *    fragments are completely covered by the new one,  or  *
-       *    if  the  new  one is completely duplicated, free the  *
-       *    fragments.                                            *
-       *                                                          *
-       ***********************************************************/
-
-       q = fp->iqx.ip_next;    /* -> top of reass. chain */
-       ip->ip_end = ip->ip_off + ip->ip_len - 1;
-
-       /* skip frags which new doesn't overlap at end */
-
-       while ((q != (struct ip *)fp) && (ip->ip_off > q->ip_end))
-               q = q->ip_next;
-
-       if (q == (struct ip *)fp)               /* frag at end of chain */
-               ip_enq(ip, fp->iqx.ip_prev);
-       
-       else {
-               if (ip->ip_end < q->ip_off)     /* frag doesn't overlap any on chain */
-                       ip_enq(ip, q->ip_prev);
-
-               /* new overlaps beginning of next frag only */
-
-               else if (ip->ip_end < q->ip_end) {
-                       if ((i = ip->ip_end - q->ip_off + 1) < ip->ip_len) {
-                               ip->ip_len -= i;
-                               ip->ip_end -= i;
-                               m_adj(mp, -i);
-                               ip_enq(ip, q->ip_prev);
-                       } else
-                               m_freem(mp);
-
-               /* new overlaps end of previous frag */
-
-               } else {
-
-                       savq = q;
-                       if (ip->ip_off <= q->ip_off) {  /* complete cover */
-                               savq = q->ip_prev;
-                               ip_deq(q);
-                               m_freem(dtom(q));
-                       
-                       } else {                        /* overlap */
-                               if ((i = q->ip_end - ip->ip_off + 1) < ip->ip_len) {
-                                       ip->ip_off += i;
-                                       ip->ip_len -= i;
-                                       m_adj(mp, i);
-                               } else
-                                       ip->ip_len = 0;
-                       }
+ours:
+       /*
+        * Look for queue of fragments
+        * of this datagram.
+        */
+       for (fp = ipq.next; fp != &ipq; fp = fp->next)
+               if (ip->ip_id == fp->ipq_id &&
+                   ip->ip_src.s_addr == fp->ipq_src.s_addr &&
+                   ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
+                   ip->ip_p == fp->ipq_p)
+                       goto found;
+       fp = 0;
+found:
 
 
-               /* new overlaps at beginning of successor frags */
+       /*
+        * Adjust ip_len to not reflect header,
+        * set ip_mff if more fragments are expected,
+        * convert offset of this to bytes.
+        */
+       ip->ip_len -= hlen;
+       ((struct ipasfrag *)ip)->ipf_mff = 0;
+       if (ip->ip_off & IP_MF)
+               ((struct ipasfrag *)ip)->ipf_mff = 1;
+       ip->ip_off <<= 3;
 
 
-                       q = savq->ip_next;
-                       while ((q != (struct ip *)fp) && (ip->ip_len != 0) &&
-                               (q->ip_off < ip->ip_end))
+       /*
+        * If datagram marked as having more fragments
+        * or if this is not the first fragment,
+        * attempt reassembly; if it succeeds, proceed.
+        */
+       if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
+               ip = ip_reass((struct ipasfrag *)ip, fp);
+               if (ip == 0)
+                       goto next;
+               hlen = ip->ip_hl << 2;
+               m = dtom(ip);
+       } else
+               if (fp)
+                       ip_freef(fp);
 
 
-                               /* complete cover */
+       /*
+        * Switch out to protocol's input routine.
+        */
+       (*inetsw[ip_protox[ip->ip_p]].pr_input)(m);
+       goto next;
+bad:
+       m_freem(m);
+       goto next;
+}
 
 
-                               if (q->ip_end <= ip->ip_end) {
-                                       p = q->ip_next;
-                                       ip_deq(q);
-                                       m_freem(dtom(q));
-                                       q = p;
+/*
+ * Take incoming datagram fragment and try to
+ * reassemble it into whole datagram.  If a chain for
+ * reassembly of this datagram already exists, then it
+ * is given as fp; otherwise have to make a chain.
+ */
+struct ip *
+ip_reass(ip, fp)
+       register struct ipasfrag *ip;
+       register struct ipq *fp;
+{
+       register struct mbuf *m = dtom(ip);
+       register struct ipasfrag *q;
+       struct mbuf *t;
+       int hlen = ip->ip_hl << 2;
+       int i, next;
 
 
-                               } else {        /* overlap */
+       /*
+        * Presence of header sizes in mbufs
+        * would confuse code below.
+        */
+       m->m_off += hlen;
+       m->m_len -= hlen;
 
 
-                                       if ((i = ip->ip_end - q->ip_off + 1) < ip->ip_len) {
-                                               ip->ip_len -= i;
-                                               ip->ip_end -= i;
-                                               m_adj(mp, -i);
-                                       } else
-                                               ip->ip_len = 0;
-                                       break;
-                               }
+       /*
+        * If first fragment to arrive, create a reassembly queue.
+        */
+       if (fp == 0) {
+               if ((t = m_get(M_WAIT, MT_FTABLE)) == NULL)
+                       goto dropfrag;
+               fp = mtod(t, struct ipq *);
+               insque(fp, &ipq);
+               fp->ipq_ttl = IPFRAGTTL;
+               fp->ipq_p = ip->ip_p;
+               fp->ipq_id = ip->ip_id;
+               fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
+               fp->ipq_src = ((struct ip *)ip)->ip_src;
+               fp->ipq_dst = ((struct ip *)ip)->ip_dst;
+               q = (struct ipasfrag *)fp;
+               goto insert;
+       }
 
 
-               /* enqueue whatever is left of new before successors */
+       /*
+        * Find a segment which begins after this one does.
+        */
+       for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
+               if (q->ip_off > ip->ip_off)
+                       break;
 
 
-                       if (ip->ip_len != 0)
-                               ip_enq(ip, savq);
-                       else
-                               m_freem(mp);
+       /*
+        * If there is a preceding segment, it may provide some of
+        * our data already.  If so, drop the data from the incoming
+        * segment.  If it provides all of our data, drop us.
+        */
+       if (q->ipf_prev != (struct ipasfrag *)fp) {
+               i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
+               if (i > 0) {
+                       if (i >= ip->ip_len)
+                               goto dropfrag;
+                       m_adj(dtom(ip), i);
+                       ip->ip_off += i;
+                       ip->ip_len -= i;
                }
        }
 
                }
        }
 
-       /* check for completed fragment reassembly */
-
-       if ((i = ip_done(fp)) == 0)
-               return;
+       /*
+        * While we overlap succeeding segments trim them or,
+        * if they are completely covered, dequeue them.
+        */
+       while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
+               /* i = number of bytes at the front of q that we duplicate */
+               i = (ip->ip_off + ip->ip_len) - q->ip_off;
+               if (i < q->ip_len) {
+                       q->ip_len -= i;
+                       q->ip_off += i;
+                       m_adj(dtom(q), i);
+                       break;
+               }
+               /*
+                * q is entirely covered.  Remember its mbuf, step past
+                * it, and unlink it from the chain BEFORE freeing: the
+                * link fields live inside the mbuf being released, so
+                * ip_deq must not run on already-freed storage.  This
+                * matches the unlink-then-free order used by ip_freef.
+                */
+               t = dtom(q);
+               q = q->ipf_next;
+               ip_deq(q->ipf_prev);
+               m_freem(t);
+       }
 
 
-       p = fp->iqx.ip_next;            /* -> top mbuf */
-       m = dtom(p);
-       p->ip_len = i;                  /* total data length */
-       ip_opt(p, p->ip_hl << 2);       /* option processing */
-       ip_mergef(fp);                  /* cleanup frag chain */
+insert:
+       /*
+        * Stick new segment in its place;
+        * check for complete reassembly.
+        */
+       ip_enq(ip, q->ipf_prev);
+       next = 0;
+       for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
+               if (q->ip_off != next)
+                       return (0);
+               next += q->ip_len;
+       }
+       if (q->ipf_prev->ipf_mff)
+               return (0);
 
 
-       /* copy src/dst internet address to header mbuf */
+       /*
+        * Reassembly is complete; concatenate fragments.
+        */
+       q = fp->ipq_next;
+       m = dtom(q);
+       t = m->m_next;
+       m->m_next = 0;
+       m_cat(m, t);
+       q = q->ipf_next;
+       while (q != (struct ipasfrag *)fp) {
+               t = dtom(q);
+               q = q->ipf_next;
+               m_cat(m, t);
+       }
 
 
-       bcopy(&fp->iqh.ip_src, &p->ip_src, 2*sizeof(struct socket));
-       ip_freef(fp);                   /* dequeue header */
-       i = p->ip_p;
-       if (i == TCPROTO)
-               tcp_input(m);
-       else
-               raw_input(m, i, UIP);
+       /*
+        * Create header for new ip packet by
+        * modifying header of first packet;
+        * dequeue and discard fragment reassembly header.
+        * Make header visible.
+        */
+       ip = fp->ipq_next;
+       ip->ip_len = next;
+       ((struct ip *)ip)->ip_src = fp->ipq_src;
+       ((struct ip *)ip)->ip_dst = fp->ipq_dst;
+       remque(fp);
+       (void) m_free(dtom(fp));
+       m = dtom(ip);
+       m->m_len += sizeof (struct ipasfrag);
+       m->m_off -= sizeof (struct ipasfrag);
+       return ((struct ip *)ip);
+
+dropfrag:
+       m_freem(m);
+       return (0);
 }
 
 }
 
-ip_done(p)
-       register struct ip *p;
+/*
+ * Free a fragment reassembly header and all
+ * associated datagrams.
+ */
+ip_freef(fp)
+       struct ipq *fp;
 {
 {
-       register struct ip *q;
-       register next;
-
-COUNT(IP_DONE);
-       q = p->ip_next;
+       register struct ipasfrag *q, *p;
 
 
-       if (q->ip_off != 0)
-               return(0);
-       do {
-               next = q->ip_end + 1;
-               q = q->ip_next;
-       } while ((q != p) && (q->ip_off == next));
-
-       if ((q == p) && !(q->ip_prev->ip_mff))        /* all fragments in */
-               return(next);                         /* total data length */
-       else
-               return(0);
+       for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = p) {     /* fp itself terminates the circular list */
+               p = q->ipf_next;        /* fetch successor before q is unlinked and freed */
+               ip_deq(q);
+               m_freem(dtom(q));       /* free the mbuf chain that holds this fragment */
+       }
+       remque(fp);                     /* take the reassembly header off its queue */
+       (void) m_free(dtom(fp));        /* free the mbuf containing the header */
 }
 
 }
 
-ip_mergef(p)    /* merge mbufs of fragments of completed datagram */
-register struct ip *p;
+/*
+ * Put an ip fragment on a reassembly chain.
+ * Like insque, but pointers in middle of structure.
+ */
+ip_enq(p, prev)
+       register struct ipasfrag *p, *prev;
 {
 {
-       register struct mbuf *m, *n;
-       register struct ip *q;
-       int dummy;
-
-COUNT(IP_MERGEF);
-       q = p->ip_next;                         /* -> bottom of reass chain */
-       n = (struct mbuf *)&dummy;              /* dummy for init assignment */
-
-       while (q != p) {        /* through chain */
-
-               n->m_next = m = dtom(q);
-               while (m != NULL)
-                       if (m->m_len != 0) {
-                               n = m;
-                               m = m->m_next;
-                       } else                  /* free null mbufs */
-                               n->m_next = m = m_free(m);
-               q = q->ip_next;
-       }
-}
 
 
+       p->ipf_prev = prev;                     /* p goes immediately after prev */
+       p->ipf_next = prev->ipf_next;
+       prev->ipf_next->ipf_prev = p;           /* old successor now points back at p */
+       prev->ipf_next = p;                     /* done last: earlier lines read the old value */
+}
 
 
-ip_freef(fp)           /* deq and free reass.q header */
-register struct ipq *fp;
+/*
+ * To ip_enq as remque is to insque.
+ */
+ip_deq(p)
+       register struct ipasfrag *p;
 {
 {
-COUNT(IP_FREEF);
-       if (fp->iq_prev != NULL)
-               (fp->iq_prev)->iq_next = fp->iq_next;
-       else
-               netcb.n_ip_head = fp->iq_next;
-       if (fp->iq_next != NULL)
-               (fp->iq_next)->iq_prev = fp->iq_prev;
-       else
-               netcb.n_ip_tail = fp->iq_prev;
-       m_free(dtom(fp));
+
+       p->ipf_prev->ipf_next = p->ipf_next;    /* predecessor skips over p */
+       p->ipf_next->ipf_prev = p->ipf_prev;    /* successor skips over p; p's own links are left as they were */
 }
 
 }
 
-struct ipq *
-ip_findf(p)         /* does fragment reass chain w/this hdr exist? */
-register struct ip *p;
+/*
+ * IP timer processing;
+ * if a timer expires on a reassembly
+ * queue, discard it.
+ */
+ip_slowtimo()
 {
        register struct ipq *fp;
 {
        register struct ipq *fp;
+       int s = splnet();
 
 
-COUNT(IP_FINDF);
-       for (fp = netcb.n_ip_head; (fp != NULL && (
-                       p->ip_src.s_addr != fp->iqh.ip_src.s_addr ||
-                       p->ip_dst.s_addr != fp->iqh.ip_dst.s_addr ||
-                       p->ip_id != fp->iqh.ip_id ||
-                       p->ip_p != fp->iqh.ip_p)); fp = fp->iq_next);
-       return(fp);
+       fp = ipq.next;
+       if (fp == 0) {                  /* queue head has no links; presumably ip_init has not run yet -- confirm */
+               splx(s);                /* restore the level saved from splnet() */
+               return;
+       }
+       while (fp != &ipq) {            /* &ipq is the head/sentinel of the circular queue */
+               --fp->ipq_ttl;
+               fp = fp->next;          /* advance first: ip_freef below releases the entry just aged */
+               if (fp->prev->ipq_ttl == 0)
+                       ip_freef(fp->prev);     /* timer expired: discard that reassembly queue */
+       }
+       splx(s);
 }
 
 }
 
-ip_opt(ip, hlen)        /* process ip options */
-struct ip *ip;
-int hlen;
+/*
+ * Drain off all datagram fragments.
+ */
+ip_drain()
 {
 {
-       register char *p, *q;
-       register i, len;
-       register struct mbuf *m;
 
 
-COUNT(IP_OPT);
-       p = q = (char *)((int)ip + sizeof(struct ip));  /* -> at options */
-       
-       if ((i = hlen - sizeof(struct ip)) > 0) {       /* any options */
+       while (ipq.next != &ipq)                /* until only the queue head remains */
+               ip_freef(ipq.next);             /* ip_freef unlinks the entry, so ipq.next changes each pass */
+}
 
 
-/*      *** IP OPTION PROCESSING ***
+/*
+ * Do option processing on a datagram,
+ * possibly discarding it if bad options
+ * are encountered.
+ *
+ * Walks the option bytes that follow the fixed IP header.  Returns 0
+ * when the datagram may continue through input processing, or 1 after
+ * an ICMP error has been issued for it through icmp_error().
+ */
+ip_dooptions(ip)
+       struct ip *ip;
+{
+       register u_char *cp;
+       int opt, optlen, cnt, code, type;
+       struct in_addr *sin;
+       register struct ip_timestamp *ipt;
+       register struct ifnet *ifp;
+       struct in_addr t;
+
+       cp = (u_char *)(ip + 1);
+       cnt = (ip->ip_hl << 2) - sizeof (struct ip);
+       for (; cnt > 0; cnt -= optlen, cp += optlen) {
+               opt = cp[0];
+               if (opt == IPOPT_EOL)
+                       break;
+               if (opt == IPOPT_NOP)
+                       optlen = 1;
+               else {
+                       optlen = cp[1];
+                       /*
+                        * The length byte comes straight off the wire.
+                        * Zero would keep this loop from ever advancing
+                        * and a value larger than what remains would
+                        * run past the header, so reject the datagram
+                        * and point the error at the length byte.
+                        */
+                       if (optlen <= 0 || optlen > cnt) {
+                               type = ICMP_PARAMPROB;
+                               code = &cp[1] - (u_char *)ip;
+                               goto bad;
+                       }
+               }
+               switch (opt) {
+
+               default:
+                       break;
+
+               /*
+                * Source routing with record.
+                * Find interface with current destination address.
+                * If none on this machine then drop if strictly routed,
+                * or do nothing if loosely routed.
+                * Record interface address and bring up next address
+                * component.  If strictly routed make sure next
+                * address on directly accessible net.
+                */
+               case IPOPT_LSRR:
+               case IPOPT_SSRR:
+                       if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
+                               break;
+                       sin = (struct in_addr *)(cp + cp[2]);
+                       ipaddr.sin_addr = *sin;
+                       ifp = if_ifwithaddr((struct sockaddr *)&ipaddr);
+                       type = ICMP_UNREACH, code = ICMP_UNREACH_SRCFAIL;
+                       if (ifp == 0) {
+                               if (opt == IPOPT_SSRR)
+                                       goto bad;
+                               break;
+                       }
+                       t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
+                       cp[2] += 4;
+                       if (cp[2] > optlen - (sizeof (long) - 1))
+                               break;
+                       ip->ip_dst = sin[1];
+                       if (opt == IPOPT_SSRR &&
+                           if_ifonnetof(in_netof(ip->ip_dst)) == 0)
+                               goto bad;
+                       break;
+
+               case IPOPT_TS:
+                       code = cp - (u_char *)ip;
+                       type = ICMP_PARAMPROB;
+                       ipt = (struct ip_timestamp *)cp;
+                       if (ipt->ipt_len < 5)
+                               goto bad;
+                       if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
+                               if (++ipt->ipt_oflw == 0)
+                                       goto bad;
+                               break;
+                       }
+                       sin = (struct in_addr *)(cp+cp[2]);
+                       switch (ipt->ipt_flg) {
 
 
-               while (i > 0)
+                       case IPOPT_TS_TSONLY:
+                               break;
 
 
-                       switch (*q++) {
-                       case 0:
-                       case 1:
-                               i--;
+                       case IPOPT_TS_TSANDADDR:
+                               if (ipt->ipt_ptr + 8 > ipt->ipt_len)
+                                       goto bad;
+                               if (ifinet == 0)
+                                       goto bad;       /* ??? */
+                               *sin++ = ((struct sockaddr_in *)&ifinet->if_addr)->sin_addr;
+                               break;
+
+                       case IPOPT_TS_PRESPEC:
+                               ipaddr.sin_addr = *sin;
+                               if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0)
+                                       continue;
+                               if (ipt->ipt_ptr + 8 > ipt->ipt_len)
+                                       goto bad;
+                               ipt->ipt_ptr += 4;
                                break;
 
                        default:
                                break;
 
                        default:
-                               i -= *q;
-                               q += *q;
+                               goto bad;
                        }
                        }
-*/              q += i;
-               m = dtom(q);
-               len = (int)m + m->m_off + m->m_len - (int)q;
-               bcopy((caddr_t)q, (caddr_t)p, len);    /* remove options */
-               m->m_len -= i;
+                       *(n_time *)sin = iptime();
+                       ipt->ipt_ptr += 4;
+               }
        }
        }
+       return (0);
+bad:
+       icmp_error(ip, type, code);
+       return (1);
 }
 
 }
 
-ip_enq(p, prev)
-register struct ip *p;
-register struct ip *prev;
+/*
+ * Strip out IP options, at higher
+ * level protocol in the kernel.
+ * Second argument is buffer to which options
+ * will be moved, and return value is their length.
+ */
+ip_stripoptions(ip, mopt)
+       struct ip *ip;
+       struct mbuf *mopt;
 {
 {
-COUNT(IP_ENQ);
-       p->ip_prev = prev;
-       p->ip_next = prev->ip_next;
-       prev->ip_next->ip_prev = p;
-       prev->ip_next = p;
+       register int i;
+       register struct mbuf *m;
+       int olen;
+
+       /* olen = bytes of options that follow the fixed header */
+       olen = (ip->ip_hl<<2) - sizeof (struct ip);
+       m = dtom(ip);
+       ip++;                           /* ip now addresses the first option byte */
+       if (mopt) {
+               /*
+                * Save the options in the caller's buffer.  The
+                * destination must be mopt, whose length and offset
+                * were just set up; copying into m would overwrite
+                * the start of the packet itself.
+                */
+               mopt->m_len = olen;
+               mopt->m_off = MMINOFF;
+               bcopy((caddr_t)ip, mtod(mopt, caddr_t), (unsigned)olen);
+       }
+       /* Slide the rest of the first mbuf down over the options. */
+       i = m->m_len - (sizeof (struct ip) + olen);
+       bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
+       m->m_len -= olen;
 }
 
 }
 
-ip_deq(p)
-register struct ip *p;
+u_char inetctlerrmap[] = {
+       ECONNABORTED,   ECONNABORTED,   0,              0,
+       0,              0,
+       EHOSTDOWN,      EHOSTUNREACH,   ENETUNREACH,    EHOSTUNREACH,
+       ECONNREFUSED,   ECONNREFUSED,   EMSGSIZE,       0,
+       0,              0,              0,              0
+};
+
+ip_ctlinput(cmd, arg)
+       int cmd;
+       caddr_t arg;
 {
 {
-COUNT(IP_DEQ);
-       p->ip_prev->ip_next = p->ip_next;
-       p->ip_next->ip_prev = p->ip_prev;
+       struct in_addr *in;
+       int tcp_abort(), udp_abort();
+       extern struct inpcb tcb, udb;
+
+       if (cmd < 0 || cmd > PRC_NCMDS)
+               return;
+       if (inetctlerrmap[cmd] == 0)
+               return;         /* XXX */
+       if (cmd == PRC_IFDOWN)
+               in = &((struct sockaddr_in *)arg)->sin_addr;
+       else if (cmd == PRC_HOSTDEAD || cmd == PRC_HOSTUNREACH)
+               in = (struct in_addr *)arg;
+       else
+               in = &((struct icmp *)arg)->icmp_ip.ip_dst;
+/* THIS IS VERY QUESTIONABLE, SHOULD HIT ALL PROTOCOLS */
+       in_pcbnotify(&tcb, in, (int)inetctlerrmap[cmd], tcp_abort);
+       in_pcbnotify(&udb, in, (int)inetctlerrmap[cmd], udp_abort);
 }
 
 }
 
-ip_timeo()      /* frag reass.q timeout routine */
+int    ipprintfs = 0;
+int    ipforwarding = 1;
+/*
+ * Forward a packet.  If some error occurs return the sender
+ * an icmp packet.  Note we can't always generate a meaningful
+ * icmp message because icmp doesn't have a large enough repertoire
+ * of codes and types.
+ */
+ip_forward(ip)
+       register struct ip *ip;
 {
 {
-       register struct ip *q;
-       register struct ipq *fp;
-       int s = splnet();
+       register int error, type, code;
+       struct mbuf *mopt, *mcopy;
+
+       if (ipprintfs)
+               printf("forward: src %x dst %x ttl %x\n", ip->ip_src,
+                       ip->ip_dst, ip->ip_ttl);
+       if (ipforwarding == 0) {
+               /* can't tell difference between net and host */
+               type = ICMP_UNREACH, code = ICMP_UNREACH_NET;
+               goto sendicmp;
+       }
+       if (ip->ip_ttl < IPTTLDEC) {
+               type = ICMP_TIMXCEED, code = ICMP_TIMXCEED_INTRANS;
+               goto sendicmp;
+       }
+       ip->ip_ttl -= IPTTLDEC;
+       mopt = m_get(M_DONTWAIT, MT_DATA);
+       if (mopt == NULL) {
+               m_freem(dtom(ip));
+               return;
+       }
 
 
-COUNT(IP_TIMEO);
-       timeout(ip_timeo, 0, hz);       /* reschedule every second */
+       /*
+        * Save at most 64 bytes of the packet in case
+        * we need to generate an ICMP message to the src.
+        */
+       mcopy = m_copy(dtom(ip), 0, imin(ip->ip_len, 64));
+       ip_stripoptions(ip, mopt);
 
 
-       /* search through reass.q */
+       /* last 0 here means no directed broadcast */
+       if ((error = ip_output(dtom(ip), mopt, (struct route *)0, 0)) == 0) {
+               if (mcopy)
+                       m_freem(mcopy);
+               return;
+       }
+       ip = mtod(mcopy, struct ip *);
+       type = ICMP_UNREACH, code = 0;          /* need ``undefined'' */
+       switch (error) {
 
 
-       for (fp = netcb.n_ip_head; fp != NULL; fp = fp->iq_next)
+       case ENETUNREACH:
+       case ENETDOWN:
+               code = ICMP_UNREACH_NET;
+               break;
 
 
-               if (--(fp->iqx.ip_ttl) == 0) {  /* time to die */
+       case EMSGSIZE:
+               code = ICMP_UNREACH_NEEDFRAG;
+               break;
 
 
-                       q = fp->iqx.ip_next;    /* free mbufs assoc. w/chain */
-                       while (q != (struct ip *)fp) {
-                               m_freem(dtom(q));
-                               q = q->ip_next;
-                       }
-                       ip_freef(fp);           /* free header */
-               }
-       splx(s);
+       case EPERM:
+               code = ICMP_UNREACH_PORT;
+               break;
+
+       case ENOBUFS:
+               type = ICMP_SOURCEQUENCH;
+               break;
+
+       case EHOSTDOWN:
+       case EHOSTUNREACH:
+               code = ICMP_UNREACH_HOST;
+               break;
+       }
+sendicmp:
+       icmp_error(ip, type, code);
 }
 }