For now, take out directory operation locking. Get rid of buffer
[unix-history] / usr / src / sys / netinet / ip_input.c
CommitLineData
8ae0e4b4 1/*
b293fc3e 2 * Copyright (c) 1982, 1986, 1988 Regents of the University of California.
2b6b6284 3 * All rights reserved.
8ae0e4b4 4 *
dbf0c423 5 * %sccs.include.redist.c%
2b6b6284 6 *
5548a02f 7 * @(#)ip_input.c 7.23 (Berkeley) %G%
8ae0e4b4 8 */
6e8b2eca 9
5548a02f
KB
10#include <sys/param.h>
11#include <sys/systm.h>
12#include <sys/malloc.h>
13#include <sys/mbuf.h>
14#include <sys/domain.h>
15#include <sys/protosw.h>
16#include <sys/socket.h>
17#include <sys/errno.h>
18#include <sys/time.h>
19#include <sys/kernel.h>
6e7edb25 20
5548a02f
KB
21#include <net/if.h>
22#include <net/route.h>
f4d55810 23
5548a02f
KB
24#include <netinet/in.h>
25#include <netinet/in_systm.h>
26#include <netinet/ip.h>
27#include <netinet/in_pcb.h>
28#include <netinet/in_var.h>
29#include <netinet/ip_var.h>
30#include <netinet/ip_icmp.h>
e6dd2097 31
b293fc3e
MK
/*
 * Compile-time defaults for the forwarding knobs.  IPFORWARDING defaults
 * to 1 when GATEWAY is defined and to 0 otherwise; IPSENDREDIRECTS
 * defaults to 1.  Either macro may be predefined by the build.
 *
 * Each macro and each variable is defined exactly once here; defining
 * the initialized globals twice is a redefinition error.
 */
#ifndef IPFORWARDING
#ifdef GATEWAY
#define	IPFORWARDING	1	/* forward IP packets not for us */
#else /* GATEWAY */
#define	IPFORWARDING	0	/* don't forward IP packets not for us */
#endif /* GATEWAY */
#endif /* IPFORWARDING */
#ifndef IPSENDREDIRECTS
#define	IPSENDREDIRECTS	1
#endif
int	ipforwarding = IPFORWARDING;
int	ipsendredirects = IPSENDREDIRECTS;
/*
 * Enables the diagnostic printf()s in this file.  Defined unconditionally
 * so that every reference resolves whether or not DIAGNOSTIC is defined.
 */
int	ipprintfs = 0;
extern	int in_interfaces;
21f50054 62
bd22f489
KS
63extern struct domain inetdomain;
64extern struct protosw inetsw[];
eb44bfb2 65u_char ip_protox[IPPROTO_MAX];
1e977657 66int ipqmaxlen = IFQ_MAXLEN;
f223fa7d 67struct in_ifaddr *in_ifaddr; /* first inet address */
eb44bfb2 68
8fb48289
MK
69/*
70 * We need to save the IP options in case a protocol wants to respond
71 * to an incoming packet over the same route if the packet got here
72 * using IP source routing. This allows connection establishment and
73 * maintenance when the remote end is on a network that is not known
74 * to us.
75 */
76int ip_nhops = 0;
77static struct ip_srcrt {
21f50054 78 struct in_addr dst; /* final destination */
b293fc3e 79 struct in_addr dst; /* final destination */
8fb48289
MK
80 char nop; /* one NOP to align */
81 char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
b293fc3e 82 struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
8fb48289
MK
83} ip_srcrt;
84
c81ef907
MK
85#ifdef GATEWAY
86extern int if_index;
87u_long *ip_ifmatrix;
88#endif
89
d52566dd 90/*
b454c3ea 91 * IP initialization: fill in IP protocol switch table.
405c9168 92 * All protocols not implemented in kernel go to raw IP protocol handler.
d52566dd
BJ
93 */
94ip_init()
95{
eb44bfb2
BJ
96 register struct protosw *pr;
97 register int i;
eb44bfb2 98
8fb48289 99 pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
eb44bfb2
BJ
100 if (pr == 0)
101 panic("ip_init");
102 for (i = 0; i < IPPROTO_MAX; i++)
59965020
BJ
103 ip_protox[i] = pr - inetsw;
104 for (pr = inetdomain.dom_protosw;
36bb5f94 105 pr < inetdomain.dom_protoswNPROTOSW; pr++)
abe04898 106 if (pr->pr_domain->dom_family == PF_INET &&
eb44bfb2 107 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
59965020 108 ip_protox[pr->pr_protocol] = pr - inetsw;
d52566dd 109 ipq.next = ipq.prev = &ipq;
b2ac7f3b 110 ip_id = time.tv_sec & 0xffff;
1e977657 111 ipintrq.ifq_maxlen = ipqmaxlen;
c81ef907
MK
112#ifdef GATEWAY
113 i = (if_index + 1) * (if_index + 1) * sizeof (u_long);
90116e2a
KM
114 ip_ifmatrix = (u_long *) malloc(i, M_RTABLE, M_WAITOK);
115 bzero((char *)ip_ifmatrix, i);
c81ef907 116#endif
d52566dd
BJ
117}
118
e6dd2097 119struct ip *ip_reass();
b293fc3e 120struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
8fb48289 121struct route ipforward_rt;
e6dd2097 122
e6dd2097
BJ
123/*
124 * Ip input routine. Checksum and byte swap header. If fragmented
c81ef907 125 * try to reassemble. Process options. Pass to next level.
e6dd2097 126 */
8a13b737 127ipintr()
e1d82856 128{
2b4b57cd 129 register struct ip *ip;
8a13b737 130 register struct mbuf *m;
e1d82856 131 register struct ipq *fp;
f223fa7d 132 register struct in_ifaddr *ia;
8a13b737 133 int hlen, s;
e1d82856 134
8a13b737 135next:
e6dd2097 136 /*
8a13b737
BJ
137 * Get next datagram off input queue and get IP header
138 * in first mbuf.
e6dd2097 139 */
8a13b737 140 s = splimp();
b293fc3e 141 IF_DEQUEUE(&ipintrq, m);
8a13b737 142 splx(s);
7ac98d3c 143 if (m == 0)
8a13b737 144 return;
e9629d44
MK
145#ifdef DIAGNOSTIC
146 if ((m->m_flags & M_PKTHDR) == 0)
147 panic("ipintr no HDR");
148#endif
773dfe90
MK
149 /*
150 * If no IP addresses have been set yet but the interfaces
151 * are receiving, can't do anything with incoming packets yet.
152 */
153 if (in_ifaddr == NULL)
154 goto bad;
7922844b 155 ipstat.ips_total++;
c81ef907 156 if (m->m_len < sizeof (struct ip) &&
9dc8d46a
SL
157 (m = m_pullup(m, sizeof (struct ip))) == 0) {
158 ipstat.ips_toosmall++;
159 goto next;
160 }
e6dd2097 161 ip = mtod(m, struct ip *);
f223fa7d 162 hlen = ip->ip_hl << 2;
8fb48289 163 if (hlen < sizeof(struct ip)) { /* minimum header length */
f223fa7d 164 ipstat.ips_badhlen++;
ac066ae1 165 goto bad;
f223fa7d
MK
166 }
167 if (hlen > m->m_len) {
9dc8d46a
SL
168 if ((m = m_pullup(m, hlen)) == 0) {
169 ipstat.ips_badhlen++;
170 goto next;
171 }
405c9168
BJ
172 ip = mtod(m, struct ip *);
173 }
b293fc3e
MK
174 if (ip->ip_sum = in_cksum(m, hlen)) {
175 ipstat.ips_badsum++;
176 goto bad;
177 }
4ad99bae
BJ
178
179 /*
180 * Convert fields to host representation.
181 */
c81ef907 182 NTOHS(ip->ip_len);
9dc8d46a
SL
183 if (ip->ip_len < hlen) {
184 ipstat.ips_badlen++;
185 goto bad;
186 }
c81ef907
MK
187 NTOHS(ip->ip_id);
188 NTOHS(ip->ip_off);
e1d82856 189
d10bd5b7 190 /*
e6dd2097
BJ
191 * Check that the amount of data in the buffers
192 * is as at least much as the IP header would have us expect.
193 * Trim mbufs if longer than we expect.
194 * Drop packet if shorter than we expect.
d10bd5b7 195 */
b293fc3e
MK
196 if (m->m_pkthdr.len < ip->ip_len) {
197 ipstat.ips_tooshort++;
198 goto bad;
1dd55890 199 }
b293fc3e
MK
200 if (m->m_pkthdr.len > ip->ip_len) {
201 if (m->m_len == m->m_pkthdr.len) {
202 m->m_len = ip->ip_len;
203 m->m_pkthdr.len = ip->ip_len;
204 } else
205 m_adj(m, ip->ip_len - m->m_pkthdr.len);
d10bd5b7 206 }
e1d82856 207
e6dd2097
BJ
208 /*
209 * Process options and, if not destined for us,
72e4f44e
SL
210 * ship it on. ip_dooptions returns 1 when an
211 * error was detected (causing an icmp message
ac066ae1 212 * to be sent and the original packet to be freed).
e6dd2097 213 */
8fb48289 214 ip_nhops = 0; /* for source routed packets */
b293fc3e 215 if (hlen > sizeof (struct ip) && ip_dooptions(m))
72e4f44e 216 goto next;
ee787340
SL
217
218 /*
f223fa7d 219 * Check our list of addresses, to see if the packet is for us.
ee787340 220 */
f223fa7d
MK
221 for (ia = in_ifaddr; ia; ia = ia->ia_next) {
222#define satosin(sa) ((struct sockaddr_in *)(sa))
223
224 if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
8fb48289 225 goto ours;
4afb57fa
MK
226 if (
227#ifdef DIRECTED_BROADCAST
b293fc3e 228 ia->ia_ifp == m->m_pkthdr.rcvif &&
4afb57fa
MK
229#endif
230 (ia->ia_ifp->if_flags & IFF_BROADCAST)) {
b2a3d559 231 u_long t;
4afb57fa
MK
232
233 if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
234 ip->ip_dst.s_addr)
235 goto ours;
236 if (ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr)
237 goto ours;
238 /*
239 * Look for all-0's host part (old broadcast addr),
240 * either for subnet or net.
241 */
b2a3d559
MK
242 t = ntohl(ip->ip_dst.s_addr);
243 if (t == ia->ia_subnet)
4afb57fa 244 goto ours;
b2a3d559 245 if (t == ia->ia_net)
4afb57fa
MK
246 goto ours;
247 }
d10bd5b7 248 }
d6fa15c2
KS
249#ifdef MULTICAST
250 if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
251 struct in_multi *inm;
252#ifdef MROUTING
253 extern struct socket *ip_mrouter;
254
255 if (ip_mrouter) {
256 /*
257 * If we are acting as a multicast router, all
258 * incoming multicast packets are passed to the
259 * kernel-level multicast forwarding function.
260 * The packet is returned (relatively) intact; if
261 * ip_mforward() returns a non-zero value, the packet
262 * must be discarded, else it may be accepted below.
263 *
264 * (The IP ident field is put in the same byte order
265 * as expected when ip_mforward() is called from
266 * ip_output().)
267 */
268 ip->ip_id = htons(ip->ip_id);
269 if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) {
270 m_freem(m);
271 goto next;
272 }
273 ip->ip_id = ntohs(ip->ip_id);
274
275 /*
276 * The process-level routing demon needs to receive
277 * all multicast IGMP packets, whether or not this
278 * host belongs to their destination groups.
279 */
280 if (ip->ip_p == IPPROTO_IGMP)
281 goto ours;
282 }
283#endif
284 /*
285 * See if we belong to the destination multicast group on the
286 * arrival interface.
287 */
288 IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
289 if (inm == NULL) {
290 m_freem(m);
291 goto next;
292 }
293 goto ours;
294 }
295#endif
8fb48289
MK
296 if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
297 goto ours;
298 if (ip->ip_dst.s_addr == INADDR_ANY)
299 goto ours;
e1d82856 300
8fb48289
MK
301 /*
302 * Not for us; forward if possible and desirable.
303 */
c81ef907 304 if (ipforwarding == 0) {
b293fc3e
MK
305 ipstat.ips_cantforward++;
306 m_freem(m);
307 } else
c81ef907 308 ip_forward(m, 0);
8fb48289
MK
309 goto next;
310
311ours:
e6dd2097 312 /*
5828b179
MK
313 * If offset or IP_MF are set, must reassemble.
314 * Otherwise, nothing need be done.
315 * (We could look in the reassembly queue to see
316 * if the packet was previously fragmented,
317 * but it's not worth the time; just let them time out.)
e6dd2097 318 */
5828b179 319 if (ip->ip_off &~ IP_DF) {
c81ef907
MK
320 if (m->m_flags & M_EXT) { /* XXX */
321 if ((m = m_pullup(m, sizeof (struct ip))) == 0) {
322 ipstat.ips_toosmall++;
323 goto next;
324 }
325 ip = mtod(m, struct ip *);
326 }
5828b179
MK
327 /*
328 * Look for queue of fragments
329 * of this datagram.
330 */
331 for (fp = ipq.next; fp != &ipq; fp = fp->next)
332 if (ip->ip_id == fp->ipq_id &&
333 ip->ip_src.s_addr == fp->ipq_src.s_addr &&
334 ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
335 ip->ip_p == fp->ipq_p)
336 goto found;
337 fp = 0;
e6dd2097 338found:
e1d82856 339
5828b179
MK
340 /*
341 * Adjust ip_len to not reflect header,
342 * set ip_mff if more fragments are expected,
343 * convert offset of this to bytes.
344 */
345 ip->ip_len -= hlen;
346 ((struct ipasfrag *)ip)->ipf_mff = 0;
347 if (ip->ip_off & IP_MF)
348 ((struct ipasfrag *)ip)->ipf_mff = 1;
349 ip->ip_off <<= 3;
e1d82856 350
5828b179
MK
351 /*
352 * If datagram marked as having more fragments
353 * or if this is not the first fragment,
354 * attempt reassembly; if it succeeds, proceed.
355 */
356 if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
357 ipstat.ips_fragments++;
358 ip = ip_reass((struct ipasfrag *)ip, fp);
359 if (ip == 0)
360 goto next;
ea9a9897
KS
361 else
362 ipstat.ips_reassembled++;
5828b179
MK
363 m = dtom(ip);
364 } else
365 if (fp)
366 ip_freef(fp);
e6dd2097 367 } else
5828b179 368 ip->ip_len -= hlen;
4ad99bae
BJ
369
370 /*
371 * Switch out to protocol's input routine.
372 */
ea9a9897 373 ipstat.ips_delivered++;
b293fc3e 374 (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
8a13b737 375 goto next;
4ad99bae
BJ
376bad:
377 m_freem(m);
8a13b737 378 goto next;
e6dd2097 379}
e1d82856 380
e6dd2097
BJ
381/*
382 * Take incoming datagram fragment and try to
4ad99bae 383 * reassemble it into whole datagram. If a chain for
e6dd2097
BJ
384 * reassembly of this datagram already exists, then it
385 * is given as fp; otherwise have to make a chain.
386 */
387struct ip *
388ip_reass(ip, fp)
eb44bfb2 389 register struct ipasfrag *ip;
e6dd2097
BJ
390 register struct ipq *fp;
391{
392 register struct mbuf *m = dtom(ip);
eb44bfb2 393 register struct ipasfrag *q;
e6dd2097
BJ
394 struct mbuf *t;
395 int hlen = ip->ip_hl << 2;
396 int i, next;
d10bd5b7 397
e6dd2097
BJ
398 /*
399 * Presence of header sizes in mbufs
400 * would confuse code below.
401 */
b293fc3e 402 m->m_data += hlen;
e6dd2097 403 m->m_len -= hlen;
d10bd5b7 404
e6dd2097
BJ
405 /*
406 * If first fragment to arrive, create a reassembly queue.
407 */
408 if (fp == 0) {
13494852 409 if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
e6dd2097 410 goto dropfrag;
e6dd2097
BJ
411 fp = mtod(t, struct ipq *);
412 insque(fp, &ipq);
413 fp->ipq_ttl = IPFRAGTTL;
414 fp->ipq_p = ip->ip_p;
415 fp->ipq_id = ip->ip_id;
eb44bfb2
BJ
416 fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
417 fp->ipq_src = ((struct ip *)ip)->ip_src;
418 fp->ipq_dst = ((struct ip *)ip)->ip_dst;
405c9168
BJ
419 q = (struct ipasfrag *)fp;
420 goto insert;
e6dd2097 421 }
e1d82856 422
e6dd2097
BJ
423 /*
424 * Find a segment which begins after this one does.
425 */
eb44bfb2 426 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
e6dd2097
BJ
427 if (q->ip_off > ip->ip_off)
428 break;
e1d82856 429
e6dd2097
BJ
430 /*
431 * If there is a preceding segment, it may provide some of
432 * our data already. If so, drop the data from the incoming
433 * segment. If it provides all of our data, drop us.
434 */
eb44bfb2
BJ
435 if (q->ipf_prev != (struct ipasfrag *)fp) {
436 i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
e6dd2097
BJ
437 if (i > 0) {
438 if (i >= ip->ip_len)
439 goto dropfrag;
440 m_adj(dtom(ip), i);
441 ip->ip_off += i;
442 ip->ip_len -= i;
e1d82856 443 }
d10bd5b7 444 }
e1d82856 445
e6dd2097
BJ
446 /*
447 * While we overlap succeeding segments trim them or,
448 * if they are completely covered, dequeue them.
449 */
eb44bfb2 450 while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
e6dd2097
BJ
451 i = (ip->ip_off + ip->ip_len) - q->ip_off;
452 if (i < q->ip_len) {
453 q->ip_len -= i;
c107df34 454 q->ip_off += i;
e6dd2097
BJ
455 m_adj(dtom(q), i);
456 break;
457 }
eb44bfb2
BJ
458 q = q->ipf_next;
459 m_freem(dtom(q->ipf_prev));
460 ip_deq(q->ipf_prev);
e6dd2097 461 }
e1d82856 462
405c9168 463insert:
e6dd2097
BJ
464 /*
465 * Stick new segment in its place;
466 * check for complete reassembly.
467 */
eb44bfb2 468 ip_enq(ip, q->ipf_prev);
e6dd2097 469 next = 0;
eb44bfb2 470 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
e6dd2097
BJ
471 if (q->ip_off != next)
472 return (0);
473 next += q->ip_len;
474 }
eb44bfb2 475 if (q->ipf_prev->ipf_mff)
e6dd2097 476 return (0);
e1d82856 477
e6dd2097
BJ
478 /*
479 * Reassembly is complete; concatenate fragments.
480 */
481 q = fp->ipq_next;
482 m = dtom(q);
483 t = m->m_next;
484 m->m_next = 0;
485 m_cat(m, t);
dfb346d0
BJ
486 q = q->ipf_next;
487 while (q != (struct ipasfrag *)fp) {
488 t = dtom(q);
489 q = q->ipf_next;
490 m_cat(m, t);
491 }
e1d82856 492
e6dd2097
BJ
493 /*
494 * Create header for new ip packet by
495 * modifying header of first packet;
496 * dequeue and discard fragment reassembly header.
497 * Make header visible.
498 */
499 ip = fp->ipq_next;
500 ip->ip_len = next;
eb44bfb2
BJ
501 ((struct ip *)ip)->ip_src = fp->ipq_src;
502 ((struct ip *)ip)->ip_dst = fp->ipq_dst;
e6dd2097 503 remque(fp);
cdad2eb1 504 (void) m_free(dtom(fp));
e6dd2097 505 m = dtom(ip);
8fb48289 506 m->m_len += (ip->ip_hl << 2);
b293fc3e 507 m->m_data -= (ip->ip_hl << 2);
bd22f489
KS
508 /* some debugging cruft by sklower, below, will go away soon */
509 if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
510 register int plen = 0;
511 for (t = m; m; m = m->m_next)
512 plen += m->m_len;
513 t->m_pkthdr.len = plen;
514 }
eb44bfb2 515 return ((struct ip *)ip);
e6dd2097
BJ
516
517dropfrag:
8fb48289 518 ipstat.ips_fragdropped++;
e6dd2097
BJ
519 m_freem(m);
520 return (0);
e1d82856
BJ
521}
522
e6dd2097
BJ
523/*
524 * Free a fragment reassembly header and all
525 * associated datagrams.
526 */
e6dd2097
BJ
527ip_freef(fp)
528 struct ipq *fp;
e1d82856 529{
e16de434 530 register struct ipasfrag *q, *p;
e6dd2097 531
e16de434
SL
532 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = p) {
533 p = q->ipf_next;
534 ip_deq(q);
e6dd2097 535 m_freem(dtom(q));
e16de434
SL
536 }
537 remque(fp);
538 (void) m_free(dtom(fp));
e1d82856
BJ
539}
540
e6dd2097
BJ
541/*
542 * Put an ip fragment on a reassembly chain.
543 * Like insque, but pointers in middle of structure.
544 */
545ip_enq(p, prev)
eb44bfb2 546 register struct ipasfrag *p, *prev;
e1d82856 547{
e1d82856 548
eb44bfb2
BJ
549 p->ipf_prev = prev;
550 p->ipf_next = prev->ipf_next;
551 prev->ipf_next->ipf_prev = p;
552 prev->ipf_next = p;
e1d82856
BJ
553}
554
e6dd2097
BJ
555/*
556 * To ip_enq as remque is to insque.
557 */
558ip_deq(p)
eb44bfb2 559 register struct ipasfrag *p;
e1d82856 560{
e6dd2097 561
eb44bfb2
BJ
562 p->ipf_prev->ipf_next = p->ipf_next;
563 p->ipf_next->ipf_prev = p->ipf_prev;
e1d82856
BJ
564}
565
e6dd2097
BJ
566/*
567 * IP timer processing;
568 * if a timer expires on a reassembly
569 * queue, discard it.
570 */
d52566dd 571ip_slowtimo()
e1d82856
BJ
572{
573 register struct ipq *fp;
e6dd2097 574 int s = splnet();
e1d82856 575
4aed14e3
BJ
576 fp = ipq.next;
577 if (fp == 0) {
578 splx(s);
579 return;
580 }
e16de434
SL
581 while (fp != &ipq) {
582 --fp->ipq_ttl;
583 fp = fp->next;
8fb48289
MK
584 if (fp->prev->ipq_ttl == 0) {
585 ipstat.ips_fragtimeout++;
e16de434 586 ip_freef(fp->prev);
8fb48289 587 }
e16de434 588 }
e6dd2097 589 splx(s);
e1d82856
BJ
590}
591
4ad99bae
BJ
592/*
593 * Drain off all datagram fragments.
594 */
d52566dd
BJ
595ip_drain()
596{
597
8fb48289
MK
598 while (ipq.next != &ipq) {
599 ipstat.ips_fragdropped++;
e16de434 600 ip_freef(ipq.next);
8fb48289 601 }
d52566dd 602}
2b4b57cd 603
1846019d 604extern struct in_ifaddr *ifptoia();
8fb48289
MK
605struct in_ifaddr *ip_rtaddr();
606
e6dd2097
BJ
607/*
608 * Do option processing on a datagram,
c81ef907
MK
609 * possibly discarding it if bad options are encountered,
610 * or forwarding it if source-routed.
611 * Returns 1 if packet has been forwarded/freed,
612 * 0 if the packet should be processed further.
e6dd2097 613 */
b293fc3e
MK
614ip_dooptions(m)
615 struct mbuf *m;
e1d82856 616{
21f50054 617 register struct ip *ip = mtod(m, struct ip *);
b293fc3e 618 register struct ip *ip = mtod(m, struct ip *);
e6dd2097 619 register u_char *cp;
d52566dd 620 register struct ip_timestamp *ipt;
8fb48289 621 register struct in_ifaddr *ia;
b293fc3e 622 int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
21f50054 623 int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
8fb48289
MK
624 struct in_addr *sin;
625 n_time ntime;
e6dd2097
BJ
626
627 cp = (u_char *)(ip + 1);
628 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
629 for (; cnt > 0; cnt -= optlen, cp += optlen) {
8fb48289 630 opt = cp[IPOPT_OPTVAL];
e6dd2097
BJ
631 if (opt == IPOPT_EOL)
632 break;
633 if (opt == IPOPT_NOP)
634 optlen = 1;
942169f7 635 else {
8fb48289
MK
636 optlen = cp[IPOPT_OLEN];
637 if (optlen <= 0 || optlen > cnt) {
638 code = &cp[IPOPT_OLEN] - (u_char *)ip;
36bb5f94 639 goto bad;
8fb48289 640 }
942169f7 641 }
e6dd2097 642 switch (opt) {
e1d82856 643
e6dd2097
BJ
644 default:
645 break;
e1d82856 646
4ad99bae
BJ
647 /*
648 * Source routing with record.
649 * Find interface with current destination address.
650 * If none on this machine then drop if strictly routed,
651 * or do nothing if loosely routed.
652 * Record interface address and bring up next address
653 * component. If strictly routed make sure next
c81ef907 654 * address is on directly accessible net.
4ad99bae 655 */
e6dd2097 656 case IPOPT_LSRR:
a71ece0a 657 case IPOPT_SSRR:
8fb48289
MK
658 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
659 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
660 goto bad;
661 }
662 ipaddr.sin_addr = ip->ip_dst;
663 ia = (struct in_ifaddr *)
664 ifa_ifwithaddr((struct sockaddr *)&ipaddr);
665 if (ia == 0) {
666 if (opt == IPOPT_SSRR) {
667 type = ICMP_UNREACH;
668 code = ICMP_UNREACH_SRCFAIL;
4ad99bae 669 goto bad;
8fb48289
MK
670 }
671 /*
672 * Loose routing, and not at next destination
673 * yet; nothing to do except forward.
674 */
4ad99bae 675 break;
e6dd2097 676 }
8fb48289
MK
677 off--; /* 0 origin */
678 if (off > optlen - sizeof(struct in_addr)) {
679 /*
680 * End of source route. Should be for us.
681 */
682 save_rte(cp, ip->ip_src);
4ad99bae 683 break;
8fb48289
MK
684 }
685 /*
686 * locate outgoing interface
687 */
8011f5df 688 bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr,
8fb48289 689 sizeof(ipaddr.sin_addr));
c81ef907
MK
690 if (opt == IPOPT_SSRR) {
691#define INA struct in_ifaddr *
692#define SA struct sockaddr *
693 if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
694 ia = in_iaonnetof(in_netof(ipaddr.sin_addr));
695 } else
696 ia = ip_rtaddr(ipaddr.sin_addr);
697 if (ia == 0) {
8fb48289
MK
698 type = ICMP_UNREACH;
699 code = ICMP_UNREACH_SRCFAIL;
4ad99bae 700 goto bad;
8fb48289
MK
701 }
702 ip->ip_dst = ipaddr.sin_addr;
8011f5df
MK
703 bcopy((caddr_t)&(IA_SIN(ia)->sin_addr),
704 (caddr_t)(cp + off), sizeof(struct in_addr));
8fb48289 705 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
b293fc3e 706 forward = 1;
21f50054 707 forward = 1;
8fb48289
MK
708 break;
709
710 case IPOPT_RR:
711 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
712 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
713 goto bad;
714 }
715 /*
716 * If no space remains, ignore.
717 */
718 off--; /* 0 origin */
719 if (off > optlen - sizeof(struct in_addr))
720 break;
c13e44b4 721 bcopy((caddr_t)(&ip->ip_dst), (caddr_t)&ipaddr.sin_addr,
8fb48289
MK
722 sizeof(ipaddr.sin_addr));
723 /*
9759b0a4
MK
724 * locate outgoing interface; if we're the destination,
725 * use the incoming interface (should be same).
8fb48289 726 */
c81ef907 727 if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
9759b0a4 728 (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
8fb48289 729 type = ICMP_UNREACH;
d89cc5db 730 code = ICMP_UNREACH_HOST;
8fb48289
MK
731 goto bad;
732 }
8011f5df
MK
733 bcopy((caddr_t)&(IA_SIN(ia)->sin_addr),
734 (caddr_t)(cp + off), sizeof(struct in_addr));
8fb48289 735 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
e6dd2097
BJ
736 break;
737
738 case IPOPT_TS:
72e4f44e 739 code = cp - (u_char *)ip;
d52566dd
BJ
740 ipt = (struct ip_timestamp *)cp;
741 if (ipt->ipt_len < 5)
e6dd2097 742 goto bad;
d52566dd
BJ
743 if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
744 if (++ipt->ipt_oflw == 0)
e6dd2097 745 goto bad;
e6dd2097
BJ
746 break;
747 }
1846019d 748 sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
d52566dd 749 switch (ipt->ipt_flg) {
e1d82856 750
e6dd2097
BJ
751 case IPOPT_TS_TSONLY:
752 break;
e1d82856 753
e6dd2097 754 case IPOPT_TS_TSANDADDR:
8fb48289
MK
755 if (ipt->ipt_ptr + sizeof(n_time) +
756 sizeof(struct in_addr) > ipt->ipt_len)
e6dd2097 757 goto bad;
b293fc3e 758 ia = ifptoia(m->m_pkthdr.rcvif);
1846019d 759 bcopy((caddr_t)&IA_SIN(ia)->sin_addr,
8fb48289 760 (caddr_t)sin, sizeof(struct in_addr));
1846019d 761 ipt->ipt_ptr += sizeof(struct in_addr);
e6dd2097
BJ
762 break;
763
764 case IPOPT_TS_PRESPEC:
1846019d
MK
765 if (ipt->ipt_ptr + sizeof(n_time) +
766 sizeof(struct in_addr) > ipt->ipt_len)
767 goto bad;
8fb48289
MK
768 bcopy((caddr_t)sin, (caddr_t)&ipaddr.sin_addr,
769 sizeof(struct in_addr));
c81ef907 770 if (ifa_ifwithaddr((SA)&ipaddr) == 0)
4ad99bae 771 continue;
8fb48289 772 ipt->ipt_ptr += sizeof(struct in_addr);
e1d82856
BJ
773 break;
774
775 default:
e6dd2097 776 goto bad;
e1d82856 777 }
8fb48289 778 ntime = iptime();
1846019d
MK
779 bcopy((caddr_t)&ntime, (caddr_t)cp + ipt->ipt_ptr - 1,
780 sizeof(n_time));
8fb48289 781 ipt->ipt_ptr += sizeof(n_time);
e6dd2097 782 }
e1d82856 783 }
b293fc3e 784 if (forward) {
c81ef907 785 ip_forward(m, 1);
b293fc3e
MK
786 return (1);
787 } else
788 return (0);
e6dd2097 789bad:
b293fc3e 790 icmp_error(m, type, code);
72e4f44e 791 return (1);
e1d82856
BJ
792}
793
8fb48289
MK
794/*
795 * Given address of next destination (final or next hop),
796 * return internet address info of interface to be used to get there.
797 */
798struct in_ifaddr *
799ip_rtaddr(dst)
800 struct in_addr dst;
801{
802 register struct sockaddr_in *sin;
8fb48289
MK
803
804 sin = (struct sockaddr_in *) &ipforward_rt.ro_dst;
805
806 if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) {
807 if (ipforward_rt.ro_rt) {
808 RTFREE(ipforward_rt.ro_rt);
809 ipforward_rt.ro_rt = 0;
810 }
811 sin->sin_family = AF_INET;
b293fc3e 812 sin->sin_len = sizeof(*sin);
8fb48289
MK
813 sin->sin_addr = dst;
814
815 rtalloc(&ipforward_rt);
816 }
817 if (ipforward_rt.ro_rt == 0)
818 return ((struct in_ifaddr *)0);
c81ef907 819 return ((struct in_ifaddr *) ipforward_rt.ro_rt->rt_ifa);
8fb48289
MK
820}
821
822/*
823 * Save incoming source route for use in replies,
824 * to be picked up later by ip_srcroute if the receiver is interested.
825 */
826save_rte(option, dst)
8011f5df 827 u_char *option;
8fb48289
MK
828 struct in_addr dst;
829{
8011f5df 830 unsigned olen;
8fb48289
MK
831
832 olen = option[IPOPT_OLEN];
bd22f489 833#ifdef DIAGNOSTIC
b293fc3e
MK
834 if (ipprintfs)
835 printf("save_rte: olen %d\n", olen);
c81ef907 836#endif
b293fc3e 837 if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
8fb48289 838 return;
8011f5df 839 bcopy((caddr_t)option, (caddr_t)ip_srcrt.srcopt, olen);
8fb48289 840 ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
b293fc3e 841 ip_srcrt.dst = dst;
8fb48289
MK
842}
843
844/*
845 * Retrieve incoming source route for use in replies,
846 * in the same form used by setsockopt.
847 * The first hop is placed before the options, will be removed later.
848 */
849struct mbuf *
850ip_srcroute()
851{
852 register struct in_addr *p, *q;
853 register struct mbuf *m;
854
855 if (ip_nhops == 0)
856 return ((struct mbuf *)0);
13494852
MK
857 m = m_get(M_DONTWAIT, MT_SOOPTS);
858 if (m == 0)
859 return ((struct mbuf *)0);
b293fc3e 860
21f50054
MK
861#define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
862
863 /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
864 m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
865 OPTSIZ;
866 if (ipprintfs)
867 printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
868
b293fc3e
MK
869#define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
870
871 /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
872 m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
873 OPTSIZ;
bd22f489 874#ifdef DIAGNOSTIC
b293fc3e
MK
875 if (ipprintfs)
876 printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
c81ef907 877#endif
8fb48289
MK
878
879 /*
880 * First save first hop for return route
881 */
882 p = &ip_srcrt.route[ip_nhops - 1];
883 *(mtod(m, struct in_addr *)) = *p--;
bd22f489 884#ifdef DIAGNOSTIC
b293fc3e
MK
885 if (ipprintfs)
886 printf(" hops %X", ntohl(*mtod(m, struct in_addr *)));
21f50054 887 if (ipprintfs)
d7070096 888 printf(" hops %lx", ntohl(mtod(m, struct in_addr *)->s_addr));
c81ef907 889#endif
8fb48289
MK
890
891 /*
892 * Copy option fields and padding (nop) to mbuf.
893 */
894 ip_srcrt.nop = IPOPT_NOP;
b293fc3e
MK
895 ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
896 bcopy((caddr_t)&ip_srcrt.nop,
897 mtod(m, caddr_t) + sizeof(struct in_addr), OPTSIZ);
8fb48289 898 q = (struct in_addr *)(mtod(m, caddr_t) +
b293fc3e
MK
899 sizeof(struct in_addr) + OPTSIZ);
900#undef OPTSIZ
8fb48289
MK
901 /*
902 * Record return path as an IP source route,
903 * reversing the path (pointers are now aligned).
904 */
b293fc3e 905 while (p >= ip_srcrt.route) {
bd22f489 906#ifdef DIAGNOSTIC
b293fc3e 907 if (ipprintfs)
d7070096 908 printf(" %lx", ntohl(q->s_addr));
c81ef907 909#endif
8fb48289 910 *q++ = *p--;
b293fc3e
MK
911 }
912 /*
913 * Last hop goes to final destination.
914 */
915 *q = ip_srcrt.dst;
21f50054
MK
916 if (ipprintfs)
917 printf(" %X\n", ntohl(*q));
918 }
919 /*
920 * Last hop goes to final destination.
921 */
922 *q = ip_srcrt.dst;
bd22f489 923#ifdef DIAGNOSTIC
b293fc3e 924 if (ipprintfs)
d7070096 925 printf(" %lx\n", ntohl(q->s_addr));
c81ef907 926#endif
8fb48289
MK
927 return (m);
928}
929
e6dd2097 930/*
4ad99bae
BJ
931 * Strip out IP options, at higher
932 * level protocol in the kernel.
933 * Second argument is buffer to which options
934 * will be moved, and return value is their length.
21f50054
MK
935#ifdef NEW
936 * XXX should be deleted; last arg currently ignored.
937#endif NEW
b293fc3e 938 * XXX should be deleted; last arg currently ignored.
e6dd2097 939 */
b293fc3e
MK
940ip_stripoptions(m, mopt)
941 register struct mbuf *m;
7c08c626 942 struct mbuf *mopt;
e1d82856 943{
e6dd2097 944 register int i;
b293fc3e 945 struct ip *ip = mtod(m, struct ip *);
8fb48289 946 register caddr_t opts;
e6dd2097 947 int olen;
e6dd2097
BJ
948
949 olen = (ip->ip_hl<<2) - sizeof (struct ip);
8fb48289 950 opts = (caddr_t)(ip + 1);
e6dd2097 951 i = m->m_len - (sizeof (struct ip) + olen);
8fb48289 952 bcopy(opts + olen, opts, (unsigned)i);
4aed14e3 953 m->m_len -= olen;
b293fc3e
MK
954 if (m->m_flags & M_PKTHDR)
955 m->m_pkthdr.len -= olen;
8fb48289 956 ip->ip_hl = sizeof(struct ip) >> 2;
e1d82856 957}
72e4f44e 958
/*
 * Error numbers indexed by PRC_* control command; 0 means the
 * command has no associated error.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,			/*  0 */
	0,			/*  1 */
	0,			/*  2 */
	0,			/*  3 */
	0,			/*  4 */
	EMSGSIZE,		/*  5 */
	EHOSTDOWN,		/*  6 */
	EHOSTUNREACH,		/*  7 */
	EHOSTUNREACH,		/*  8 */
	EHOSTUNREACH,		/*  9 */
	ECONNREFUSED,		/* 10 */
	ECONNREFUSED,		/* 11 */
	EMSGSIZE,		/* 12 */
	EHOSTUNREACH,		/* 13 */
	0,			/* 14 */
	0,			/* 15 */
	0,			/* 16 */
	0,			/* 17 */
	0,			/* 18 */
	0,			/* 19 */
	ENOPROTOOPT		/* 20 */
};
967
72e4f44e
SL
968/*
969 * Forward a packet. If some error occurs return the sender
f223fa7d 970 * an icmp packet. Note we can't always generate a meaningful
8fb48289 971 * icmp message because icmp doesn't have a large enough repertoire
72e4f44e 972 * of codes and types.
c0a9b2bd 973 *
c81ef907
MK
974 * If not forwarding, just drop the packet. This could be confusing
975 * if ipforwarding was zero but some routing protocol was advancing
976 * us as a gateway to somewhere. However, we must let the routing
977 * protocol deal with that.
978 *
979 * The srcrt parameter indicates whether the packet is being forwarded
980 * via a source route.
72e4f44e 981 */
c81ef907 982ip_forward(m, srcrt)
b293fc3e 983 struct mbuf *m;
c81ef907 984 int srcrt;
72e4f44e 985{
21f50054 986 register struct ip *ip = mtod(m, struct ip *);
b293fc3e 987 register struct ip *ip = mtod(m, struct ip *);
8fb48289 988 register struct sockaddr_in *sin;
c81ef907
MK
989 register struct rtentry *rt;
990 int error, type = 0, code;
f223fa7d 991 struct mbuf *mcopy;
8fb48289 992 struct in_addr dest;
72e4f44e 993
8fb48289 994 dest.s_addr = 0;
bd22f489 995#ifdef DIAGNOSTIC
72e4f44e
SL
996 if (ipprintfs)
997 printf("forward: src %x dst %x ttl %x\n", ip->ip_src,
998 ip->ip_dst, ip->ip_ttl);
c81ef907 999#endif
b293fc3e 1000 if (m->m_flags & M_BCAST || in_canforward(ip->ip_dst) == 0) {
c0a9b2bd 1001 ipstat.ips_cantforward++;
b293fc3e 1002 m_freem(m);
c0a9b2bd 1003 return;
72e4f44e 1004 }
21f50054 1005 ip->ip_id = htons(ip->ip_id);
c13e44b4 1006 if (ip->ip_ttl <= IPTTLDEC) {
c81ef907
MK
1007 icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest);
1008 return;
72e4f44e
SL
1009 }
1010 ip->ip_ttl -= IPTTLDEC;
67387c9c 1011
8fb48289 1012 sin = (struct sockaddr_in *)&ipforward_rt.ro_dst;
c81ef907 1013 if ((rt = ipforward_rt.ro_rt) == 0 ||
8fb48289
MK
1014 ip->ip_dst.s_addr != sin->sin_addr.s_addr) {
1015 if (ipforward_rt.ro_rt) {
1016 RTFREE(ipforward_rt.ro_rt);
1017 ipforward_rt.ro_rt = 0;
1018 }
1019 sin->sin_family = AF_INET;
b293fc3e 1020 sin->sin_len = sizeof(*sin);
8fb48289
MK
1021 sin->sin_addr = ip->ip_dst;
1022
1023 rtalloc(&ipforward_rt);
c81ef907
MK
1024 if (ipforward_rt.ro_rt == 0) {
1025 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest);
1026 return;
1027 }
1028 rt = ipforward_rt.ro_rt;
8fb48289 1029 }
c81ef907
MK
1030
1031 /*
1032 * Save at most 64 bytes of the packet in case
1033 * we need to generate an ICMP message to the src.
1034 */
1035 mcopy = m_copy(m, 0, imin((int)ip->ip_len, 64));
1036
1037#ifdef GATEWAY
1038 ip_ifmatrix[rt->rt_ifp->if_index +
1039 if_index * m->m_pkthdr.rcvif->if_index]++;
1040#endif
8fb48289
MK
1041 /*
1042 * If forwarding packet using same interface that it came in on,
1043 * perhaps should send a redirect to sender to shortcut a hop.
1044 * Only send redirect if source is sending directly to us,
1045 * and if packet was not source routed (or has any options).
83dfdaa9 1046 * Also, don't send redirect if forwarding using a default route
c81ef907 1047 * or a route modified by a redirect.
8fb48289 1048 */
83dfdaa9 1049#define satosin(sa) ((struct sockaddr_in *)(sa))
c81ef907
MK
1050 if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1051 (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1052 satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
1053 ipsendredirects && !srcrt) {
53d63ec6 1054#define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa))
8fb48289
MK
1055 u_long src = ntohl(ip->ip_src.s_addr);
1056 u_long dst = ntohl(ip->ip_dst.s_addr);
1057
53d63ec6
KS
1058 if (RTA(rt) &&
1059 (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
c81ef907
MK
1060 if (rt->rt_flags & RTF_GATEWAY)
1061 dest = satosin(rt->rt_gateway)->sin_addr;
8fb48289
MK
1062 else
1063 dest = ip->ip_dst;
1064 /*
1065 * If the destination is reached by a route to host,
020b84ae
MK
1066 * is on a subnet of a local net, or is directly
1067 * on the attached net (!), use host redirect.
8fb48289
MK
1068 * (We may be the correct first hop for other subnets.)
1069 */
1070 type = ICMP_REDIRECT;
c81ef907
MK
1071 if ((rt->rt_flags & RTF_HOST) ||
1072 (rt->rt_flags & RTF_GATEWAY) == 0)
1073 code = ICMP_REDIRECT_HOST;
1074 else if (RTA(rt)->ia_subnetmask != RTA(rt)->ia_netmask &&
1075 (dst & RTA(rt)->ia_netmask) == RTA(rt)->ia_net)
1076 code = ICMP_REDIRECT_HOST;
1077 else
1078 code = ICMP_REDIRECT_NET;
bd22f489 1079#ifdef DIAGNOSTIC
8fb48289 1080 if (ipprintfs)
c81ef907
MK
1081 printf("redirect (%d) to %x\n", code, dest.s_addr);
1082#endif
8fb48289
MK
1083 }
1084 }
1085
b293fc3e 1086 error = ip_output(m, (struct mbuf *)0, &ipforward_rt, IP_FORWARDING);
8fb48289
MK
1087 if (error)
1088 ipstat.ips_cantforward++;
8fb48289 1089 else {
ac066ae1 1090 ipstat.ips_forward++;
c81ef907
MK
1091 if (type)
1092 ipstat.ips_redirectsent++;
1093 else {
1094 if (mcopy)
1095 m_freem(mcopy);
1096 return;
1097 }
67387c9c 1098 }
9dfd168a
SL
1099 if (mcopy == NULL)
1100 return;
67387c9c
SL
1101 switch (error) {
1102
8fb48289 1103 case 0: /* forwarded, but need redirect */
c81ef907 1104 /* type, code set above */
8fb48289
MK
1105 break;
1106
c81ef907
MK
1107 case ENETUNREACH: /* shouldn't happen, checked above */
1108 case EHOSTUNREACH:
67387c9c 1109 case ENETDOWN:
c81ef907
MK
1110 case EHOSTDOWN:
1111 default:
1112 type = ICMP_UNREACH;
1113 code = ICMP_UNREACH_HOST;
67387c9c
SL
1114 break;
1115
1116 case EMSGSIZE:
c81ef907 1117 type = ICMP_UNREACH;
72e4f44e 1118 code = ICMP_UNREACH_NEEDFRAG;
ea9a9897 1119 ipstat.ips_cantfrag++;
67387c9c
SL
1120 break;
1121
67387c9c
SL
1122 case ENOBUFS:
1123 type = ICMP_SOURCEQUENCH;
b293fc3e 1124 code = 0;
67387c9c 1125 break;
67387c9c 1126 }
e2fd29fc 1127 icmp_error(mcopy, type, code, dest);
72e4f44e 1128}