flags now indicate address of gateway; lh filled in from ether address
[unix-history] / usr / src / sys / netinet / ip_input.c
CommitLineData
cdff57cc 1/* ip_input.c 1.40 82/04/07 */
6e8b2eca 2
e1d82856 3#include "../h/param.h"
d10bd5b7 4#include "../h/systm.h"
e6dd2097
BJ
5#include "../h/clock.h"
6#include "../h/mbuf.h"
eb44bfb2 7#include "../h/protosw.h"
2b4b57cd 8#include "../h/socket.h"
8a13b737
BJ
9#include "../net/in.h"
10#include "../net/in_systm.h"
4ad99bae 11#include "../net/if.h"
8a13b737 12#include "../net/ip.h" /* belongs before in.h */
eb44bfb2 13#include "../net/ip_var.h"
d52566dd
BJ
14#include "../net/ip_icmp.h"
15#include "../net/tcp.h"
e6dd2097 16
/* For each IP protocol number, the index of its entry in protosw[]; filled in by ip_init(). */
eb44bfb2 17u_char ip_protox[IPPROTO_MAX];
/* Value ip_init() installs as ipintrq.ifq_maxlen. */
1e977657 18int ipqmaxlen = IFQ_MAXLEN;
ee787340 19struct ifnet *ifinet; /* first inet interface */
eb44bfb2 20
d52566dd 21/*
b454c3ea 22 * IP initialization: fill in IP protocol switch table.
405c9168 23 * All protocols not implemented in kernel go to raw IP protocol handler.
d52566dd
BJ
24 */
25ip_init()
26{
eb44bfb2
BJ
27 register struct protosw *pr;
28 register int i;
eb44bfb2 29
4ad99bae 30COUNT(IP_INIT);
eb44bfb2
BJ
31 pr = pffindproto(PF_INET, IPPROTO_RAW);
32 if (pr == 0)
33 panic("ip_init");
34 for (i = 0; i < IPPROTO_MAX; i++)
35 ip_protox[i] = pr - protosw;
36 for (pr = protosw; pr <= protoswLAST; pr++)
37 if (pr->pr_family == PF_INET &&
38 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
39 ip_protox[pr->pr_protocol] = pr - protosw;
d52566dd
BJ
40 ipq.next = ipq.prev = &ipq;
41 ip_id = time & 0xffff;
1e977657 42 ipintrq.ifq_maxlen = ipqmaxlen;
ee787340 43 ifinet = if_ifwithaf(AF_INET);
d52566dd
BJ
44}
45
/* Nonzero: ipintr() verifies the header checksum of each datagram. */
eb44bfb2 46u_char ipcksum = 1;
e6dd2097 47struct ip *ip_reass();
a9f3e174
SL
/* Zero: ipintr() discards datagrams not addressed to this host instead of forwarding them. */
48int ipforwarding = 1;
/* Nonzero: ipintr() prints src/dst/ttl of each datagram not addressed to this host. */
49int ipprintfs = 0;
/* Scratch address whose sin_addr is filled in before if_ifwithaddr() lookups. */
ee787340 50struct sockaddr_in ipaddr = { AF_INET };
e6dd2097 51
e6dd2097
BJ
52/*
53 * Ip input routine. Checksum and byte swap header. If fragmented
54 * try to reassamble. If complete and fragment queue exists, discard.
55 * Process options. Pass to next level.
56 */
8a13b737 57ipintr()
e1d82856 58{
2b4b57cd 59 register struct ip *ip;
8a13b737 60 register struct mbuf *m;
7c08c626 61 struct mbuf *m0, *mopt;
e6dd2097 62 register int i;
e1d82856 63 register struct ipq *fp;
8a13b737 64 int hlen, s;
e1d82856 65
8a13b737
BJ
66COUNT(IPINTR);
67next:
e6dd2097 68 /*
8a13b737
BJ
69 * Get next datagram off input queue and get IP header
70 * in first mbuf.
e6dd2097 71 */
8a13b737
BJ
72 s = splimp();
73 IF_DEQUEUE(&ipintrq, m);
74 splx(s);
7ac98d3c 75 if (m == 0)
8a13b737 76 return;
9411b6be
BJ
77 if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
78 (m = m_pullup(m, sizeof (struct ip))) == 0)
79 return;
e6dd2097 80 ip = mtod(m, struct ip *);
405c9168 81 if ((hlen = ip->ip_hl << 2) > m->m_len) {
9411b6be
BJ
82 if ((m = m_pullup(m, hlen)) == 0)
83 return;
405c9168
BJ
84 ip = mtod(m, struct ip *);
85 }
4ad99bae 86 if (ipcksum)
7c08c626 87 if (ip->ip_sum = in_cksum(m, hlen)) {
405c9168 88 printf("ip_sum %x\n", ip->ip_sum); /* XXX */
4ad99bae
BJ
89 ipstat.ips_badsum++;
90 goto bad;
e1d82856 91 }
4ad99bae 92
7c08c626 93#if vax
4ad99bae
BJ
94 /*
95 * Convert fields to host representation.
96 */
cdad2eb1 97 ip->ip_len = ntohs((u_short)ip->ip_len);
e6dd2097 98 ip->ip_id = ntohs(ip->ip_id);
4ad99bae 99 ip->ip_off = ntohs((u_short)ip->ip_off);
7c08c626 100#endif
e1d82856 101
d10bd5b7 102 /*
e6dd2097
BJ
103 * Check that the amount of data in the buffers
104 * is as at least much as the IP header would have us expect.
105 * Trim mbufs if longer than we expect.
106 * Drop packet if shorter than we expect.
d10bd5b7 107 */
9c0ca361 108 i = -ip->ip_len;
405c9168 109 m0 = m;
9c0ca361 110 for (;;) {
e1d82856 111 i += m->m_len;
9c0ca361
BJ
112 if (m->m_next == 0)
113 break;
114 m = m->m_next;
1dd55890 115 }
9c0ca361
BJ
116 if (i != 0) {
117 if (i < 0) {
405c9168 118 ipstat.ips_tooshort++;
4ad99bae 119 goto bad;
405c9168 120 }
9c0ca361
BJ
121 if (i <= m->m_len)
122 m->m_len -= i;
123 else
124 m_adj(m0, -i);
d10bd5b7 125 }
9c0ca361 126 m = m0;
e1d82856 127
e6dd2097
BJ
128 /*
129 * Process options and, if not destined for us,
130 * ship it on.
131 */
132 if (hlen > sizeof (struct ip))
cdad2eb1 133 ip_dooptions(ip);
ee787340
SL
134
135 /*
c124e997
SL
136 * Fast check on the first internet
137 * interface in the list.
ee787340
SL
138 */
139 if (ifinet) {
140 struct sockaddr_in *sin;
141
142 sin = (struct sockaddr_in *)&ifinet->if_addr;
143 if (sin->sin_addr.s_addr == ip->ip_dst.s_addr)
144 goto ours;
cdff57cc 145 sin = (struct sockaddr_in *)&ifinet->if_broadaddr;
c124e997
SL
146 if ((ifinet->if_flags & IFF_BROADCAST) &&
147 sin->sin_addr.s_addr == ip->ip_dst.s_addr)
148 goto ours;
ee787340
SL
149 }
150 ipaddr.sin_addr = ip->ip_dst;
151 if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) {
a9f3e174
SL
152 if (ipprintfs)
153 printf("forward: src %x dst %x ttl %x\n", ip->ip_src,
154 ip->ip_dst, ip->ip_ttl);
ee787340
SL
155 if (ipforwarding == 0)
156 goto bad;
157 if (ip->ip_ttl < IPTTLDEC) {
cdad2eb1 158 icmp_error(ip, ICMP_TIMXCEED, 0);
8a13b737 159 goto next;
e6dd2097 160 }
ee787340 161 ip->ip_ttl -= IPTTLDEC;
7c08c626
BJ
162 mopt = m_get(M_DONTWAIT);
163 if (mopt == 0)
164 goto bad;
165 ip_stripoptions(ip, mopt);
ee787340 166
c124e997 167 /* last 0 here means no directed broadcast */
ee787340 168 (void) ip_output(m0, mopt, 0, 0);
8a13b737 169 goto next;
d10bd5b7 170 }
e1d82856 171
ee787340 172ours:
e6dd2097
BJ
173 /*
174 * Look for queue of fragments
175 * of this datagram.
176 */
177 for (fp = ipq.next; fp != &ipq; fp = fp->next)
178 if (ip->ip_id == fp->ipq_id &&
179 ip->ip_src.s_addr == fp->ipq_src.s_addr &&
180 ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
181 ip->ip_p == fp->ipq_p)
182 goto found;
183 fp = 0;
184found:
e1d82856 185
e6dd2097
BJ
186 /*
187 * Adjust ip_len to not reflect header,
188 * set ip_mff if more fragments are expected,
189 * convert offset of this to bytes.
190 */
191 ip->ip_len -= hlen;
eb44bfb2 192 ((struct ipasfrag *)ip)->ipf_mff = 0;
e6dd2097 193 if (ip->ip_off & IP_MF)
eb44bfb2 194 ((struct ipasfrag *)ip)->ipf_mff = 1;
e6dd2097 195 ip->ip_off <<= 3;
e1d82856 196
e6dd2097
BJ
197 /*
198 * If datagram marked as having more fragments
199 * or if this is not the first fragment,
200 * attempt reassembly; if it succeeds, proceed.
201 */
eb44bfb2
BJ
202 if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
203 ip = ip_reass((struct ipasfrag *)ip, fp);
e6dd2097 204 if (ip == 0)
8a13b737 205 goto next;
e6dd2097
BJ
206 hlen = ip->ip_hl << 2;
207 m = dtom(ip);
208 } else
209 if (fp)
210 (void) ip_freef(fp);
4ad99bae
BJ
211
212 /*
213 * Switch out to protocol's input routine.
214 */
eb44bfb2 215 (*protosw[ip_protox[ip->ip_p]].pr_input)(m);
8a13b737 216 goto next;
4ad99bae
BJ
217bad:
218 m_freem(m);
8a13b737 219 goto next;
e6dd2097 220}
e1d82856 221
e6dd2097
BJ
222/*
223 * Take incoming datagram fragment and try to
4ad99bae 224 * reassemble it into whole datagram. If a chain for
e6dd2097
BJ
225 * reassembly of this datagram already exists, then it
226 * is given as fp; otherwise have to make a chain.
227 */
228struct ip *
229ip_reass(ip, fp)
eb44bfb2 230 register struct ipasfrag *ip;
e6dd2097
BJ
231 register struct ipq *fp;
232{
233 register struct mbuf *m = dtom(ip);
eb44bfb2 234 register struct ipasfrag *q;
e6dd2097
BJ
235 struct mbuf *t;
236 int hlen = ip->ip_hl << 2;
237 int i, next;
4ad99bae 238COUNT(IP_REASS);
d10bd5b7 239
e6dd2097
BJ
240 /*
241 * Presence of header sizes in mbufs
242 * would confuse code below.
243 */
244 m->m_off += hlen;
245 m->m_len -= hlen;
d10bd5b7 246
e6dd2097
BJ
247 /*
248 * If first fragment to arrive, create a reassembly queue.
249 */
250 if (fp == 0) {
e6b33a03 251 if ((t = m_get(M_WAIT)) == NULL)
e6dd2097
BJ
252 goto dropfrag;
253 t->m_off = MMINOFF;
254 fp = mtod(t, struct ipq *);
255 insque(fp, &ipq);
256 fp->ipq_ttl = IPFRAGTTL;
257 fp->ipq_p = ip->ip_p;
258 fp->ipq_id = ip->ip_id;
eb44bfb2
BJ
259 fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
260 fp->ipq_src = ((struct ip *)ip)->ip_src;
261 fp->ipq_dst = ((struct ip *)ip)->ip_dst;
405c9168
BJ
262 q = (struct ipasfrag *)fp;
263 goto insert;
e6dd2097 264 }
e1d82856 265
e6dd2097
BJ
266 /*
267 * Find a segment which begins after this one does.
268 */
eb44bfb2 269 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
e6dd2097
BJ
270 if (q->ip_off > ip->ip_off)
271 break;
e1d82856 272
e6dd2097
BJ
273 /*
274 * If there is a preceding segment, it may provide some of
275 * our data already. If so, drop the data from the incoming
276 * segment. If it provides all of our data, drop us.
277 */
eb44bfb2
BJ
278 if (q->ipf_prev != (struct ipasfrag *)fp) {
279 i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
e6dd2097
BJ
280 if (i > 0) {
281 if (i >= ip->ip_len)
282 goto dropfrag;
283 m_adj(dtom(ip), i);
284 ip->ip_off += i;
285 ip->ip_len -= i;
e1d82856 286 }
d10bd5b7 287 }
e1d82856 288
e6dd2097
BJ
289 /*
290 * While we overlap succeeding segments trim them or,
291 * if they are completely covered, dequeue them.
292 */
eb44bfb2 293 while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
e6dd2097
BJ
294 i = (ip->ip_off + ip->ip_len) - q->ip_off;
295 if (i < q->ip_len) {
296 q->ip_len -= i;
c107df34 297 q->ip_off += i;
e6dd2097
BJ
298 m_adj(dtom(q), i);
299 break;
300 }
eb44bfb2
BJ
301 q = q->ipf_next;
302 m_freem(dtom(q->ipf_prev));
303 ip_deq(q->ipf_prev);
e6dd2097 304 }
e1d82856 305
405c9168 306insert:
e6dd2097
BJ
307 /*
308 * Stick new segment in its place;
309 * check for complete reassembly.
310 */
eb44bfb2 311 ip_enq(ip, q->ipf_prev);
e6dd2097 312 next = 0;
eb44bfb2 313 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
e6dd2097
BJ
314 if (q->ip_off != next)
315 return (0);
316 next += q->ip_len;
317 }
eb44bfb2 318 if (q->ipf_prev->ipf_mff)
e6dd2097 319 return (0);
e1d82856 320
e6dd2097
BJ
321 /*
322 * Reassembly is complete; concatenate fragments.
323 */
324 q = fp->ipq_next;
325 m = dtom(q);
326 t = m->m_next;
327 m->m_next = 0;
328 m_cat(m, t);
dfb346d0
BJ
329 q = q->ipf_next;
330 while (q != (struct ipasfrag *)fp) {
331 t = dtom(q);
332 q = q->ipf_next;
333 m_cat(m, t);
334 }
e1d82856 335
e6dd2097
BJ
336 /*
337 * Create header for new ip packet by
338 * modifying header of first packet;
339 * dequeue and discard fragment reassembly header.
340 * Make header visible.
341 */
342 ip = fp->ipq_next;
343 ip->ip_len = next;
eb44bfb2
BJ
344 ((struct ip *)ip)->ip_src = fp->ipq_src;
345 ((struct ip *)ip)->ip_dst = fp->ipq_dst;
e6dd2097 346 remque(fp);
cdad2eb1 347 (void) m_free(dtom(fp));
e6dd2097 348 m = dtom(ip);
eb44bfb2
BJ
349 m->m_len += sizeof (struct ipasfrag);
350 m->m_off -= sizeof (struct ipasfrag);
351 return ((struct ip *)ip);
e6dd2097
BJ
352
353dropfrag:
354 m_freem(m);
355 return (0);
e1d82856
BJ
356}
357
e6dd2097
BJ
358/*
359 * Free a fragment reassembly header and all
360 * associated datagrams.
361 */
362struct ipq *
363ip_freef(fp)
364 struct ipq *fp;
e1d82856 365{
eb44bfb2 366 register struct ipasfrag *q;
e6dd2097 367 struct mbuf *m;
4ad99bae 368COUNT(IP_FREEF);
e6dd2097 369
eb44bfb2 370 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
e6dd2097
BJ
371 m_freem(dtom(q));
372 m = dtom(fp);
373 fp = fp->next;
374 remque(fp->prev);
cdad2eb1 375 (void) m_free(m);
e6dd2097 376 return (fp);
e1d82856
BJ
377}
378
e6dd2097
BJ
379/*
380 * Put an ip fragment on a reassembly chain.
381 * Like insque, but pointers in middle of structure.
382 */
383ip_enq(p, prev)
eb44bfb2 384 register struct ipasfrag *p, *prev;
e1d82856 385{
e1d82856 386
4ad99bae 387COUNT(IP_ENQ);
eb44bfb2
BJ
388 p->ipf_prev = prev;
389 p->ipf_next = prev->ipf_next;
390 prev->ipf_next->ipf_prev = p;
391 prev->ipf_next = p;
e1d82856
BJ
392}
393
e6dd2097
BJ
394/*
395 * To ip_enq as remque is to insque.
396 */
397ip_deq(p)
eb44bfb2 398 register struct ipasfrag *p;
e1d82856 399{
e6dd2097 400
4ad99bae 401COUNT(IP_DEQ);
eb44bfb2
BJ
402 p->ipf_prev->ipf_next = p->ipf_next;
403 p->ipf_next->ipf_prev = p->ipf_prev;
e1d82856
BJ
404}
405
e6dd2097
BJ
406/*
407 * IP timer processing;
408 * if a timer expires on a reassembly
409 * queue, discard it.
410 */
d52566dd 411ip_slowtimo()
e1d82856
BJ
412{
413 register struct ipq *fp;
e6dd2097 414 int s = splnet();
e1d82856 415
4ad99bae 416COUNT(IP_SLOWTIMO);
4aed14e3
BJ
417 fp = ipq.next;
418 if (fp == 0) {
419 splx(s);
420 return;
421 }
422 while (fp != &ipq)
e6dd2097
BJ
423 if (--fp->ipq_ttl == 0)
424 fp = ip_freef(fp);
425 else
426 fp = fp->next;
e6dd2097 427 splx(s);
e1d82856
BJ
428}
429
4ad99bae
BJ
430/*
431 * Drain off all datagram fragments.
432 */
d52566dd
BJ
433ip_drain()
434{
435
4ad99bae
BJ
436COUNT(IP_DRAIN);
437 while (ipq.next != &ipq)
438 (void) ip_freef(ipq.next);
d52566dd 439}
2b4b57cd 440
e6dd2097
BJ
441/*
442 * Do option processing on a datagram,
443 * possibly discarding it if bad options
444 * are encountered.
445 */
446ip_dooptions(ip)
447 struct ip *ip;
e1d82856 448{
e6dd2097 449 register u_char *cp;
cdad2eb1 450 int opt, optlen, cnt;
2b4b57cd 451 struct in_addr *sin;
d52566dd 452 register struct ip_timestamp *ipt;
4ad99bae
BJ
453 register struct ifnet *ifp;
454 struct in_addr t;
e6dd2097 455
4ad99bae 456COUNT(IP_DOOPTIONS);
e6dd2097
BJ
457 cp = (u_char *)(ip + 1);
458 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
459 for (; cnt > 0; cnt -= optlen, cp += optlen) {
460 opt = cp[0];
461 if (opt == IPOPT_EOL)
462 break;
463 if (opt == IPOPT_NOP)
464 optlen = 1;
465 else
466 optlen = cp[1];
467 switch (opt) {
e1d82856 468
e6dd2097
BJ
469 default:
470 break;
e1d82856 471
4ad99bae
BJ
472 /*
473 * Source routing with record.
474 * Find interface with current destination address.
475 * If none on this machine then drop if strictly routed,
476 * or do nothing if loosely routed.
477 * Record interface address and bring up next address
478 * component. If strictly routed make sure next
479 * address on directly accessible net.
480 */
e6dd2097 481 case IPOPT_LSRR:
d52566dd 482 if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
e6dd2097 483 break;
2b4b57cd 484 sin = (struct in_addr *)(cp + cp[2]);
ee787340
SL
485 ipaddr.sin_addr = *sin;
486 ifp = if_ifwithaddr((struct sockaddr *)&ipaddr);
4ad99bae
BJ
487 if (ifp == 0) {
488 if (opt == IPOPT_SSRR)
489 goto bad;
490 break;
e6dd2097 491 }
4ad99bae
BJ
492 t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
493 cp[2] += 4;
494 if (cp[2] > optlen - (sizeof (long) - 1))
495 break;
496 ip->ip_dst = sin[1];
ee787340
SL
497 if (opt == IPOPT_SSRR &&
498 if_ifonnetof(ip->ip_dst.s_net) == 0)
4ad99bae 499 goto bad;
e6dd2097
BJ
500 break;
501
502 case IPOPT_TS:
d52566dd
BJ
503 ipt = (struct ip_timestamp *)cp;
504 if (ipt->ipt_len < 5)
e6dd2097 505 goto bad;
d52566dd
BJ
506 if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
507 if (++ipt->ipt_oflw == 0)
e6dd2097 508 goto bad;
e6dd2097
BJ
509 break;
510 }
2b4b57cd 511 sin = (struct in_addr *)(cp+cp[2]);
d52566dd 512 switch (ipt->ipt_flg) {
e1d82856 513
e6dd2097
BJ
514 case IPOPT_TS_TSONLY:
515 break;
e1d82856 516
e6dd2097 517 case IPOPT_TS_TSANDADDR:
d52566dd 518 if (ipt->ipt_ptr + 8 > ipt->ipt_len)
e6dd2097 519 goto bad;
ee787340
SL
520 if (ifinet == 0)
521 goto bad; /* ??? */
522 *sin++ = ((struct sockaddr_in *)&ifinet->if_addr)->sin_addr;
e6dd2097
BJ
523 break;
524
525 case IPOPT_TS_PRESPEC:
ee787340
SL
526 ipaddr.sin_addr = *sin;
527 if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0)
4ad99bae 528 continue;
d52566dd 529 if (ipt->ipt_ptr + 8 > ipt->ipt_len)
e6dd2097 530 goto bad;
d52566dd 531 ipt->ipt_ptr += 4;
e1d82856
BJ
532 break;
533
534 default:
e6dd2097 535 goto bad;
e1d82856 536 }
2b4b57cd 537 *(n_time *)sin = iptime();
d52566dd 538 ipt->ipt_ptr += 4;
e6dd2097 539 }
e1d82856 540 }
cdad2eb1 541 return;
e6dd2097
BJ
542bad:
543 /* SHOULD FORCE ICMP MESSAGE */
cdad2eb1 544 return;
e1d82856
BJ
545}
546
e6dd2097 547/*
4ad99bae
BJ
548 * Strip out IP options, at higher
549 * level protocol in the kernel.
550 * Second argument is buffer to which options
551 * will be moved, and return value is their length.
e6dd2097 552 */
7c08c626 553ip_stripoptions(ip, mopt)
e6dd2097 554 struct ip *ip;
7c08c626 555 struct mbuf *mopt;
e1d82856 556{
e6dd2097
BJ
557 register int i;
558 register struct mbuf *m;
e6dd2097 559 int olen;
4ad99bae 560COUNT(IP_STRIPOPTIONS);
e6dd2097
BJ
561
562 olen = (ip->ip_hl<<2) - sizeof (struct ip);
4ad99bae
BJ
563 m = dtom(ip);
564 ip++;
7c08c626
BJ
565 if (mopt) {
566 mopt->m_len = olen;
567 mopt->m_off = MMINOFF;
568 bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen);
569 }
e6dd2097 570 i = m->m_len - (sizeof (struct ip) + olen);
cdad2eb1 571 bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
4aed14e3 572 m->m_len -= olen;
e1d82856 573}