#if vax --> #if vax || pdp11; also tp = 0 before goto drop's
[unix-history] / usr / src / sys / netinet / ip_input.c
CommitLineData
dfb346d0 1/* ip_input.c 1.34 82/03/23 */
6e8b2eca 2
e1d82856 3#include "../h/param.h"
d10bd5b7 4#include "../h/systm.h"
e6dd2097
BJ
5#include "../h/clock.h"
6#include "../h/mbuf.h"
eb44bfb2 7#include "../h/protosw.h"
2b4b57cd 8#include "../h/socket.h"
8a13b737
BJ
9#include "../net/in.h"
10#include "../net/in_systm.h"
4ad99bae 11#include "../net/if.h"
8a13b737 12#include "../net/ip.h" /* belongs before in.h */
eb44bfb2 13#include "../net/ip_var.h"
d52566dd
BJ
14#include "../net/ip_icmp.h"
15#include "../net/tcp.h"
e6dd2097 16
eb44bfb2 17u_char ip_protox[IPPROTO_MAX];
1e977657 18int ipqmaxlen = IFQ_MAXLEN;
eb44bfb2 19
d52566dd 20/*
b454c3ea 21 * IP initialization: fill in IP protocol switch table.
405c9168 22 * All protocols not implemented in kernel go to raw IP protocol handler.
d52566dd
BJ
23 */
24ip_init()
25{
eb44bfb2
BJ
26 register struct protosw *pr;
27 register int i;
eb44bfb2 28
4ad99bae 29COUNT(IP_INIT);
eb44bfb2
BJ
30 pr = pffindproto(PF_INET, IPPROTO_RAW);
31 if (pr == 0)
32 panic("ip_init");
33 for (i = 0; i < IPPROTO_MAX; i++)
34 ip_protox[i] = pr - protosw;
35 for (pr = protosw; pr <= protoswLAST; pr++)
36 if (pr->pr_family == PF_INET &&
37 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
38 ip_protox[pr->pr_protocol] = pr - protosw;
d52566dd
BJ
39 ipq.next = ipq.prev = &ipq;
40 ip_id = time & 0xffff;
1e977657 41 ipintrq.ifq_maxlen = ipqmaxlen;
d52566dd
BJ
42}
43
eb44bfb2 44u_char ipcksum = 1;
e6dd2097
BJ
45struct ip *ip_reass();
46
e6dd2097
BJ
47/*
48 * Ip input routine. Checksum and byte swap header. If fragmented
49 * try to reassamble. If complete and fragment queue exists, discard.
50 * Process options. Pass to next level.
51 */
8a13b737 52ipintr()
e1d82856 53{
2b4b57cd 54 register struct ip *ip;
8a13b737 55 register struct mbuf *m;
7c08c626 56 struct mbuf *m0, *mopt;
e6dd2097 57 register int i;
e1d82856 58 register struct ipq *fp;
8a13b737 59 int hlen, s;
e1d82856 60
8a13b737
BJ
61COUNT(IPINTR);
62next:
e6dd2097 63 /*
8a13b737
BJ
64 * Get next datagram off input queue and get IP header
65 * in first mbuf.
e6dd2097 66 */
8a13b737
BJ
67 s = splimp();
68 IF_DEQUEUE(&ipintrq, m);
69 splx(s);
7ac98d3c 70 if (m == 0)
8a13b737 71 return;
9411b6be
BJ
72 if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
73 (m = m_pullup(m, sizeof (struct ip))) == 0)
74 return;
e6dd2097 75 ip = mtod(m, struct ip *);
405c9168 76 if ((hlen = ip->ip_hl << 2) > m->m_len) {
9411b6be
BJ
77 if ((m = m_pullup(m, hlen)) == 0)
78 return;
405c9168
BJ
79 ip = mtod(m, struct ip *);
80 }
4ad99bae 81 if (ipcksum)
7c08c626 82 if (ip->ip_sum = in_cksum(m, hlen)) {
405c9168 83 printf("ip_sum %x\n", ip->ip_sum); /* XXX */
4ad99bae
BJ
84 ipstat.ips_badsum++;
85 goto bad;
e1d82856 86 }
4ad99bae 87
7c08c626 88#if vax
4ad99bae
BJ
89 /*
90 * Convert fields to host representation.
91 */
cdad2eb1 92 ip->ip_len = ntohs((u_short)ip->ip_len);
e6dd2097 93 ip->ip_id = ntohs(ip->ip_id);
4ad99bae 94 ip->ip_off = ntohs((u_short)ip->ip_off);
7c08c626 95#endif
e1d82856 96
d10bd5b7 97 /*
e6dd2097
BJ
98 * Check that the amount of data in the buffers
99 * is as at least much as the IP header would have us expect.
100 * Trim mbufs if longer than we expect.
101 * Drop packet if shorter than we expect.
d10bd5b7 102 */
e6dd2097 103 i = 0;
405c9168 104 m0 = m;
1dd55890
BJ
105 for (; m != NULL; m = m->m_next) {
106 if (m->m_free) panic("ipinput already free");
e1d82856 107 i += m->m_len;
1dd55890 108 }
e6dd2097
BJ
109 m = m0;
110 if (i != ip->ip_len) {
405c9168
BJ
111 if (i < ip->ip_len) {
112 ipstat.ips_tooshort++;
4ad99bae 113 goto bad;
405c9168 114 }
e6dd2097 115 m_adj(m, ip->ip_len - i);
d10bd5b7 116 }
e1d82856 117
e6dd2097
BJ
118 /*
119 * Process options and, if not destined for us,
120 * ship it on.
121 */
122 if (hlen > sizeof (struct ip))
cdad2eb1 123 ip_dooptions(ip);
8a13b737 124 if (ifnet && ip->ip_dst.s_addr != ifnet->if_addr.s_addr &&
92c7b8c1 125 if_ifwithaddr(ip->ip_dst) == 0) {
1e977657
BJ
126
127 goto bad;
128#ifdef notdef
129 printf("ip->ip_dst %x ip->ip_ttl %x\n",
130 ip->ip_dst, ip->ip_ttl);
e6dd2097 131 if (--ip->ip_ttl == 0) {
cdad2eb1 132 icmp_error(ip, ICMP_TIMXCEED, 0);
8a13b737 133 goto next;
e6dd2097 134 }
7c08c626
BJ
135 mopt = m_get(M_DONTWAIT);
136 if (mopt == 0)
137 goto bad;
138 ip_stripoptions(ip, mopt);
1e977657
BJ
139 /* 0 here means no directed broadcast */
140 (void) ip_output(m0, mopt, 0);
8a13b737 141 goto next;
1e977657 142#endif
d10bd5b7 143 }
e1d82856 144
e6dd2097
BJ
145 /*
146 * Look for queue of fragments
147 * of this datagram.
148 */
149 for (fp = ipq.next; fp != &ipq; fp = fp->next)
150 if (ip->ip_id == fp->ipq_id &&
151 ip->ip_src.s_addr == fp->ipq_src.s_addr &&
152 ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
153 ip->ip_p == fp->ipq_p)
154 goto found;
155 fp = 0;
156found:
e1d82856 157
e6dd2097
BJ
158 /*
159 * Adjust ip_len to not reflect header,
160 * set ip_mff if more fragments are expected,
161 * convert offset of this to bytes.
162 */
163 ip->ip_len -= hlen;
eb44bfb2 164 ((struct ipasfrag *)ip)->ipf_mff = 0;
e6dd2097 165 if (ip->ip_off & IP_MF)
eb44bfb2 166 ((struct ipasfrag *)ip)->ipf_mff = 1;
e6dd2097 167 ip->ip_off <<= 3;
e1d82856 168
e6dd2097
BJ
169 /*
170 * If datagram marked as having more fragments
171 * or if this is not the first fragment,
172 * attempt reassembly; if it succeeds, proceed.
173 */
eb44bfb2
BJ
174 if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
175 ip = ip_reass((struct ipasfrag *)ip, fp);
e6dd2097 176 if (ip == 0)
8a13b737 177 goto next;
e6dd2097
BJ
178 hlen = ip->ip_hl << 2;
179 m = dtom(ip);
180 } else
181 if (fp)
182 (void) ip_freef(fp);
4ad99bae
BJ
183
184 /*
185 * Switch out to protocol's input routine.
186 */
eb44bfb2 187 (*protosw[ip_protox[ip->ip_p]].pr_input)(m);
8a13b737 188 goto next;
4ad99bae
BJ
189bad:
190 m_freem(m);
8a13b737 191 goto next;
e6dd2097 192}
e1d82856 193
e6dd2097
BJ
194/*
195 * Take incoming datagram fragment and try to
4ad99bae 196 * reassemble it into whole datagram. If a chain for
e6dd2097
BJ
197 * reassembly of this datagram already exists, then it
198 * is given as fp; otherwise have to make a chain.
199 */
200struct ip *
201ip_reass(ip, fp)
eb44bfb2 202 register struct ipasfrag *ip;
e6dd2097
BJ
203 register struct ipq *fp;
204{
205 register struct mbuf *m = dtom(ip);
eb44bfb2 206 register struct ipasfrag *q;
e6dd2097
BJ
207 struct mbuf *t;
208 int hlen = ip->ip_hl << 2;
209 int i, next;
4ad99bae 210COUNT(IP_REASS);
d10bd5b7 211
e6dd2097
BJ
212 /*
213 * Presence of header sizes in mbufs
214 * would confuse code below.
215 */
216 m->m_off += hlen;
217 m->m_len -= hlen;
d10bd5b7 218
e6dd2097
BJ
219 /*
220 * If first fragment to arrive, create a reassembly queue.
221 */
222 if (fp == 0) {
e6b33a03 223 if ((t = m_get(M_WAIT)) == NULL)
e6dd2097
BJ
224 goto dropfrag;
225 t->m_off = MMINOFF;
226 fp = mtod(t, struct ipq *);
227 insque(fp, &ipq);
228 fp->ipq_ttl = IPFRAGTTL;
229 fp->ipq_p = ip->ip_p;
230 fp->ipq_id = ip->ip_id;
eb44bfb2
BJ
231 fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
232 fp->ipq_src = ((struct ip *)ip)->ip_src;
233 fp->ipq_dst = ((struct ip *)ip)->ip_dst;
405c9168
BJ
234 q = (struct ipasfrag *)fp;
235 goto insert;
e6dd2097 236 }
e1d82856 237
e6dd2097
BJ
238 /*
239 * Find a segment which begins after this one does.
240 */
eb44bfb2 241 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
e6dd2097
BJ
242 if (q->ip_off > ip->ip_off)
243 break;
e1d82856 244
e6dd2097
BJ
245 /*
246 * If there is a preceding segment, it may provide some of
247 * our data already. If so, drop the data from the incoming
248 * segment. If it provides all of our data, drop us.
249 */
eb44bfb2
BJ
250 if (q->ipf_prev != (struct ipasfrag *)fp) {
251 i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
e6dd2097
BJ
252 if (i > 0) {
253 if (i >= ip->ip_len)
254 goto dropfrag;
255 m_adj(dtom(ip), i);
256 ip->ip_off += i;
257 ip->ip_len -= i;
e1d82856 258 }
d10bd5b7 259 }
e1d82856 260
e6dd2097
BJ
261 /*
262 * While we overlap succeeding segments trim them or,
263 * if they are completely covered, dequeue them.
264 */
eb44bfb2 265 while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
e6dd2097
BJ
266 i = (ip->ip_off + ip->ip_len) - q->ip_off;
267 if (i < q->ip_len) {
268 q->ip_len -= i;
c107df34 269 q->ip_off += i;
e6dd2097
BJ
270 m_adj(dtom(q), i);
271 break;
272 }
eb44bfb2
BJ
273 q = q->ipf_next;
274 m_freem(dtom(q->ipf_prev));
275 ip_deq(q->ipf_prev);
e6dd2097 276 }
e1d82856 277
405c9168 278insert:
e6dd2097
BJ
279 /*
280 * Stick new segment in its place;
281 * check for complete reassembly.
282 */
eb44bfb2 283 ip_enq(ip, q->ipf_prev);
e6dd2097 284 next = 0;
eb44bfb2 285 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
e6dd2097
BJ
286 if (q->ip_off != next)
287 return (0);
288 next += q->ip_len;
289 }
eb44bfb2 290 if (q->ipf_prev->ipf_mff)
e6dd2097 291 return (0);
e1d82856 292
e6dd2097
BJ
293 /*
294 * Reassembly is complete; concatenate fragments.
295 */
296 q = fp->ipq_next;
297 m = dtom(q);
298 t = m->m_next;
299 m->m_next = 0;
300 m_cat(m, t);
dfb346d0
BJ
301 q = q->ipf_next;
302 while (q != (struct ipasfrag *)fp) {
303 t = dtom(q);
304 q = q->ipf_next;
305 m_cat(m, t);
306 }
e1d82856 307
e6dd2097
BJ
308 /*
309 * Create header for new ip packet by
310 * modifying header of first packet;
311 * dequeue and discard fragment reassembly header.
312 * Make header visible.
313 */
314 ip = fp->ipq_next;
315 ip->ip_len = next;
eb44bfb2
BJ
316 ((struct ip *)ip)->ip_src = fp->ipq_src;
317 ((struct ip *)ip)->ip_dst = fp->ipq_dst;
e6dd2097 318 remque(fp);
cdad2eb1 319 (void) m_free(dtom(fp));
e6dd2097 320 m = dtom(ip);
eb44bfb2
BJ
321 m->m_len += sizeof (struct ipasfrag);
322 m->m_off -= sizeof (struct ipasfrag);
323 return ((struct ip *)ip);
e6dd2097
BJ
324
325dropfrag:
326 m_freem(m);
327 return (0);
e1d82856
BJ
328}
329
e6dd2097
BJ
330/*
331 * Free a fragment reassembly header and all
332 * associated datagrams.
333 */
334struct ipq *
335ip_freef(fp)
336 struct ipq *fp;
e1d82856 337{
eb44bfb2 338 register struct ipasfrag *q;
e6dd2097 339 struct mbuf *m;
4ad99bae 340COUNT(IP_FREEF);
e6dd2097 341
eb44bfb2 342 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
e6dd2097
BJ
343 m_freem(dtom(q));
344 m = dtom(fp);
345 fp = fp->next;
346 remque(fp->prev);
cdad2eb1 347 (void) m_free(m);
e6dd2097 348 return (fp);
e1d82856
BJ
349}
350
e6dd2097
BJ
351/*
352 * Put an ip fragment on a reassembly chain.
353 * Like insque, but pointers in middle of structure.
354 */
355ip_enq(p, prev)
eb44bfb2 356 register struct ipasfrag *p, *prev;
e1d82856 357{
e1d82856 358
4ad99bae 359COUNT(IP_ENQ);
eb44bfb2
BJ
360 p->ipf_prev = prev;
361 p->ipf_next = prev->ipf_next;
362 prev->ipf_next->ipf_prev = p;
363 prev->ipf_next = p;
e1d82856
BJ
364}
365
e6dd2097
BJ
366/*
367 * To ip_enq as remque is to insque.
368 */
369ip_deq(p)
eb44bfb2 370 register struct ipasfrag *p;
e1d82856 371{
e6dd2097 372
4ad99bae 373COUNT(IP_DEQ);
eb44bfb2
BJ
374 p->ipf_prev->ipf_next = p->ipf_next;
375 p->ipf_next->ipf_prev = p->ipf_prev;
e1d82856
BJ
376}
377
e6dd2097
BJ
378/*
379 * IP timer processing;
380 * if a timer expires on a reassembly
381 * queue, discard it.
382 */
d52566dd 383ip_slowtimo()
e1d82856
BJ
384{
385 register struct ipq *fp;
e6dd2097 386 int s = splnet();
e1d82856 387
4ad99bae 388COUNT(IP_SLOWTIMO);
4aed14e3
BJ
389 fp = ipq.next;
390 if (fp == 0) {
391 splx(s);
392 return;
393 }
394 while (fp != &ipq)
e6dd2097
BJ
395 if (--fp->ipq_ttl == 0)
396 fp = ip_freef(fp);
397 else
398 fp = fp->next;
e6dd2097 399 splx(s);
e1d82856
BJ
400}
401
4ad99bae
BJ
402/*
403 * Drain off all datagram fragments.
404 */
d52566dd
BJ
405ip_drain()
406{
407
4ad99bae
BJ
408COUNT(IP_DRAIN);
409 while (ipq.next != &ipq)
410 (void) ip_freef(ipq.next);
d52566dd 411}
2b4b57cd 412
e6dd2097
BJ
413/*
414 * Do option processing on a datagram,
415 * possibly discarding it if bad options
416 * are encountered.
417 */
418ip_dooptions(ip)
419 struct ip *ip;
e1d82856 420{
e6dd2097 421 register u_char *cp;
cdad2eb1 422 int opt, optlen, cnt;
2b4b57cd 423 struct in_addr *sin;
d52566dd 424 register struct ip_timestamp *ipt;
4ad99bae
BJ
425 register struct ifnet *ifp;
426 struct in_addr t;
e6dd2097 427
4ad99bae 428COUNT(IP_DOOPTIONS);
e6dd2097
BJ
429 cp = (u_char *)(ip + 1);
430 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
431 for (; cnt > 0; cnt -= optlen, cp += optlen) {
432 opt = cp[0];
433 if (opt == IPOPT_EOL)
434 break;
435 if (opt == IPOPT_NOP)
436 optlen = 1;
437 else
438 optlen = cp[1];
439 switch (opt) {
e1d82856 440
e6dd2097
BJ
441 default:
442 break;
e1d82856 443
4ad99bae
BJ
444 /*
445 * Source routing with record.
446 * Find interface with current destination address.
447 * If none on this machine then drop if strictly routed,
448 * or do nothing if loosely routed.
449 * Record interface address and bring up next address
450 * component. If strictly routed make sure next
451 * address on directly accessible net.
452 */
e6dd2097 453 case IPOPT_LSRR:
d52566dd 454 if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
e6dd2097 455 break;
2b4b57cd 456 sin = (struct in_addr *)(cp + cp[2]);
4ad99bae
BJ
457 ifp = if_ifwithaddr(*sin);
458 if (ifp == 0) {
459 if (opt == IPOPT_SSRR)
460 goto bad;
461 break;
e6dd2097 462 }
4ad99bae
BJ
463 t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
464 cp[2] += 4;
465 if (cp[2] > optlen - (sizeof (long) - 1))
466 break;
467 ip->ip_dst = sin[1];
468 if (opt == IPOPT_SSRR && if_ifonnetof(ip->ip_dst)==0)
469 goto bad;
e6dd2097
BJ
470 break;
471
472 case IPOPT_TS:
d52566dd
BJ
473 ipt = (struct ip_timestamp *)cp;
474 if (ipt->ipt_len < 5)
e6dd2097 475 goto bad;
d52566dd
BJ
476 if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
477 if (++ipt->ipt_oflw == 0)
e6dd2097 478 goto bad;
e6dd2097
BJ
479 break;
480 }
2b4b57cd 481 sin = (struct in_addr *)(cp+cp[2]);
d52566dd 482 switch (ipt->ipt_flg) {
e1d82856 483
e6dd2097
BJ
484 case IPOPT_TS_TSONLY:
485 break;
e1d82856 486
e6dd2097 487 case IPOPT_TS_TSANDADDR:
d52566dd 488 if (ipt->ipt_ptr + 8 > ipt->ipt_len)
e6dd2097 489 goto bad;
4ad99bae
BJ
490 /* stamp with ``first'' interface address */
491 *sin++ = ifnet->if_addr;
e6dd2097
BJ
492 break;
493
494 case IPOPT_TS_PRESPEC:
4ad99bae
BJ
495 if (if_ifwithaddr(*sin) == 0)
496 continue;
d52566dd 497 if (ipt->ipt_ptr + 8 > ipt->ipt_len)
e6dd2097 498 goto bad;
d52566dd 499 ipt->ipt_ptr += 4;
e1d82856
BJ
500 break;
501
502 default:
e6dd2097 503 goto bad;
e1d82856 504 }
2b4b57cd 505 *(n_time *)sin = iptime();
d52566dd 506 ipt->ipt_ptr += 4;
e6dd2097 507 }
e1d82856 508 }
cdad2eb1 509 return;
e6dd2097
BJ
510bad:
511 /* SHOULD FORCE ICMP MESSAGE */
cdad2eb1 512 return;
e1d82856
BJ
513}
514
e6dd2097 515/*
4ad99bae
BJ
516 * Strip out IP options, at higher
517 * level protocol in the kernel.
518 * Second argument is buffer to which options
519 * will be moved, and return value is their length.
e6dd2097 520 */
7c08c626 521ip_stripoptions(ip, mopt)
e6dd2097 522 struct ip *ip;
7c08c626 523 struct mbuf *mopt;
e1d82856 524{
e6dd2097
BJ
525 register int i;
526 register struct mbuf *m;
e6dd2097 527 int olen;
4ad99bae 528COUNT(IP_STRIPOPTIONS);
e6dd2097
BJ
529
530 olen = (ip->ip_hl<<2) - sizeof (struct ip);
4ad99bae
BJ
531 m = dtom(ip);
532 ip++;
7c08c626
BJ
533 if (mopt) {
534 mopt->m_len = olen;
535 mopt->m_off = MMINOFF;
536 bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen);
537 }
e6dd2097 538 i = m->m_len - (sizeof (struct ip) + olen);
cdad2eb1 539 bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
4aed14e3 540 m->m_len -= olen;
e1d82856 541}