much more generous NFILE
[unix-history] / usr / src / sys / netinet / ip_input.c
CommitLineData
a9f3e174 1/* ip_input.c 1.38 82/03/31 */
6e8b2eca 2
e1d82856 3#include "../h/param.h"
d10bd5b7 4#include "../h/systm.h"
e6dd2097
BJ
5#include "../h/clock.h"
6#include "../h/mbuf.h"
eb44bfb2 7#include "../h/protosw.h"
2b4b57cd 8#include "../h/socket.h"
8a13b737
BJ
9#include "../net/in.h"
10#include "../net/in_systm.h"
4ad99bae 11#include "../net/if.h"
8a13b737 12#include "../net/ip.h" /* belongs before in.h */
eb44bfb2 13#include "../net/ip_var.h"
d52566dd
BJ
14#include "../net/ip_icmp.h"
15#include "../net/tcp.h"
e6dd2097 16
/* Maps an IP protocol number to an index into protosw[]; filled in by ip_init(). */
eb44bfb2 17u_char ip_protox[IPPROTO_MAX];
/* Queue limit that ip_init() installs as ipintrq.ifq_maxlen. */
1e977657 18int ipqmaxlen = IFQ_MAXLEN;
ee787340 19struct ifnet *ifinet; /* first inet interface */
eb44bfb2 20
d52566dd 21/*
b454c3ea 22 * IP initialization: fill in IP protocol switch table.
405c9168 23 * All protocols not implemented in kernel go to raw IP protocol handler.
d52566dd
BJ
24 */
25ip_init()
26{
eb44bfb2
BJ
27 register struct protosw *pr;
28 register int i;
eb44bfb2 29
4ad99bae 30COUNT(IP_INIT);
eb44bfb2
BJ
31 pr = pffindproto(PF_INET, IPPROTO_RAW);
32 if (pr == 0)
33 panic("ip_init");
34 for (i = 0; i < IPPROTO_MAX; i++)
35 ip_protox[i] = pr - protosw;
36 for (pr = protosw; pr <= protoswLAST; pr++)
37 if (pr->pr_family == PF_INET &&
38 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
39 ip_protox[pr->pr_protocol] = pr - protosw;
d52566dd
BJ
40 ipq.next = ipq.prev = &ipq;
41 ip_id = time & 0xffff;
1e977657 42 ipintrq.ifq_maxlen = ipqmaxlen;
ee787340 43 ifinet = if_ifwithaf(AF_INET);
d52566dd
BJ
44}
45
/* When nonzero, ipintr() verifies the header checksum of each datagram. */
eb44bfb2 46u_char ipcksum = 1;
e6dd2097 47struct ip *ip_reass();
a9f3e174
SL
/* When zero, ipintr() drops datagrams for other hosts instead of forwarding them. */
48int ipforwarding = 1;
/* When nonzero, ipintr() prints a line for each datagram it considers forwarding. */
49int ipprintfs = 0;
/* Scratch address passed to if_ifwithaddr(); only sin_addr is rewritten per lookup. */
ee787340 50struct sockaddr_in ipaddr = { AF_INET };
e6dd2097 51
e6dd2097
BJ
52/*
53 * Ip input routine. Checksum and byte swap header. If fragmented
54 * try to reassamble. If complete and fragment queue exists, discard.
55 * Process options. Pass to next level.
56 */
8a13b737 57ipintr()
e1d82856 58{
2b4b57cd 59 register struct ip *ip;
8a13b737 60 register struct mbuf *m;
7c08c626 61 struct mbuf *m0, *mopt;
e6dd2097 62 register int i;
e1d82856 63 register struct ipq *fp;
8a13b737 64 int hlen, s;
e1d82856 65
8a13b737
BJ
66COUNT(IPINTR);
67next:
e6dd2097 68 /*
8a13b737
BJ
69 * Get next datagram off input queue and get IP header
70 * in first mbuf.
e6dd2097 71 */
8a13b737
BJ
72 s = splimp();
73 IF_DEQUEUE(&ipintrq, m);
74 splx(s);
7ac98d3c 75 if (m == 0)
8a13b737 76 return;
9411b6be
BJ
77 if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
78 (m = m_pullup(m, sizeof (struct ip))) == 0)
79 return;
e6dd2097 80 ip = mtod(m, struct ip *);
405c9168 81 if ((hlen = ip->ip_hl << 2) > m->m_len) {
9411b6be
BJ
82 if ((m = m_pullup(m, hlen)) == 0)
83 return;
405c9168
BJ
84 ip = mtod(m, struct ip *);
85 }
4ad99bae 86 if (ipcksum)
7c08c626 87 if (ip->ip_sum = in_cksum(m, hlen)) {
405c9168 88 printf("ip_sum %x\n", ip->ip_sum); /* XXX */
4ad99bae
BJ
89 ipstat.ips_badsum++;
90 goto bad;
e1d82856 91 }
4ad99bae 92
7c08c626 93#if vax
4ad99bae
BJ
94 /*
95 * Convert fields to host representation.
96 */
cdad2eb1 97 ip->ip_len = ntohs((u_short)ip->ip_len);
e6dd2097 98 ip->ip_id = ntohs(ip->ip_id);
4ad99bae 99 ip->ip_off = ntohs((u_short)ip->ip_off);
7c08c626 100#endif
e1d82856 101
d10bd5b7 102 /*
e6dd2097
BJ
103 * Check that the amount of data in the buffers
104 * is as at least much as the IP header would have us expect.
105 * Trim mbufs if longer than we expect.
106 * Drop packet if shorter than we expect.
d10bd5b7 107 */
e6dd2097 108 i = 0;
405c9168 109 m0 = m;
1dd55890
BJ
110 for (; m != NULL; m = m->m_next) {
111 if (m->m_free) panic("ipinput already free");
e1d82856 112 i += m->m_len;
1dd55890 113 }
e6dd2097
BJ
114 m = m0;
115 if (i != ip->ip_len) {
405c9168
BJ
116 if (i < ip->ip_len) {
117 ipstat.ips_tooshort++;
4ad99bae 118 goto bad;
405c9168 119 }
e6dd2097 120 m_adj(m, ip->ip_len - i);
d10bd5b7 121 }
e1d82856 122
e6dd2097
BJ
123 /*
124 * Process options and, if not destined for us,
125 * ship it on.
126 */
127 if (hlen > sizeof (struct ip))
cdad2eb1 128 ip_dooptions(ip);
ee787340
SL
129
130 /*
c124e997
SL
131 * Fast check on the first internet
132 * interface in the list.
ee787340
SL
133 */
134 if (ifinet) {
135 struct sockaddr_in *sin;
136
137 sin = (struct sockaddr_in *)&ifinet->if_addr;
138 if (sin->sin_addr.s_addr == ip->ip_dst.s_addr)
139 goto ours;
c124e997
SL
140 if ((ifinet->if_flags & IFF_BROADCAST) &&
141 sin->sin_addr.s_addr == ip->ip_dst.s_addr)
142 goto ours;
ee787340
SL
143 }
144 ipaddr.sin_addr = ip->ip_dst;
145 if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) {
a9f3e174
SL
146 if (ipprintfs)
147 printf("forward: src %x dst %x ttl %x\n", ip->ip_src,
148 ip->ip_dst, ip->ip_ttl);
ee787340
SL
149 if (ipforwarding == 0)
150 goto bad;
151 if (ip->ip_ttl < IPTTLDEC) {
cdad2eb1 152 icmp_error(ip, ICMP_TIMXCEED, 0);
8a13b737 153 goto next;
e6dd2097 154 }
ee787340 155 ip->ip_ttl -= IPTTLDEC;
7c08c626
BJ
156 mopt = m_get(M_DONTWAIT);
157 if (mopt == 0)
158 goto bad;
159 ip_stripoptions(ip, mopt);
ee787340 160
c124e997 161 /* last 0 here means no directed broadcast */
ee787340 162 (void) ip_output(m0, mopt, 0, 0);
8a13b737 163 goto next;
d10bd5b7 164 }
e1d82856 165
ee787340 166ours:
e6dd2097
BJ
167 /*
168 * Look for queue of fragments
169 * of this datagram.
170 */
171 for (fp = ipq.next; fp != &ipq; fp = fp->next)
172 if (ip->ip_id == fp->ipq_id &&
173 ip->ip_src.s_addr == fp->ipq_src.s_addr &&
174 ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
175 ip->ip_p == fp->ipq_p)
176 goto found;
177 fp = 0;
178found:
e1d82856 179
e6dd2097
BJ
180 /*
181 * Adjust ip_len to not reflect header,
182 * set ip_mff if more fragments are expected,
183 * convert offset of this to bytes.
184 */
185 ip->ip_len -= hlen;
eb44bfb2 186 ((struct ipasfrag *)ip)->ipf_mff = 0;
e6dd2097 187 if (ip->ip_off & IP_MF)
eb44bfb2 188 ((struct ipasfrag *)ip)->ipf_mff = 1;
e6dd2097 189 ip->ip_off <<= 3;
e1d82856 190
e6dd2097
BJ
191 /*
192 * If datagram marked as having more fragments
193 * or if this is not the first fragment,
194 * attempt reassembly; if it succeeds, proceed.
195 */
eb44bfb2
BJ
196 if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
197 ip = ip_reass((struct ipasfrag *)ip, fp);
e6dd2097 198 if (ip == 0)
8a13b737 199 goto next;
e6dd2097
BJ
200 hlen = ip->ip_hl << 2;
201 m = dtom(ip);
202 } else
203 if (fp)
204 (void) ip_freef(fp);
4ad99bae
BJ
205
206 /*
207 * Switch out to protocol's input routine.
208 */
eb44bfb2 209 (*protosw[ip_protox[ip->ip_p]].pr_input)(m);
8a13b737 210 goto next;
4ad99bae
BJ
211bad:
212 m_freem(m);
8a13b737 213 goto next;
e6dd2097 214}
e1d82856 215
e6dd2097
BJ
216/*
217 * Take incoming datagram fragment and try to
4ad99bae 218 * reassemble it into whole datagram. If a chain for
e6dd2097
BJ
219 * reassembly of this datagram already exists, then it
220 * is given as fp; otherwise have to make a chain.
221 */
222struct ip *
223ip_reass(ip, fp)
eb44bfb2 224 register struct ipasfrag *ip;
e6dd2097
BJ
225 register struct ipq *fp;
226{
227 register struct mbuf *m = dtom(ip);
eb44bfb2 228 register struct ipasfrag *q;
e6dd2097
BJ
229 struct mbuf *t;
230 int hlen = ip->ip_hl << 2;
231 int i, next;
4ad99bae 232COUNT(IP_REASS);
d10bd5b7 233
e6dd2097
BJ
234 /*
235 * Presence of header sizes in mbufs
236 * would confuse code below.
237 */
238 m->m_off += hlen;
239 m->m_len -= hlen;
d10bd5b7 240
e6dd2097
BJ
241 /*
242 * If first fragment to arrive, create a reassembly queue.
243 */
244 if (fp == 0) {
e6b33a03 245 if ((t = m_get(M_WAIT)) == NULL)
e6dd2097
BJ
246 goto dropfrag;
247 t->m_off = MMINOFF;
248 fp = mtod(t, struct ipq *);
249 insque(fp, &ipq);
250 fp->ipq_ttl = IPFRAGTTL;
251 fp->ipq_p = ip->ip_p;
252 fp->ipq_id = ip->ip_id;
eb44bfb2
BJ
253 fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
254 fp->ipq_src = ((struct ip *)ip)->ip_src;
255 fp->ipq_dst = ((struct ip *)ip)->ip_dst;
405c9168
BJ
256 q = (struct ipasfrag *)fp;
257 goto insert;
e6dd2097 258 }
e1d82856 259
e6dd2097
BJ
260 /*
261 * Find a segment which begins after this one does.
262 */
eb44bfb2 263 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
e6dd2097
BJ
264 if (q->ip_off > ip->ip_off)
265 break;
e1d82856 266
e6dd2097
BJ
267 /*
268 * If there is a preceding segment, it may provide some of
269 * our data already. If so, drop the data from the incoming
270 * segment. If it provides all of our data, drop us.
271 */
eb44bfb2
BJ
272 if (q->ipf_prev != (struct ipasfrag *)fp) {
273 i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
e6dd2097
BJ
274 if (i > 0) {
275 if (i >= ip->ip_len)
276 goto dropfrag;
277 m_adj(dtom(ip), i);
278 ip->ip_off += i;
279 ip->ip_len -= i;
e1d82856 280 }
d10bd5b7 281 }
e1d82856 282
e6dd2097
BJ
283 /*
284 * While we overlap succeeding segments trim them or,
285 * if they are completely covered, dequeue them.
286 */
eb44bfb2 287 while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
e6dd2097
BJ
288 i = (ip->ip_off + ip->ip_len) - q->ip_off;
289 if (i < q->ip_len) {
290 q->ip_len -= i;
c107df34 291 q->ip_off += i;
e6dd2097
BJ
292 m_adj(dtom(q), i);
293 break;
294 }
eb44bfb2
BJ
295 q = q->ipf_next;
296 m_freem(dtom(q->ipf_prev));
297 ip_deq(q->ipf_prev);
e6dd2097 298 }
e1d82856 299
405c9168 300insert:
e6dd2097
BJ
301 /*
302 * Stick new segment in its place;
303 * check for complete reassembly.
304 */
eb44bfb2 305 ip_enq(ip, q->ipf_prev);
e6dd2097 306 next = 0;
eb44bfb2 307 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
e6dd2097
BJ
308 if (q->ip_off != next)
309 return (0);
310 next += q->ip_len;
311 }
eb44bfb2 312 if (q->ipf_prev->ipf_mff)
e6dd2097 313 return (0);
e1d82856 314
e6dd2097
BJ
315 /*
316 * Reassembly is complete; concatenate fragments.
317 */
318 q = fp->ipq_next;
319 m = dtom(q);
320 t = m->m_next;
321 m->m_next = 0;
322 m_cat(m, t);
dfb346d0
BJ
323 q = q->ipf_next;
324 while (q != (struct ipasfrag *)fp) {
325 t = dtom(q);
326 q = q->ipf_next;
327 m_cat(m, t);
328 }
e1d82856 329
e6dd2097
BJ
330 /*
331 * Create header for new ip packet by
332 * modifying header of first packet;
333 * dequeue and discard fragment reassembly header.
334 * Make header visible.
335 */
336 ip = fp->ipq_next;
337 ip->ip_len = next;
eb44bfb2
BJ
338 ((struct ip *)ip)->ip_src = fp->ipq_src;
339 ((struct ip *)ip)->ip_dst = fp->ipq_dst;
e6dd2097 340 remque(fp);
cdad2eb1 341 (void) m_free(dtom(fp));
e6dd2097 342 m = dtom(ip);
eb44bfb2
BJ
343 m->m_len += sizeof (struct ipasfrag);
344 m->m_off -= sizeof (struct ipasfrag);
345 return ((struct ip *)ip);
e6dd2097
BJ
346
347dropfrag:
348 m_freem(m);
349 return (0);
e1d82856
BJ
350}
351
e6dd2097
BJ
352/*
353 * Free a fragment reassembly header and all
354 * associated datagrams.
355 */
356struct ipq *
357ip_freef(fp)
358 struct ipq *fp;
e1d82856 359{
eb44bfb2 360 register struct ipasfrag *q;
e6dd2097 361 struct mbuf *m;
4ad99bae 362COUNT(IP_FREEF);
e6dd2097 363
eb44bfb2 364 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
e6dd2097
BJ
365 m_freem(dtom(q));
366 m = dtom(fp);
367 fp = fp->next;
368 remque(fp->prev);
cdad2eb1 369 (void) m_free(m);
e6dd2097 370 return (fp);
e1d82856
BJ
371}
372
e6dd2097
BJ
373/*
374 * Put an ip fragment on a reassembly chain.
375 * Like insque, but pointers in middle of structure.
376 */
377ip_enq(p, prev)
eb44bfb2 378 register struct ipasfrag *p, *prev;
e1d82856 379{
e1d82856 380
4ad99bae 381COUNT(IP_ENQ);
eb44bfb2
BJ
382 p->ipf_prev = prev;
383 p->ipf_next = prev->ipf_next;
384 prev->ipf_next->ipf_prev = p;
385 prev->ipf_next = p;
e1d82856
BJ
386}
387
e6dd2097
BJ
388/*
389 * To ip_enq as remque is to insque.
390 */
391ip_deq(p)
eb44bfb2 392 register struct ipasfrag *p;
e1d82856 393{
e6dd2097 394
4ad99bae 395COUNT(IP_DEQ);
eb44bfb2
BJ
396 p->ipf_prev->ipf_next = p->ipf_next;
397 p->ipf_next->ipf_prev = p->ipf_prev;
e1d82856
BJ
398}
399
e6dd2097
BJ
400/*
401 * IP timer processing;
402 * if a timer expires on a reassembly
403 * queue, discard it.
404 */
d52566dd 405ip_slowtimo()
e1d82856
BJ
406{
407 register struct ipq *fp;
e6dd2097 408 int s = splnet();
e1d82856 409
4ad99bae 410COUNT(IP_SLOWTIMO);
4aed14e3
BJ
411 fp = ipq.next;
412 if (fp == 0) {
413 splx(s);
414 return;
415 }
416 while (fp != &ipq)
e6dd2097
BJ
417 if (--fp->ipq_ttl == 0)
418 fp = ip_freef(fp);
419 else
420 fp = fp->next;
e6dd2097 421 splx(s);
e1d82856
BJ
422}
423
4ad99bae
BJ
424/*
425 * Drain off all datagram fragments.
426 */
d52566dd
BJ
427ip_drain()
428{
429
4ad99bae
BJ
430COUNT(IP_DRAIN);
431 while (ipq.next != &ipq)
432 (void) ip_freef(ipq.next);
d52566dd 433}
2b4b57cd 434
e6dd2097
BJ
435/*
436 * Do option processing on a datagram,
437 * possibly discarding it if bad options
438 * are encountered.
439 */
440ip_dooptions(ip)
441 struct ip *ip;
e1d82856 442{
e6dd2097 443 register u_char *cp;
cdad2eb1 444 int opt, optlen, cnt;
2b4b57cd 445 struct in_addr *sin;
d52566dd 446 register struct ip_timestamp *ipt;
4ad99bae
BJ
447 register struct ifnet *ifp;
448 struct in_addr t;
e6dd2097 449
4ad99bae 450COUNT(IP_DOOPTIONS);
e6dd2097
BJ
451 cp = (u_char *)(ip + 1);
452 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
453 for (; cnt > 0; cnt -= optlen, cp += optlen) {
454 opt = cp[0];
455 if (opt == IPOPT_EOL)
456 break;
457 if (opt == IPOPT_NOP)
458 optlen = 1;
459 else
460 optlen = cp[1];
461 switch (opt) {
e1d82856 462
e6dd2097
BJ
463 default:
464 break;
e1d82856 465
4ad99bae
BJ
466 /*
467 * Source routing with record.
468 * Find interface with current destination address.
469 * If none on this machine then drop if strictly routed,
470 * or do nothing if loosely routed.
471 * Record interface address and bring up next address
472 * component. If strictly routed make sure next
473 * address on directly accessible net.
474 */
e6dd2097 475 case IPOPT_LSRR:
d52566dd 476 if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
e6dd2097 477 break;
2b4b57cd 478 sin = (struct in_addr *)(cp + cp[2]);
ee787340
SL
479 ipaddr.sin_addr = *sin;
480 ifp = if_ifwithaddr((struct sockaddr *)&ipaddr);
4ad99bae
BJ
481 if (ifp == 0) {
482 if (opt == IPOPT_SSRR)
483 goto bad;
484 break;
e6dd2097 485 }
4ad99bae
BJ
486 t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
487 cp[2] += 4;
488 if (cp[2] > optlen - (sizeof (long) - 1))
489 break;
490 ip->ip_dst = sin[1];
ee787340
SL
491 if (opt == IPOPT_SSRR &&
492 if_ifonnetof(ip->ip_dst.s_net) == 0)
4ad99bae 493 goto bad;
e6dd2097
BJ
494 break;
495
496 case IPOPT_TS:
d52566dd
BJ
497 ipt = (struct ip_timestamp *)cp;
498 if (ipt->ipt_len < 5)
e6dd2097 499 goto bad;
d52566dd
BJ
500 if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
501 if (++ipt->ipt_oflw == 0)
e6dd2097 502 goto bad;
e6dd2097
BJ
503 break;
504 }
2b4b57cd 505 sin = (struct in_addr *)(cp+cp[2]);
d52566dd 506 switch (ipt->ipt_flg) {
e1d82856 507
e6dd2097
BJ
508 case IPOPT_TS_TSONLY:
509 break;
e1d82856 510
e6dd2097 511 case IPOPT_TS_TSANDADDR:
d52566dd 512 if (ipt->ipt_ptr + 8 > ipt->ipt_len)
e6dd2097 513 goto bad;
ee787340
SL
514 if (ifinet == 0)
515 goto bad; /* ??? */
516 *sin++ = ((struct sockaddr_in *)&ifinet->if_addr)->sin_addr;
e6dd2097
BJ
517 break;
518
519 case IPOPT_TS_PRESPEC:
ee787340
SL
520 ipaddr.sin_addr = *sin;
521 if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0)
4ad99bae 522 continue;
d52566dd 523 if (ipt->ipt_ptr + 8 > ipt->ipt_len)
e6dd2097 524 goto bad;
d52566dd 525 ipt->ipt_ptr += 4;
e1d82856
BJ
526 break;
527
528 default:
e6dd2097 529 goto bad;
e1d82856 530 }
2b4b57cd 531 *(n_time *)sin = iptime();
d52566dd 532 ipt->ipt_ptr += 4;
e6dd2097 533 }
e1d82856 534 }
cdad2eb1 535 return;
e6dd2097
BJ
536bad:
537 /* SHOULD FORCE ICMP MESSAGE */
cdad2eb1 538 return;
e1d82856
BJ
539}
540
e6dd2097 541/*
4ad99bae
BJ
542 * Strip out IP options, at higher
543 * level protocol in the kernel.
544 * Second argument is buffer to which options
545 * will be moved, and return value is their length.
e6dd2097 546 */
7c08c626 547ip_stripoptions(ip, mopt)
e6dd2097 548 struct ip *ip;
7c08c626 549 struct mbuf *mopt;
e1d82856 550{
e6dd2097
BJ
551 register int i;
552 register struct mbuf *m;
e6dd2097 553 int olen;
4ad99bae 554COUNT(IP_STRIPOPTIONS);
e6dd2097
BJ
555
556 olen = (ip->ip_hl<<2) - sizeof (struct ip);
4ad99bae
BJ
557 m = dtom(ip);
558 ip++;
7c08c626
BJ
559 if (mopt) {
560 mopt->m_len = olen;
561 mopt->m_off = MMINOFF;
562 bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen);
563 }
e6dd2097 564 i = m->m_len - (sizeof (struct ip) + olen);
cdad2eb1 565 bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
4aed14e3 566 m->m_len -= olen;
e1d82856 567}