interface queueing, restrictions on packets, tcp keep alives done
[unix-history] / usr / src / sys / netinet / ip_input.c
CommitLineData
1e977657 1/* ip_input.c 1.32 82/03/15 */
6e8b2eca 2
e1d82856 3#include "../h/param.h"
d10bd5b7 4#include "../h/systm.h"
e6dd2097
BJ
5#include "../h/clock.h"
6#include "../h/mbuf.h"
eb44bfb2 7#include "../h/protosw.h"
2b4b57cd 8#include "../h/socket.h"
8a13b737
BJ
9#include "../net/in.h"
10#include "../net/in_systm.h"
4ad99bae 11#include "../net/if.h"
8a13b737 12#include "../net/ip.h" /* belongs before in.h */
eb44bfb2 13#include "../net/ip_var.h"
d52566dd
BJ
14#include "../net/ip_icmp.h"
15#include "../net/tcp.h"
e6dd2097 16
/* Indexed by IP protocol number; each entry is an index into protosw, filled in by ip_init. */
eb44bfb2 17u_char ip_protox[IPPROTO_MAX];
/* Queue length limit that ip_init copies into ipintrq.ifq_maxlen. */
1e977657 18int ipqmaxlen = IFQ_MAXLEN;
eb44bfb2 19
d52566dd 20/*
b454c3ea 21 * IP initialization: fill in IP protocol switch table.
405c9168 22 * All protocols not implemented in kernel go to raw IP protocol handler.
d52566dd
BJ
23 */
24ip_init()
25{
eb44bfb2
BJ
26 register struct protosw *pr;
27 register int i;
eb44bfb2 28
4ad99bae 29COUNT(IP_INIT);
eb44bfb2
BJ
30 pr = pffindproto(PF_INET, IPPROTO_RAW);
31 if (pr == 0)
32 panic("ip_init");
33 for (i = 0; i < IPPROTO_MAX; i++)
34 ip_protox[i] = pr - protosw;
35 for (pr = protosw; pr <= protoswLAST; pr++)
36 if (pr->pr_family == PF_INET &&
37 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
38 ip_protox[pr->pr_protocol] = pr - protosw;
d52566dd
BJ
39 ipq.next = ipq.prev = &ipq;
40 ip_id = time & 0xffff;
1e977657 41 ipintrq.ifq_maxlen = ipqmaxlen;
d52566dd
BJ
42}
43
/* When nonzero, ipintr verifies the header checksum of each incoming datagram. */
eb44bfb2 44u_char ipcksum = 1;
e6dd2097
BJ
/* Forward declaration: ipintr calls ip_reass before its definition appears. */
45struct ip *ip_reass();
46
e6dd2097
BJ
47/*
48 * Ip input routine. Checksum and byte swap header. If fragmented
49 * try to reassamble. If complete and fragment queue exists, discard.
50 * Process options. Pass to next level.
51 */
8a13b737 52ipintr()
e1d82856 53{
2b4b57cd 54 register struct ip *ip;
8a13b737 55 register struct mbuf *m;
7c08c626 56 struct mbuf *m0, *mopt;
e6dd2097 57 register int i;
e1d82856 58 register struct ipq *fp;
8a13b737 59 int hlen, s;
e1d82856 60
8a13b737
BJ
61COUNT(IPINTR);
62next:
e6dd2097 63 /*
8a13b737
BJ
64 * Get next datagram off input queue and get IP header
65 * in first mbuf.
e6dd2097 66 */
8a13b737
BJ
67 s = splimp();
68 IF_DEQUEUE(&ipintrq, m);
69 splx(s);
7ac98d3c 70 if (m == 0)
8a13b737 71 return;
9411b6be
BJ
72 if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
73 (m = m_pullup(m, sizeof (struct ip))) == 0)
74 return;
e6dd2097 75 ip = mtod(m, struct ip *);
405c9168 76 if ((hlen = ip->ip_hl << 2) > m->m_len) {
9411b6be
BJ
77 if ((m = m_pullup(m, hlen)) == 0)
78 return;
405c9168
BJ
79 ip = mtod(m, struct ip *);
80 }
4ad99bae 81 if (ipcksum)
7c08c626 82 if (ip->ip_sum = in_cksum(m, hlen)) {
405c9168 83 printf("ip_sum %x\n", ip->ip_sum); /* XXX */
4ad99bae
BJ
84 ipstat.ips_badsum++;
85 goto bad;
e1d82856 86 }
4ad99bae 87
7c08c626 88#if vax
4ad99bae
BJ
89 /*
90 * Convert fields to host representation.
91 */
cdad2eb1 92 ip->ip_len = ntohs((u_short)ip->ip_len);
e6dd2097 93 ip->ip_id = ntohs(ip->ip_id);
4ad99bae 94 ip->ip_off = ntohs((u_short)ip->ip_off);
7c08c626 95#endif
e1d82856 96
d10bd5b7 97 /*
e6dd2097
BJ
98 * Check that the amount of data in the buffers
99 * is as at least much as the IP header would have us expect.
100 * Trim mbufs if longer than we expect.
101 * Drop packet if shorter than we expect.
d10bd5b7 102 */
e6dd2097 103 i = 0;
405c9168 104 m0 = m;
1dd55890
BJ
105 for (; m != NULL; m = m->m_next) {
106 if (m->m_free) panic("ipinput already free");
e1d82856 107 i += m->m_len;
1dd55890 108 }
e6dd2097
BJ
109 m = m0;
110 if (i != ip->ip_len) {
405c9168
BJ
111 if (i < ip->ip_len) {
112 ipstat.ips_tooshort++;
4ad99bae 113 goto bad;
405c9168 114 }
e6dd2097 115 m_adj(m, ip->ip_len - i);
d10bd5b7 116 }
e1d82856 117
e6dd2097
BJ
118 /*
119 * Process options and, if not destined for us,
120 * ship it on.
121 */
122 if (hlen > sizeof (struct ip))
cdad2eb1 123 ip_dooptions(ip);
8a13b737 124 if (ifnet && ip->ip_dst.s_addr != ifnet->if_addr.s_addr &&
92c7b8c1 125 if_ifwithaddr(ip->ip_dst) == 0) {
1e977657
BJ
126
127 goto bad;
128#ifdef notdef
129 printf("ip->ip_dst %x ip->ip_ttl %x\n",
130 ip->ip_dst, ip->ip_ttl);
e6dd2097 131 if (--ip->ip_ttl == 0) {
cdad2eb1 132 icmp_error(ip, ICMP_TIMXCEED, 0);
8a13b737 133 goto next;
e6dd2097 134 }
7c08c626
BJ
135 mopt = m_get(M_DONTWAIT);
136 if (mopt == 0)
137 goto bad;
138 ip_stripoptions(ip, mopt);
1e977657
BJ
139 /* 0 here means no directed broadcast */
140 (void) ip_output(m0, mopt, 0);
8a13b737 141 goto next;
1e977657 142#endif
d10bd5b7 143 }
e1d82856 144
e6dd2097
BJ
145 /*
146 * Look for queue of fragments
147 * of this datagram.
148 */
149 for (fp = ipq.next; fp != &ipq; fp = fp->next)
150 if (ip->ip_id == fp->ipq_id &&
151 ip->ip_src.s_addr == fp->ipq_src.s_addr &&
152 ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
153 ip->ip_p == fp->ipq_p)
154 goto found;
155 fp = 0;
156found:
e1d82856 157
e6dd2097
BJ
158 /*
159 * Adjust ip_len to not reflect header,
160 * set ip_mff if more fragments are expected,
161 * convert offset of this to bytes.
162 */
163 ip->ip_len -= hlen;
eb44bfb2 164 ((struct ipasfrag *)ip)->ipf_mff = 0;
e6dd2097 165 if (ip->ip_off & IP_MF)
eb44bfb2 166 ((struct ipasfrag *)ip)->ipf_mff = 1;
e6dd2097 167 ip->ip_off <<= 3;
e1d82856 168
e6dd2097
BJ
169 /*
170 * If datagram marked as having more fragments
171 * or if this is not the first fragment,
172 * attempt reassembly; if it succeeds, proceed.
173 */
eb44bfb2
BJ
174 if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
175 ip = ip_reass((struct ipasfrag *)ip, fp);
e6dd2097 176 if (ip == 0)
8a13b737 177 goto next;
e6dd2097
BJ
178 hlen = ip->ip_hl << 2;
179 m = dtom(ip);
180 } else
181 if (fp)
182 (void) ip_freef(fp);
4ad99bae
BJ
183
184 /*
185 * Switch out to protocol's input routine.
186 */
eb44bfb2 187 (*protosw[ip_protox[ip->ip_p]].pr_input)(m);
8a13b737 188 goto next;
4ad99bae
BJ
189bad:
190 m_freem(m);
8a13b737 191 goto next;
e6dd2097 192}
e1d82856 193
e6dd2097
BJ
194/*
195 * Take incoming datagram fragment and try to
4ad99bae 196 * reassemble it into whole datagram. If a chain for
e6dd2097
BJ
197 * reassembly of this datagram already exists, then it
198 * is given as fp; otherwise have to make a chain.
199 */
200struct ip *
201ip_reass(ip, fp)
eb44bfb2 202 register struct ipasfrag *ip;
e6dd2097
BJ
203 register struct ipq *fp;
204{
205 register struct mbuf *m = dtom(ip);
eb44bfb2 206 register struct ipasfrag *q;
e6dd2097
BJ
207 struct mbuf *t;
208 int hlen = ip->ip_hl << 2;
209 int i, next;
4ad99bae 210COUNT(IP_REASS);
d10bd5b7 211
e6dd2097
BJ
212 /*
213 * Presence of header sizes in mbufs
214 * would confuse code below.
215 */
216 m->m_off += hlen;
217 m->m_len -= hlen;
d10bd5b7 218
e6dd2097
BJ
219 /*
220 * If first fragment to arrive, create a reassembly queue.
221 */
222 if (fp == 0) {
e6b33a03 223 if ((t = m_get(M_WAIT)) == NULL)
e6dd2097
BJ
224 goto dropfrag;
225 t->m_off = MMINOFF;
226 fp = mtod(t, struct ipq *);
227 insque(fp, &ipq);
228 fp->ipq_ttl = IPFRAGTTL;
229 fp->ipq_p = ip->ip_p;
230 fp->ipq_id = ip->ip_id;
eb44bfb2
BJ
231 fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
232 fp->ipq_src = ((struct ip *)ip)->ip_src;
233 fp->ipq_dst = ((struct ip *)ip)->ip_dst;
405c9168
BJ
234 q = (struct ipasfrag *)fp;
235 goto insert;
e6dd2097 236 }
e1d82856 237
e6dd2097
BJ
238 /*
239 * Find a segment which begins after this one does.
240 */
eb44bfb2 241 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
e6dd2097
BJ
242 if (q->ip_off > ip->ip_off)
243 break;
e1d82856 244
e6dd2097
BJ
245 /*
246 * If there is a preceding segment, it may provide some of
247 * our data already. If so, drop the data from the incoming
248 * segment. If it provides all of our data, drop us.
249 */
eb44bfb2
BJ
250 if (q->ipf_prev != (struct ipasfrag *)fp) {
251 i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
e6dd2097
BJ
252 if (i > 0) {
253 if (i >= ip->ip_len)
254 goto dropfrag;
255 m_adj(dtom(ip), i);
256 ip->ip_off += i;
257 ip->ip_len -= i;
e1d82856 258 }
d10bd5b7 259 }
e1d82856 260
e6dd2097
BJ
261 /*
262 * While we overlap succeeding segments trim them or,
263 * if they are completely covered, dequeue them.
264 */
eb44bfb2 265 while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
e6dd2097
BJ
266 i = (ip->ip_off + ip->ip_len) - q->ip_off;
267 if (i < q->ip_len) {
268 q->ip_len -= i;
269 m_adj(dtom(q), i);
270 break;
271 }
eb44bfb2
BJ
272 q = q->ipf_next;
273 m_freem(dtom(q->ipf_prev));
274 ip_deq(q->ipf_prev);
e6dd2097 275 }
e1d82856 276
405c9168 277insert:
e6dd2097
BJ
278 /*
279 * Stick new segment in its place;
280 * check for complete reassembly.
281 */
eb44bfb2 282 ip_enq(ip, q->ipf_prev);
e6dd2097 283 next = 0;
eb44bfb2 284 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
e6dd2097
BJ
285 if (q->ip_off != next)
286 return (0);
287 next += q->ip_len;
288 }
eb44bfb2 289 if (q->ipf_prev->ipf_mff)
e6dd2097 290 return (0);
e1d82856 291
e6dd2097
BJ
292 /*
293 * Reassembly is complete; concatenate fragments.
294 */
295 q = fp->ipq_next;
296 m = dtom(q);
297 t = m->m_next;
298 m->m_next = 0;
299 m_cat(m, t);
eb44bfb2 300 while ((q = q->ipf_next) != (struct ipasfrag *)fp)
e6dd2097 301 m_cat(m, dtom(q));
e1d82856 302
e6dd2097
BJ
303 /*
304 * Create header for new ip packet by
305 * modifying header of first packet;
306 * dequeue and discard fragment reassembly header.
307 * Make header visible.
308 */
309 ip = fp->ipq_next;
310 ip->ip_len = next;
eb44bfb2
BJ
311 ((struct ip *)ip)->ip_src = fp->ipq_src;
312 ((struct ip *)ip)->ip_dst = fp->ipq_dst;
e6dd2097 313 remque(fp);
cdad2eb1 314 (void) m_free(dtom(fp));
e6dd2097 315 m = dtom(ip);
eb44bfb2
BJ
316 m->m_len += sizeof (struct ipasfrag);
317 m->m_off -= sizeof (struct ipasfrag);
318 return ((struct ip *)ip);
e6dd2097
BJ
319
320dropfrag:
321 m_freem(m);
322 return (0);
e1d82856
BJ
323}
324
e6dd2097
BJ
325/*
326 * Free a fragment reassembly header and all
327 * associated datagrams.
328 */
329struct ipq *
330ip_freef(fp)
331 struct ipq *fp;
e1d82856 332{
eb44bfb2 333 register struct ipasfrag *q;
e6dd2097 334 struct mbuf *m;
4ad99bae 335COUNT(IP_FREEF);
e6dd2097 336
eb44bfb2 337 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
e6dd2097
BJ
338 m_freem(dtom(q));
339 m = dtom(fp);
340 fp = fp->next;
341 remque(fp->prev);
cdad2eb1 342 (void) m_free(m);
e6dd2097 343 return (fp);
e1d82856
BJ
344}
345
e6dd2097
BJ
346/*
347 * Put an ip fragment on a reassembly chain.
348 * Like insque, but pointers in middle of structure.
349 */
350ip_enq(p, prev)
eb44bfb2 351 register struct ipasfrag *p, *prev;
e1d82856 352{
e1d82856 353
4ad99bae 354COUNT(IP_ENQ);
eb44bfb2
BJ
355 p->ipf_prev = prev;
356 p->ipf_next = prev->ipf_next;
357 prev->ipf_next->ipf_prev = p;
358 prev->ipf_next = p;
e1d82856
BJ
359}
360
e6dd2097
BJ
361/*
362 * To ip_enq as remque is to insque.
363 */
364ip_deq(p)
eb44bfb2 365 register struct ipasfrag *p;
e1d82856 366{
e6dd2097 367
4ad99bae 368COUNT(IP_DEQ);
eb44bfb2
BJ
369 p->ipf_prev->ipf_next = p->ipf_next;
370 p->ipf_next->ipf_prev = p->ipf_prev;
e1d82856
BJ
371}
372
e6dd2097
BJ
373/*
374 * IP timer processing;
375 * if a timer expires on a reassembly
376 * queue, discard it.
377 */
d52566dd 378ip_slowtimo()
e1d82856
BJ
379{
380 register struct ipq *fp;
e6dd2097 381 int s = splnet();
e1d82856 382
4ad99bae 383COUNT(IP_SLOWTIMO);
4aed14e3
BJ
384 fp = ipq.next;
385 if (fp == 0) {
386 splx(s);
387 return;
388 }
389 while (fp != &ipq)
e6dd2097
BJ
390 if (--fp->ipq_ttl == 0)
391 fp = ip_freef(fp);
392 else
393 fp = fp->next;
e6dd2097 394 splx(s);
e1d82856
BJ
395}
396
4ad99bae
BJ
397/*
398 * Drain off all datagram fragments.
399 */
d52566dd
BJ
400ip_drain()
401{
402
4ad99bae
BJ
403COUNT(IP_DRAIN);
404 while (ipq.next != &ipq)
405 (void) ip_freef(ipq.next);
d52566dd 406}
2b4b57cd 407
e6dd2097
BJ
408/*
409 * Do option processing on a datagram,
410 * possibly discarding it if bad options
411 * are encountered.
412 */
413ip_dooptions(ip)
414 struct ip *ip;
e1d82856 415{
e6dd2097 416 register u_char *cp;
cdad2eb1 417 int opt, optlen, cnt;
2b4b57cd 418 struct in_addr *sin;
d52566dd 419 register struct ip_timestamp *ipt;
4ad99bae
BJ
420 register struct ifnet *ifp;
421 struct in_addr t;
e6dd2097 422
4ad99bae 423COUNT(IP_DOOPTIONS);
e6dd2097
BJ
424 cp = (u_char *)(ip + 1);
425 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
426 for (; cnt > 0; cnt -= optlen, cp += optlen) {
427 opt = cp[0];
428 if (opt == IPOPT_EOL)
429 break;
430 if (opt == IPOPT_NOP)
431 optlen = 1;
432 else
433 optlen = cp[1];
434 switch (opt) {
e1d82856 435
e6dd2097
BJ
436 default:
437 break;
e1d82856 438
4ad99bae
BJ
439 /*
440 * Source routing with record.
441 * Find interface with current destination address.
442 * If none on this machine then drop if strictly routed,
443 * or do nothing if loosely routed.
444 * Record interface address and bring up next address
445 * component. If strictly routed make sure next
446 * address on directly accessible net.
447 */
e6dd2097 448 case IPOPT_LSRR:
d52566dd 449 if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
e6dd2097 450 break;
2b4b57cd 451 sin = (struct in_addr *)(cp + cp[2]);
4ad99bae
BJ
452 ifp = if_ifwithaddr(*sin);
453 if (ifp == 0) {
454 if (opt == IPOPT_SSRR)
455 goto bad;
456 break;
e6dd2097 457 }
4ad99bae
BJ
458 t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
459 cp[2] += 4;
460 if (cp[2] > optlen - (sizeof (long) - 1))
461 break;
462 ip->ip_dst = sin[1];
463 if (opt == IPOPT_SSRR && if_ifonnetof(ip->ip_dst)==0)
464 goto bad;
e6dd2097
BJ
465 break;
466
467 case IPOPT_TS:
d52566dd
BJ
468 ipt = (struct ip_timestamp *)cp;
469 if (ipt->ipt_len < 5)
e6dd2097 470 goto bad;
d52566dd
BJ
471 if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
472 if (++ipt->ipt_oflw == 0)
e6dd2097 473 goto bad;
e6dd2097
BJ
474 break;
475 }
2b4b57cd 476 sin = (struct in_addr *)(cp+cp[2]);
d52566dd 477 switch (ipt->ipt_flg) {
e1d82856 478
e6dd2097
BJ
479 case IPOPT_TS_TSONLY:
480 break;
e1d82856 481
e6dd2097 482 case IPOPT_TS_TSANDADDR:
d52566dd 483 if (ipt->ipt_ptr + 8 > ipt->ipt_len)
e6dd2097 484 goto bad;
4ad99bae
BJ
485 /* stamp with ``first'' interface address */
486 *sin++ = ifnet->if_addr;
e6dd2097
BJ
487 break;
488
489 case IPOPT_TS_PRESPEC:
4ad99bae
BJ
490 if (if_ifwithaddr(*sin) == 0)
491 continue;
d52566dd 492 if (ipt->ipt_ptr + 8 > ipt->ipt_len)
e6dd2097 493 goto bad;
d52566dd 494 ipt->ipt_ptr += 4;
e1d82856
BJ
495 break;
496
497 default:
e6dd2097 498 goto bad;
e1d82856 499 }
2b4b57cd 500 *(n_time *)sin = iptime();
d52566dd 501 ipt->ipt_ptr += 4;
e6dd2097 502 }
e1d82856 503 }
cdad2eb1 504 return;
e6dd2097
BJ
505bad:
506 /* SHOULD FORCE ICMP MESSAGE */
cdad2eb1 507 return;
e1d82856
BJ
508}
509
e6dd2097 510/*
4ad99bae
BJ
511 * Strip out IP options, at higher
512 * level protocol in the kernel.
513 * Second argument is buffer to which options
514 * will be moved, and return value is their length.
e6dd2097 515 */
7c08c626 516ip_stripoptions(ip, mopt)
e6dd2097 517 struct ip *ip;
7c08c626 518 struct mbuf *mopt;
e1d82856 519{
e6dd2097
BJ
520 register int i;
521 register struct mbuf *m;
e6dd2097 522 int olen;
4ad99bae 523COUNT(IP_STRIPOPTIONS);
e6dd2097
BJ
524
525 olen = (ip->ip_hl<<2) - sizeof (struct ip);
4ad99bae
BJ
526 m = dtom(ip);
527 ip++;
7c08c626
BJ
528 if (mopt) {
529 mopt->m_len = olen;
530 mopt->m_off = MMINOFF;
531 bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen);
532 }
e6dd2097 533 i = m->m_len - (sizeof (struct ip) + olen);
cdad2eb1 534 bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
4aed14e3 535 m->m_len -= olen;
e1d82856 536}