interface queueing, restrictions on packets, tcp keep alives done
[unix-history] / usr / src / sys / netinet / ip_input.c
... / ...
CommitLineData
1/* ip_input.c 1.32 82/03/15 */
2
3#include "../h/param.h"
4#include "../h/systm.h"
5#include "../h/clock.h"
6#include "../h/mbuf.h"
7#include "../h/protosw.h"
8#include "../h/socket.h"
9#include "../net/in.h"
10#include "../net/in_systm.h"
11#include "../net/if.h"
12#include "../net/ip.h" /* belongs before in.h */
13#include "../net/ip_var.h"
14#include "../net/ip_icmp.h"
15#include "../net/tcp.h"
16
/*
 * ip_protox maps an IP protocol number to an index into protosw[];
 * it is filled in by ip_init() and consulted by ipintr() when a
 * datagram is handed to the next protocol level.
 * ipqmaxlen is copied into ipintrq.ifq_maxlen by ip_init().
 */
17u_char ip_protox[IPPROTO_MAX];
18int ipqmaxlen = IFQ_MAXLEN;
19
20/*
21 * IP initialization: fill in IP protocol switch table.
22 * All protocols not implemented in kernel go to raw IP protocol handler.
23 */
24ip_init()
25{
26 register struct protosw *pr;
27 register int i;
28
29COUNT(IP_INIT);
30 pr = pffindproto(PF_INET, IPPROTO_RAW);
31 if (pr == 0)
32 panic("ip_init");
33 for (i = 0; i < IPPROTO_MAX; i++)
34 ip_protox[i] = pr - protosw;
35 for (pr = protosw; pr <= protoswLAST; pr++)
36 if (pr->pr_family == PF_INET &&
37 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
38 ip_protox[pr->pr_protocol] = pr - protosw;
39 ipq.next = ipq.prev = &ipq;
40 ip_id = time & 0xffff;
41 ipintrq.ifq_maxlen = ipqmaxlen;
42}
43
/*
 * ipcksum: when nonzero, ipintr() verifies the header checksum of
 * every incoming datagram.
 * ip_reass() is declared here because it returns a pointer and is
 * called by ipintr() before its definition.
 */
44u_char ipcksum = 1;
45struct ip *ip_reass();
46
47/*
48 * Ip input routine. Checksum and byte swap header. If fragmented
49 * try to reassamble. If complete and fragment queue exists, discard.
50 * Process options. Pass to next level.
51 */
52ipintr()
53{
54 register struct ip *ip;
55 register struct mbuf *m;
56 struct mbuf *m0, *mopt;
57 register int i;
58 register struct ipq *fp;
59 int hlen, s;
60
61COUNT(IPINTR);
62next:
63 /*
64 * Get next datagram off input queue and get IP header
65 * in first mbuf.
66 */
67 s = splimp();
68 IF_DEQUEUE(&ipintrq, m);
69 splx(s);
70 if (m == 0)
71 return;
72 if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
73 (m = m_pullup(m, sizeof (struct ip))) == 0)
74 return;
75 ip = mtod(m, struct ip *);
76 if ((hlen = ip->ip_hl << 2) > m->m_len) {
77 if ((m = m_pullup(m, hlen)) == 0)
78 return;
79 ip = mtod(m, struct ip *);
80 }
81 if (ipcksum)
82 if (ip->ip_sum = in_cksum(m, hlen)) {
83 printf("ip_sum %x\n", ip->ip_sum); /* XXX */
84 ipstat.ips_badsum++;
85 goto bad;
86 }
87
88#if vax
89 /*
90 * Convert fields to host representation.
91 */
92 ip->ip_len = ntohs((u_short)ip->ip_len);
93 ip->ip_id = ntohs(ip->ip_id);
94 ip->ip_off = ntohs((u_short)ip->ip_off);
95#endif
96
97 /*
98 * Check that the amount of data in the buffers
99 * is as at least much as the IP header would have us expect.
100 * Trim mbufs if longer than we expect.
101 * Drop packet if shorter than we expect.
102 */
103 i = 0;
104 m0 = m;
105 for (; m != NULL; m = m->m_next) {
106 if (m->m_free) panic("ipinput already free");
107 i += m->m_len;
108 }
109 m = m0;
110 if (i != ip->ip_len) {
111 if (i < ip->ip_len) {
112 ipstat.ips_tooshort++;
113 goto bad;
114 }
115 m_adj(m, ip->ip_len - i);
116 }
117
118 /*
119 * Process options and, if not destined for us,
120 * ship it on.
121 */
122 if (hlen > sizeof (struct ip))
123 ip_dooptions(ip);
124 if (ifnet && ip->ip_dst.s_addr != ifnet->if_addr.s_addr &&
125 if_ifwithaddr(ip->ip_dst) == 0) {
126
127 goto bad;
128#ifdef notdef
129 printf("ip->ip_dst %x ip->ip_ttl %x\n",
130 ip->ip_dst, ip->ip_ttl);
131 if (--ip->ip_ttl == 0) {
132 icmp_error(ip, ICMP_TIMXCEED, 0);
133 goto next;
134 }
135 mopt = m_get(M_DONTWAIT);
136 if (mopt == 0)
137 goto bad;
138 ip_stripoptions(ip, mopt);
139 /* 0 here means no directed broadcast */
140 (void) ip_output(m0, mopt, 0);
141 goto next;
142#endif
143 }
144
145 /*
146 * Look for queue of fragments
147 * of this datagram.
148 */
149 for (fp = ipq.next; fp != &ipq; fp = fp->next)
150 if (ip->ip_id == fp->ipq_id &&
151 ip->ip_src.s_addr == fp->ipq_src.s_addr &&
152 ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
153 ip->ip_p == fp->ipq_p)
154 goto found;
155 fp = 0;
156found:
157
158 /*
159 * Adjust ip_len to not reflect header,
160 * set ip_mff if more fragments are expected,
161 * convert offset of this to bytes.
162 */
163 ip->ip_len -= hlen;
164 ((struct ipasfrag *)ip)->ipf_mff = 0;
165 if (ip->ip_off & IP_MF)
166 ((struct ipasfrag *)ip)->ipf_mff = 1;
167 ip->ip_off <<= 3;
168
169 /*
170 * If datagram marked as having more fragments
171 * or if this is not the first fragment,
172 * attempt reassembly; if it succeeds, proceed.
173 */
174 if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
175 ip = ip_reass((struct ipasfrag *)ip, fp);
176 if (ip == 0)
177 goto next;
178 hlen = ip->ip_hl << 2;
179 m = dtom(ip);
180 } else
181 if (fp)
182 (void) ip_freef(fp);
183
184 /*
185 * Switch out to protocol's input routine.
186 */
187 (*protosw[ip_protox[ip->ip_p]].pr_input)(m);
188 goto next;
189bad:
190 m_freem(m);
191 goto next;
192}
193
194/*
195 * Take incoming datagram fragment and try to
196 * reassemble it into whole datagram. If a chain for
197 * reassembly of this datagram already exists, then it
198 * is given as fp; otherwise have to make a chain.
199 */
200struct ip *
201ip_reass(ip, fp)
202 register struct ipasfrag *ip;
203 register struct ipq *fp;
204{
205 register struct mbuf *m = dtom(ip);
206 register struct ipasfrag *q;
207 struct mbuf *t;
208 int hlen = ip->ip_hl << 2;
209 int i, next;
210COUNT(IP_REASS);
211
212 /*
213 * Presence of header sizes in mbufs
214 * would confuse code below.
215 */
216 m->m_off += hlen;
217 m->m_len -= hlen;
218
219 /*
220 * If first fragment to arrive, create a reassembly queue.
221 */
222 if (fp == 0) {
223 if ((t = m_get(M_WAIT)) == NULL)
224 goto dropfrag;
225 t->m_off = MMINOFF;
226 fp = mtod(t, struct ipq *);
227 insque(fp, &ipq);
228 fp->ipq_ttl = IPFRAGTTL;
229 fp->ipq_p = ip->ip_p;
230 fp->ipq_id = ip->ip_id;
231 fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
232 fp->ipq_src = ((struct ip *)ip)->ip_src;
233 fp->ipq_dst = ((struct ip *)ip)->ip_dst;
234 q = (struct ipasfrag *)fp;
235 goto insert;
236 }
237
238 /*
239 * Find a segment which begins after this one does.
240 */
241 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
242 if (q->ip_off > ip->ip_off)
243 break;
244
245 /*
246 * If there is a preceding segment, it may provide some of
247 * our data already. If so, drop the data from the incoming
248 * segment. If it provides all of our data, drop us.
249 */
250 if (q->ipf_prev != (struct ipasfrag *)fp) {
251 i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
252 if (i > 0) {
253 if (i >= ip->ip_len)
254 goto dropfrag;
255 m_adj(dtom(ip), i);
256 ip->ip_off += i;
257 ip->ip_len -= i;
258 }
259 }
260
261 /*
262 * While we overlap succeeding segments trim them or,
263 * if they are completely covered, dequeue them.
264 */
265 while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
266 i = (ip->ip_off + ip->ip_len) - q->ip_off;
267 if (i < q->ip_len) {
268 q->ip_len -= i;
269 m_adj(dtom(q), i);
270 break;
271 }
272 q = q->ipf_next;
273 m_freem(dtom(q->ipf_prev));
274 ip_deq(q->ipf_prev);
275 }
276
277insert:
278 /*
279 * Stick new segment in its place;
280 * check for complete reassembly.
281 */
282 ip_enq(ip, q->ipf_prev);
283 next = 0;
284 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
285 if (q->ip_off != next)
286 return (0);
287 next += q->ip_len;
288 }
289 if (q->ipf_prev->ipf_mff)
290 return (0);
291
292 /*
293 * Reassembly is complete; concatenate fragments.
294 */
295 q = fp->ipq_next;
296 m = dtom(q);
297 t = m->m_next;
298 m->m_next = 0;
299 m_cat(m, t);
300 while ((q = q->ipf_next) != (struct ipasfrag *)fp)
301 m_cat(m, dtom(q));
302
303 /*
304 * Create header for new ip packet by
305 * modifying header of first packet;
306 * dequeue and discard fragment reassembly header.
307 * Make header visible.
308 */
309 ip = fp->ipq_next;
310 ip->ip_len = next;
311 ((struct ip *)ip)->ip_src = fp->ipq_src;
312 ((struct ip *)ip)->ip_dst = fp->ipq_dst;
313 remque(fp);
314 (void) m_free(dtom(fp));
315 m = dtom(ip);
316 m->m_len += sizeof (struct ipasfrag);
317 m->m_off -= sizeof (struct ipasfrag);
318 return ((struct ip *)ip);
319
320dropfrag:
321 m_freem(m);
322 return (0);
323}
324
325/*
326 * Free a fragment reassembly header and all
327 * associated datagrams.
328 */
329struct ipq *
330ip_freef(fp)
331 struct ipq *fp;
332{
333 register struct ipasfrag *q;
334 struct mbuf *m;
335COUNT(IP_FREEF);
336
337 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
338 m_freem(dtom(q));
339 m = dtom(fp);
340 fp = fp->next;
341 remque(fp->prev);
342 (void) m_free(m);
343 return (fp);
344}
345
346/*
347 * Put an ip fragment on a reassembly chain.
348 * Like insque, but pointers in middle of structure.
349 */
350ip_enq(p, prev)
351 register struct ipasfrag *p, *prev;
352{
353
354COUNT(IP_ENQ);
355 p->ipf_prev = prev;
356 p->ipf_next = prev->ipf_next;
357 prev->ipf_next->ipf_prev = p;
358 prev->ipf_next = p;
359}
360
361/*
362 * To ip_enq as remque is to insque.
363 */
364ip_deq(p)
365 register struct ipasfrag *p;
366{
367
368COUNT(IP_DEQ);
369 p->ipf_prev->ipf_next = p->ipf_next;
370 p->ipf_next->ipf_prev = p->ipf_prev;
371}
372
373/*
374 * IP timer processing;
375 * if a timer expires on a reassembly
376 * queue, discard it.
377 */
378ip_slowtimo()
379{
380 register struct ipq *fp;
381 int s = splnet();
382
383COUNT(IP_SLOWTIMO);
384 fp = ipq.next;
385 if (fp == 0) {
386 splx(s);
387 return;
388 }
389 while (fp != &ipq)
390 if (--fp->ipq_ttl == 0)
391 fp = ip_freef(fp);
392 else
393 fp = fp->next;
394 splx(s);
395}
396
397/*
398 * Drain off all datagram fragments.
399 */
400ip_drain()
401{
402
403COUNT(IP_DRAIN);
404 while (ipq.next != &ipq)
405 (void) ip_freef(ipq.next);
406}
407
408/*
409 * Do option processing on a datagram,
410 * possibly discarding it if bad options
411 * are encountered.
412 */
413ip_dooptions(ip)
414 struct ip *ip;
415{
416 register u_char *cp;
417 int opt, optlen, cnt;
418 struct in_addr *sin;
419 register struct ip_timestamp *ipt;
420 register struct ifnet *ifp;
421 struct in_addr t;
422
423COUNT(IP_DOOPTIONS);
424 cp = (u_char *)(ip + 1);
425 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
426 for (; cnt > 0; cnt -= optlen, cp += optlen) {
427 opt = cp[0];
428 if (opt == IPOPT_EOL)
429 break;
430 if (opt == IPOPT_NOP)
431 optlen = 1;
432 else
433 optlen = cp[1];
434 switch (opt) {
435
436 default:
437 break;
438
439 /*
440 * Source routing with record.
441 * Find interface with current destination address.
442 * If none on this machine then drop if strictly routed,
443 * or do nothing if loosely routed.
444 * Record interface address and bring up next address
445 * component. If strictly routed make sure next
446 * address on directly accessible net.
447 */
448 case IPOPT_LSRR:
449 if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
450 break;
451 sin = (struct in_addr *)(cp + cp[2]);
452 ifp = if_ifwithaddr(*sin);
453 if (ifp == 0) {
454 if (opt == IPOPT_SSRR)
455 goto bad;
456 break;
457 }
458 t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
459 cp[2] += 4;
460 if (cp[2] > optlen - (sizeof (long) - 1))
461 break;
462 ip->ip_dst = sin[1];
463 if (opt == IPOPT_SSRR && if_ifonnetof(ip->ip_dst)==0)
464 goto bad;
465 break;
466
467 case IPOPT_TS:
468 ipt = (struct ip_timestamp *)cp;
469 if (ipt->ipt_len < 5)
470 goto bad;
471 if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
472 if (++ipt->ipt_oflw == 0)
473 goto bad;
474 break;
475 }
476 sin = (struct in_addr *)(cp+cp[2]);
477 switch (ipt->ipt_flg) {
478
479 case IPOPT_TS_TSONLY:
480 break;
481
482 case IPOPT_TS_TSANDADDR:
483 if (ipt->ipt_ptr + 8 > ipt->ipt_len)
484 goto bad;
485 /* stamp with ``first'' interface address */
486 *sin++ = ifnet->if_addr;
487 break;
488
489 case IPOPT_TS_PRESPEC:
490 if (if_ifwithaddr(*sin) == 0)
491 continue;
492 if (ipt->ipt_ptr + 8 > ipt->ipt_len)
493 goto bad;
494 ipt->ipt_ptr += 4;
495 break;
496
497 default:
498 goto bad;
499 }
500 *(n_time *)sin = iptime();
501 ipt->ipt_ptr += 4;
502 }
503 }
504 return;
505bad:
506 /* SHOULD FORCE ICMP MESSAGE */
507 return;
508}
509
510/*
511 * Strip out IP options, at higher
512 * level protocol in the kernel.
513 * Second argument is buffer to which options
514 * will be moved, and return value is their length.
515 */
516ip_stripoptions(ip, mopt)
517 struct ip *ip;
518 struct mbuf *mopt;
519{
520 register int i;
521 register struct mbuf *m;
522 int olen;
523COUNT(IP_STRIPOPTIONS);
524
525 olen = (ip->ip_hl<<2) - sizeof (struct ip);
526 m = dtom(ip);
527 ip++;
528 if (mopt) {
529 mopt->m_len = olen;
530 mopt->m_off = MMINOFF;
531 bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen);
532 }
533 i = m->m_len - (sizeof (struct ip) + olen);
534 bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
535 m->m_len -= olen;
536}