insert SBCHECK calls looking for panic:receive problem
[unix-history] / usr / src / sys / netinet / ip_input.c
... / ...
CommitLineData
1/* ip_input.c 1.51 82/10/09 */
2
3#include "../h/param.h"
4#include "../h/systm.h"
5#include "../h/mbuf.h"
6#include "../h/protosw.h"
7#include "../h/socket.h"
8#include "../netinet/in.h"
9#include "../netinet/in_systm.h"
10#include "../net/if.h"
11#include "../netinet/ip.h" /* belongs before in.h */
12#include "../netinet/ip_var.h"
13#include "../netinet/ip_icmp.h"
14#include "../netinet/tcp.h"
15#include <time.h>
16#include "../h/kernel.h"
17#include <errno.h>
18
19u_char ip_protox[IPPROTO_MAX];
20int ipqmaxlen = IFQ_MAXLEN;
21struct ifnet *ifinet; /* first inet interface */
22
23/*
24 * IP initialization: fill in IP protocol switch table.
25 * All protocols not implemented in kernel go to raw IP protocol handler.
26 */
27ip_init()
28{
29 register struct protosw *pr;
30 register int i;
31
32 pr = pffindproto(PF_INET, IPPROTO_RAW);
33 if (pr == 0)
34 panic("ip_init");
35 for (i = 0; i < IPPROTO_MAX; i++)
36 ip_protox[i] = pr - protosw;
37 for (pr = protosw; pr <= protoswLAST; pr++)
38 if (pr->pr_family == PF_INET &&
39 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
40 ip_protox[pr->pr_protocol] = pr - protosw;
41 ipq.next = ipq.prev = &ipq;
42 ip_id = time.tv_sec & 0xffff;
43 ipintrq.ifq_maxlen = ipqmaxlen;
44 ifinet = if_ifwithaf(AF_INET);
45}
46
47u_char ipcksum = 1;
48struct ip *ip_reass();
49struct sockaddr_in ipaddr = { AF_INET };
50
51/*
52 * Ip input routine. Checksum and byte swap header. If fragmented
53 * try to reassamble. If complete and fragment queue exists, discard.
54 * Process options. Pass to next level.
55 */
56ipintr()
57{
58 register struct ip *ip;
59 register struct mbuf *m;
60 struct mbuf *m0, *mopt;
61 register int i;
62 register struct ipq *fp;
63 int hlen, s;
64
65next:
66 /*
67 * Get next datagram off input queue and get IP header
68 * in first mbuf.
69 */
70 s = splimp();
71 IF_DEQUEUE(&ipintrq, m);
72 splx(s);
73 if (m == 0)
74 return;
75 if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
76 (m = m_pullup(m, sizeof (struct ip))) == 0)
77 return;
78 ip = mtod(m, struct ip *);
79 if ((hlen = ip->ip_hl << 2) > m->m_len) {
80 if ((m = m_pullup(m, hlen)) == 0)
81 return;
82 ip = mtod(m, struct ip *);
83 }
84 if (ipcksum)
85 if (ip->ip_sum = in_cksum(m, hlen)) {
86 printf("ip_sum %x\n", ip->ip_sum); /* XXX */
87 ipstat.ips_badsum++;
88 goto bad;
89 }
90
91#if vax
92 /*
93 * Convert fields to host representation.
94 */
95 ip->ip_len = ntohs((u_short)ip->ip_len);
96 ip->ip_id = ntohs(ip->ip_id);
97 ip->ip_off = ntohs((u_short)ip->ip_off);
98#endif
99
100 /*
101 * Check that the amount of data in the buffers
102 * is as at least much as the IP header would have us expect.
103 * Trim mbufs if longer than we expect.
104 * Drop packet if shorter than we expect.
105 */
106 i = -ip->ip_len;
107 m0 = m;
108 for (;;) {
109 i += m->m_len;
110 if (m->m_next == 0)
111 break;
112 m = m->m_next;
113 }
114 if (i != 0) {
115 if (i < 0) {
116 ipstat.ips_tooshort++;
117 goto bad;
118 }
119 if (i <= m->m_len)
120 m->m_len -= i;
121 else
122 m_adj(m0, -i);
123 }
124 m = m0;
125
126 /*
127 * Process options and, if not destined for us,
128 * ship it on. ip_dooptions returns 1 when an
129 * error was detected (causing an icmp message
130 * to be sent).
131 */
132 if (hlen > sizeof (struct ip) && ip_dooptions(ip))
133 goto next;
134
135 /*
136 * Fast check on the first internet
137 * interface in the list.
138 */
139 if (ifinet) {
140 struct sockaddr_in *sin;
141
142 sin = (struct sockaddr_in *)&ifinet->if_addr;
143 if (sin->sin_addr.s_addr == ip->ip_dst.s_addr)
144 goto ours;
145 sin = (struct sockaddr_in *)&ifinet->if_broadaddr;
146 if ((ifinet->if_flags & IFF_BROADCAST) &&
147 sin->sin_addr.s_addr == ip->ip_dst.s_addr)
148 goto ours;
149 }
150 ipaddr.sin_addr = ip->ip_dst;
151 if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) {
152 ip_forward(ip);
153 goto next;
154 }
155
156ours:
157 /*
158 * Look for queue of fragments
159 * of this datagram.
160 */
161 for (fp = ipq.next; fp != &ipq; fp = fp->next)
162 if (ip->ip_id == fp->ipq_id &&
163 ip->ip_src.s_addr == fp->ipq_src.s_addr &&
164 ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
165 ip->ip_p == fp->ipq_p)
166 goto found;
167 fp = 0;
168found:
169
170 /*
171 * Adjust ip_len to not reflect header,
172 * set ip_mff if more fragments are expected,
173 * convert offset of this to bytes.
174 */
175 ip->ip_len -= hlen;
176 ((struct ipasfrag *)ip)->ipf_mff = 0;
177 if (ip->ip_off & IP_MF)
178 ((struct ipasfrag *)ip)->ipf_mff = 1;
179 ip->ip_off <<= 3;
180
181 /*
182 * If datagram marked as having more fragments
183 * or if this is not the first fragment,
184 * attempt reassembly; if it succeeds, proceed.
185 */
186 if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
187 ip = ip_reass((struct ipasfrag *)ip, fp);
188 if (ip == 0)
189 goto next;
190 hlen = ip->ip_hl << 2;
191 m = dtom(ip);
192 } else
193 if (fp)
194 (void) ip_freef(fp);
195
196 /*
197 * Switch out to protocol's input routine.
198 */
199 (*protosw[ip_protox[ip->ip_p]].pr_input)(m);
200 goto next;
201bad:
202 m_freem(m);
203 goto next;
204}
205
206/*
207 * Take incoming datagram fragment and try to
208 * reassemble it into whole datagram. If a chain for
209 * reassembly of this datagram already exists, then it
210 * is given as fp; otherwise have to make a chain.
211 */
212struct ip *
213ip_reass(ip, fp)
214 register struct ipasfrag *ip;
215 register struct ipq *fp;
216{
217 register struct mbuf *m = dtom(ip);
218 register struct ipasfrag *q;
219 struct mbuf *t;
220 int hlen = ip->ip_hl << 2;
221 int i, next;
222
223 /*
224 * Presence of header sizes in mbufs
225 * would confuse code below.
226 */
227 m->m_off += hlen;
228 m->m_len -= hlen;
229
230 /*
231 * If first fragment to arrive, create a reassembly queue.
232 */
233 if (fp == 0) {
234 if ((t = m_get(M_WAIT)) == NULL)
235 goto dropfrag;
236 fp = mtod(t, struct ipq *);
237 insque(fp, &ipq);
238 fp->ipq_ttl = IPFRAGTTL;
239 fp->ipq_p = ip->ip_p;
240 fp->ipq_id = ip->ip_id;
241 fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
242 fp->ipq_src = ((struct ip *)ip)->ip_src;
243 fp->ipq_dst = ((struct ip *)ip)->ip_dst;
244 q = (struct ipasfrag *)fp;
245 goto insert;
246 }
247
248 /*
249 * Find a segment which begins after this one does.
250 */
251 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
252 if (q->ip_off > ip->ip_off)
253 break;
254
255 /*
256 * If there is a preceding segment, it may provide some of
257 * our data already. If so, drop the data from the incoming
258 * segment. If it provides all of our data, drop us.
259 */
260 if (q->ipf_prev != (struct ipasfrag *)fp) {
261 i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
262 if (i > 0) {
263 if (i >= ip->ip_len)
264 goto dropfrag;
265 m_adj(dtom(ip), i);
266 ip->ip_off += i;
267 ip->ip_len -= i;
268 }
269 }
270
271 /*
272 * While we overlap succeeding segments trim them or,
273 * if they are completely covered, dequeue them.
274 */
275 while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
276 i = (ip->ip_off + ip->ip_len) - q->ip_off;
277 if (i < q->ip_len) {
278 q->ip_len -= i;
279 q->ip_off += i;
280 m_adj(dtom(q), i);
281 break;
282 }
283 q = q->ipf_next;
284 m_freem(dtom(q->ipf_prev));
285 ip_deq(q->ipf_prev);
286 }
287
288insert:
289 /*
290 * Stick new segment in its place;
291 * check for complete reassembly.
292 */
293 ip_enq(ip, q->ipf_prev);
294 next = 0;
295 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
296 if (q->ip_off != next)
297 return (0);
298 next += q->ip_len;
299 }
300 if (q->ipf_prev->ipf_mff)
301 return (0);
302
303 /*
304 * Reassembly is complete; concatenate fragments.
305 */
306 q = fp->ipq_next;
307 m = dtom(q);
308 t = m->m_next;
309 m->m_next = 0;
310 m_cat(m, t);
311 q = q->ipf_next;
312 while (q != (struct ipasfrag *)fp) {
313 t = dtom(q);
314 q = q->ipf_next;
315 m_cat(m, t);
316 }
317
318 /*
319 * Create header for new ip packet by
320 * modifying header of first packet;
321 * dequeue and discard fragment reassembly header.
322 * Make header visible.
323 */
324 ip = fp->ipq_next;
325 ip->ip_len = next;
326 ((struct ip *)ip)->ip_src = fp->ipq_src;
327 ((struct ip *)ip)->ip_dst = fp->ipq_dst;
328 remque(fp);
329 (void) m_free(dtom(fp));
330 m = dtom(ip);
331 m->m_len += sizeof (struct ipasfrag);
332 m->m_off -= sizeof (struct ipasfrag);
333 return ((struct ip *)ip);
334
335dropfrag:
336 m_freem(m);
337 return (0);
338}
339
340/*
341 * Free a fragment reassembly header and all
342 * associated datagrams.
343 */
344struct ipq *
345ip_freef(fp)
346 struct ipq *fp;
347{
348 register struct ipasfrag *q;
349 struct mbuf *m;
350
351 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
352 m_freem(dtom(q));
353 m = dtom(fp);
354 fp = fp->next;
355 remque(fp->prev);
356 (void) m_free(m);
357 return (fp);
358}
359
360/*
361 * Put an ip fragment on a reassembly chain.
362 * Like insque, but pointers in middle of structure.
363 */
364ip_enq(p, prev)
365 register struct ipasfrag *p, *prev;
366{
367
368 p->ipf_prev = prev;
369 p->ipf_next = prev->ipf_next;
370 prev->ipf_next->ipf_prev = p;
371 prev->ipf_next = p;
372}
373
374/*
375 * To ip_enq as remque is to insque.
376 */
377ip_deq(p)
378 register struct ipasfrag *p;
379{
380
381 p->ipf_prev->ipf_next = p->ipf_next;
382 p->ipf_next->ipf_prev = p->ipf_prev;
383}
384
385/*
386 * IP timer processing;
387 * if a timer expires on a reassembly
388 * queue, discard it.
389 */
390ip_slowtimo()
391{
392 register struct ipq *fp;
393 int s = splnet();
394
395 fp = ipq.next;
396 if (fp == 0) {
397 splx(s);
398 return;
399 }
400 while (fp != &ipq)
401 if (--fp->ipq_ttl == 0)
402 fp = ip_freef(fp);
403 else
404 fp = fp->next;
405 splx(s);
406}
407
408/*
409 * Drain off all datagram fragments.
410 */
411ip_drain()
412{
413
414 while (ipq.next != &ipq)
415 (void) ip_freef(ipq.next);
416}
417
418/*
419 * Do option processing on a datagram,
420 * possibly discarding it if bad options
421 * are encountered.
422 */
423ip_dooptions(ip)
424 struct ip *ip;
425{
426 register u_char *cp;
427 int opt, optlen, cnt, code, type;
428 struct in_addr *sin;
429 register struct ip_timestamp *ipt;
430 register struct ifnet *ifp;
431 struct in_addr t;
432
433 cp = (u_char *)(ip + 1);
434 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
435 for (; cnt > 0; cnt -= optlen, cp += optlen) {
436 opt = cp[0];
437 if (opt == IPOPT_EOL)
438 break;
439 if (opt == IPOPT_NOP)
440 optlen = 1;
441 else
442 optlen = cp[1];
443 switch (opt) {
444
445 default:
446 break;
447
448 /*
449 * Source routing with record.
450 * Find interface with current destination address.
451 * If none on this machine then drop if strictly routed,
452 * or do nothing if loosely routed.
453 * Record interface address and bring up next address
454 * component. If strictly routed make sure next
455 * address on directly accessible net.
456 */
457 case IPOPT_LSRR:
458 case IPOPT_SSRR:
459 if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
460 break;
461 sin = (struct in_addr *)(cp + cp[2]);
462 ipaddr.sin_addr = *sin;
463 ifp = if_ifwithaddr((struct sockaddr *)&ipaddr);
464 type = ICMP_UNREACH, code = ICMP_UNREACH_SRCFAIL;
465 if (ifp == 0) {
466 if (opt == IPOPT_SSRR)
467 goto bad;
468 break;
469 }
470 t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
471 cp[2] += 4;
472 if (cp[2] > optlen - (sizeof (long) - 1))
473 break;
474 ip->ip_dst = sin[1];
475 if (opt == IPOPT_SSRR &&
476 if_ifonnetof(in_netof(ip->ip_dst)) == 0)
477 goto bad;
478 break;
479
480 case IPOPT_TS:
481 code = cp - (u_char *)ip;
482 type = ICMP_PARAMPROB;
483 ipt = (struct ip_timestamp *)cp;
484 if (ipt->ipt_len < 5)
485 goto bad;
486 if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
487 if (++ipt->ipt_oflw == 0)
488 goto bad;
489 break;
490 }
491 sin = (struct in_addr *)(cp+cp[2]);
492 switch (ipt->ipt_flg) {
493
494 case IPOPT_TS_TSONLY:
495 break;
496
497 case IPOPT_TS_TSANDADDR:
498 if (ipt->ipt_ptr + 8 > ipt->ipt_len)
499 goto bad;
500 if (ifinet == 0)
501 goto bad; /* ??? */
502 *sin++ = ((struct sockaddr_in *)&ifinet->if_addr)->sin_addr;
503 break;
504
505 case IPOPT_TS_PRESPEC:
506 ipaddr.sin_addr = *sin;
507 if (!if_ifwithaddr((struct sockaddr *)&ipaddr))
508 continue;
509 if (ipt->ipt_ptr + 8 > ipt->ipt_len)
510 goto bad;
511 ipt->ipt_ptr += 4;
512 break;
513
514 default:
515 goto bad;
516 }
517 *(n_time *)sin = iptime();
518 ipt->ipt_ptr += 4;
519 }
520 }
521 return (0);
522bad:
523 icmp_error(ip, type, code);
524 return (1);
525}
526
527/*
528 * Strip out IP options, at higher
529 * level protocol in the kernel.
530 * Second argument is buffer to which options
531 * will be moved, and return value is their length.
532 */
533ip_stripoptions(ip, mopt)
534 struct ip *ip;
535 struct mbuf *mopt;
536{
537 register int i;
538 register struct mbuf *m;
539 int olen;
540
541 olen = (ip->ip_hl<<2) - sizeof (struct ip);
542 m = dtom(ip);
543 ip++;
544 if (mopt) {
545 mopt->m_len = olen;
546 mopt->m_off = MMINOFF;
547 bcopy((caddr_t)ip, mtod(m, caddr_t), (unsigned)olen);
548 }
549 i = m->m_len - (sizeof (struct ip) + olen);
550 bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
551 m->m_len -= olen;
552}
553
/*
 * Error number handed to the protocol control blocks for
 * each control command, indexed by command code.  A zero
 * entry means ip_ctlinput ignores the command.
 */
u_char inetctlerrmap[] = {
	ECONNABORTED,	ECONNABORTED,	0,		0,
	0,		0,
	EHOSTDOWN,	EHOSTUNREACH,	ENETUNREACH,	EHOSTUNREACH,
	ECONNREFUSED,	ECONNREFUSED,	EMSGSIZE,	0,
	0,		0,		0,		0
};

/*
 * Control input: translate command cmd into an error number
 * via inetctlerrmap and notify the TCP and UDP control blocks
 * associated with the address found through arg.
 *
 * arg is a struct sockaddr_in for PRC_IFDOWN, a struct in_addr
 * for PRC_HOSTDEAD and PRC_HOSTUNREACH, and a struct icmp
 * (whose embedded IP header supplies the destination) otherwise.
 */
ip_ctlinput(cmd, arg)
	int cmd;
	caddr_t arg;
{
	struct in_addr *sin;
	int tcp_abort(), udp_abort();
	extern struct inpcb tcb, udb;

	/*
	 * Besides the PRC_NCMDS limit, never index past the end
	 * of the table, whatever its size relative to PRC_NCMDS.
	 */
	if (cmd < 0 || cmd > PRC_NCMDS ||
	    cmd >= (int)(sizeof (inetctlerrmap) / sizeof (inetctlerrmap[0])))
		return;
	if (inetctlerrmap[cmd] == 0)
		return;		/* XXX */
	if (cmd == PRC_IFDOWN)
		sin = &((struct sockaddr_in *)arg)->sin_addr;
	else if (cmd == PRC_HOSTDEAD || cmd == PRC_HOSTUNREACH)
		sin = (struct in_addr *)arg;
	else
		sin = &((struct icmp *)arg)->icmp_ip.ip_dst;
	in_pcbnotify(&tcb, sin, inetctlerrmap[cmd], tcp_abort);
	in_pcbnotify(&udb, sin, inetctlerrmap[cmd], udp_abort);
}
583
584int ipprintfs = 0;
585int ipforwarding = 1;
586/*
587 * Forward a packet. If some error occurs return the sender
588 * and icmp packet. Note we can't always generate a meaningful
589 * icmp message because icmp doesn't have a large enough repetoire
590 * of codes and types.
591 */
592ip_forward(ip)
593 register struct ip *ip;
594{
595 register int error, type, code;
596 struct mbuf *mopt, *mcopy;
597
598 if (ipprintfs)
599 printf("forward: src %x dst %x ttl %x\n", ip->ip_src,
600 ip->ip_dst, ip->ip_ttl);
601 if (ipforwarding == 0) {
602 /* can't tell difference between net and host */
603 type = ICMP_UNREACH, code = ICMP_UNREACH_NET;
604 goto sendicmp;
605 }
606 if (ip->ip_ttl < IPTTLDEC) {
607 type = ICMP_TIMXCEED, code = ICMP_TIMXCEED_INTRANS;
608 goto sendicmp;
609 }
610 ip->ip_ttl -= IPTTLDEC;
611 mopt = m_get(M_DONTWAIT);
612 if (mopt == 0) {
613 m_freem(dtom(ip));
614 return;
615 }
616
617 /*
618 * Save at most 64 bytes of the packet in case
619 * we need to generate an ICMP message to the src.
620 */
621 mcopy = m_copy(dtom(ip), 0, imin(ip->ip_len, 64));
622 ip_stripoptions(ip, mopt);
623
624 /* last 0 here means no directed broadcast */
625 if ((error = ip_output(dtom(ip), mopt, 0, 0)) == 0) {
626 if (mcopy)
627 m_freem(mcopy);
628 return;
629 }
630 ip = mtod(mcopy, struct ip *);
631 type = ICMP_UNREACH, code = 0; /* need ``undefined'' */
632 switch (error) {
633
634 case ENETUNREACH:
635 case ENETDOWN:
636 code = ICMP_UNREACH_NET;
637 break;
638
639 case EMSGSIZE:
640 code = ICMP_UNREACH_NEEDFRAG;
641 break;
642
643 case EPERM:
644 code = ICMP_UNREACH_PORT;
645 break;
646
647 case ENOBUFS:
648 type = ICMP_SOURCEQUENCH;
649 break;
650
651 case EHOSTDOWN:
652 case EHOSTUNREACH:
653 code = ICMP_UNREACH_HOST;
654 break;
655 }
656sendicmp:
657 icmp_error(ip, type, code);
658}