start rewrite
[unix-history] / usr / src / sys / netinet / ip_input.c
CommitLineData
c20dd751 1/* ip_input.c 1.18 81/11/23 */
6e8b2eca 2
e1d82856 3#include "../h/param.h"
d10bd5b7 4#include "../h/systm.h"
e6dd2097
BJ
5#include "../h/clock.h"
6#include "../h/mbuf.h"
eb44bfb2 7#include "../h/protosw.h"
2b4b57cd 8#include "../h/socket.h"
d52566dd
BJ
9#include "../net/inet.h"
10#include "../net/inet_systm.h"
4ad99bae 11#include "../net/if.h"
d52566dd
BJ
12#include "../net/imp.h"
13#include "../net/ip.h" /* belongs before inet.h */
eb44bfb2 14#include "../net/ip_var.h"
d52566dd
BJ
15#include "../net/ip_icmp.h"
16#include "../net/tcp.h"
e6dd2097 17
eb44bfb2
BJ
/*
 * Indexed by IP protocol number; each element is an index into
 * protosw[].  Filled in by ip_init() and consulted by ip_input()
 * to select the protocol input routine.
 */
18u_char ip_protox[IPPROTO_MAX];
19
d52566dd
BJ
20/*
21 * Ip initialization.
22 */
23ip_init()
24{
eb44bfb2
BJ
25 register struct protosw *pr;
26 register int i;
eb44bfb2 27
4ad99bae 28COUNT(IP_INIT);
eb44bfb2
BJ
29 pr = pffindproto(PF_INET, IPPROTO_RAW);
30 if (pr == 0)
31 panic("ip_init");
32 for (i = 0; i < IPPROTO_MAX; i++)
33 ip_protox[i] = pr - protosw;
34 for (pr = protosw; pr <= protoswLAST; pr++)
35 if (pr->pr_family == PF_INET &&
36 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
37 ip_protox[pr->pr_protocol] = pr - protosw;
d52566dd
BJ
38 ipq.next = ipq.prev = &ipq;
39 ip_id = time & 0xffff;
40}
41
/* When nonzero, ip_input() verifies the IP header checksum. */
eb44bfb2 42u_char ipcksum = 1;
e6dd2097
BJ
/* Forward declaration: ip_reass() is used by ip_input() before its definition. */
43struct ip *ip_reass();
44
45/*
46 * Ip input routines.
47 */
48
49/*
50 * Ip input routine. Checksum and byte swap header. If fragmented
51 * try to reassamble. If complete and fragment queue exists, discard.
52 * Process options. Pass to next level.
53 */
54ip_input(m0)
55 struct mbuf *m0;
e1d82856 56{
2b4b57cd 57 register struct ip *ip;
6a7455e4 58 register struct mbuf *m = m0;
e6dd2097 59 register int i;
e1d82856 60 register struct ipq *fp;
e1d82856 61 int hlen;
e1d82856
BJ
62
63COUNT(IP_INPUT);
e6dd2097
BJ
64 /*
65 * Check header and byteswap.
66 */
c20dd751
BJ
67 if (m->m_len < sizeof (struct ip) &&
68 m_pullup(m, sizeof (struct ip)) == 0)
69 goto bad;
e6dd2097 70 ip = mtod(m, struct ip *);
c20dd751
BJ
71 if ((hlen = ip->ip_hl << 2) > m->m_len &&
72 m_pullup(m, hlen) == 0)
4ad99bae 73 goto bad;
4ad99bae 74 if (ipcksum)
92c7b8c1 75 if ((ip->ip_sum = inet_cksum(m, hlen)) != 0xffff) {
4ad99bae
BJ
76 printf("ip_sum %x\n", ip->ip_sum);
77 ipstat.ips_badsum++;
78 goto bad;
e1d82856 79 }
4ad99bae
BJ
80
81 /*
82 * Convert fields to host representation.
83 */
cdad2eb1 84 ip->ip_len = ntohs((u_short)ip->ip_len);
e6dd2097 85 ip->ip_id = ntohs(ip->ip_id);
4ad99bae 86 ip->ip_off = ntohs((u_short)ip->ip_off);
e1d82856 87
d10bd5b7 88 /*
e6dd2097
BJ
89 * Check that the amount of data in the buffers
90 * is as at least much as the IP header would have us expect.
91 * Trim mbufs if longer than we expect.
92 * Drop packet if shorter than we expect.
d10bd5b7 93 */
e6dd2097 94 i = 0;
c20dd751 95 for (; m != NULL; m = m->m_next)
e1d82856 96 i += m->m_len;
e6dd2097
BJ
97 m = m0;
98 if (i != ip->ip_len) {
c20dd751 99 if (i < ip->ip_len)
4ad99bae 100 goto bad;
e6dd2097 101 m_adj(m, ip->ip_len - i);
d10bd5b7 102 }
e1d82856 103
e6dd2097
BJ
104 /*
105 * Process options and, if not destined for us,
106 * ship it on.
107 */
108 if (hlen > sizeof (struct ip))
cdad2eb1 109 ip_dooptions(ip);
92c7b8c1
BJ
110 if (ip->ip_dst.s_addr != n_lhost.s_addr &&
111 if_ifwithaddr(ip->ip_dst) == 0) {
e6dd2097 112 if (--ip->ip_ttl == 0) {
cdad2eb1 113 icmp_error(ip, ICMP_TIMXCEED, 0);
e6dd2097
BJ
114 return;
115 }
116 ip_output(dtom(ip));
117 return;
d10bd5b7 118 }
e1d82856 119
e6dd2097
BJ
120 /*
121 * Look for queue of fragments
122 * of this datagram.
123 */
124 for (fp = ipq.next; fp != &ipq; fp = fp->next)
125 if (ip->ip_id == fp->ipq_id &&
126 ip->ip_src.s_addr == fp->ipq_src.s_addr &&
127 ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
128 ip->ip_p == fp->ipq_p)
129 goto found;
130 fp = 0;
131found:
e1d82856 132
e6dd2097
BJ
133 /*
134 * Adjust ip_len to not reflect header,
135 * set ip_mff if more fragments are expected,
136 * convert offset of this to bytes.
137 */
138 ip->ip_len -= hlen;
eb44bfb2 139 ((struct ipasfrag *)ip)->ipf_mff = 0;
e6dd2097 140 if (ip->ip_off & IP_MF)
eb44bfb2 141 ((struct ipasfrag *)ip)->ipf_mff = 1;
e6dd2097 142 ip->ip_off <<= 3;
e1d82856 143
e6dd2097
BJ
144 /*
145 * If datagram marked as having more fragments
146 * or if this is not the first fragment,
147 * attempt reassembly; if it succeeds, proceed.
148 */
eb44bfb2
BJ
149 if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
150 ip = ip_reass((struct ipasfrag *)ip, fp);
e6dd2097 151 if (ip == 0)
d10bd5b7 152 return;
e6dd2097
BJ
153 hlen = ip->ip_hl << 2;
154 m = dtom(ip);
155 } else
156 if (fp)
157 (void) ip_freef(fp);
4ad99bae
BJ
158
159 /*
160 * Switch out to protocol's input routine.
161 */
eb44bfb2 162 (*protosw[ip_protox[ip->ip_p]].pr_input)(m);
4ad99bae
BJ
163 return;
164bad:
165 m_freem(m);
e6dd2097 166}
e1d82856 167
e6dd2097
BJ
168/*
169 * Take incoming datagram fragment and try to
4ad99bae 170 * reassemble it into whole datagram. If a chain for
e6dd2097
BJ
171 * reassembly of this datagram already exists, then it
172 * is given as fp; otherwise have to make a chain.
173 */
174struct ip *
175ip_reass(ip, fp)
eb44bfb2 176 register struct ipasfrag *ip;
e6dd2097
BJ
177 register struct ipq *fp;
178{
179 register struct mbuf *m = dtom(ip);
eb44bfb2 180 register struct ipasfrag *q;
e6dd2097
BJ
181 struct mbuf *t;
182 int hlen = ip->ip_hl << 2;
183 int i, next;
4ad99bae 184COUNT(IP_REASS);
d10bd5b7 185
e6dd2097
BJ
186 /*
187 * Presence of header sizes in mbufs
188 * would confuse code below.
189 */
190 m->m_off += hlen;
191 m->m_len -= hlen;
d10bd5b7 192
e6dd2097
BJ
193 /*
194 * If first fragment to arrive, create a reassembly queue.
195 */
196 if (fp == 0) {
197 if ((t = m_get(1)) == NULL)
198 goto dropfrag;
199 t->m_off = MMINOFF;
200 fp = mtod(t, struct ipq *);
201 insque(fp, &ipq);
202 fp->ipq_ttl = IPFRAGTTL;
203 fp->ipq_p = ip->ip_p;
204 fp->ipq_id = ip->ip_id;
eb44bfb2
BJ
205 fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
206 fp->ipq_src = ((struct ip *)ip)->ip_src;
207 fp->ipq_dst = ((struct ip *)ip)->ip_dst;
e6dd2097 208 }
e1d82856 209
e6dd2097
BJ
210 /*
211 * Find a segment which begins after this one does.
212 */
eb44bfb2 213 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
e6dd2097
BJ
214 if (q->ip_off > ip->ip_off)
215 break;
e1d82856 216
e6dd2097
BJ
217 /*
218 * If there is a preceding segment, it may provide some of
219 * our data already. If so, drop the data from the incoming
220 * segment. If it provides all of our data, drop us.
221 */
eb44bfb2
BJ
222 if (q->ipf_prev != (struct ipasfrag *)fp) {
223 i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
e6dd2097
BJ
224 if (i > 0) {
225 if (i >= ip->ip_len)
226 goto dropfrag;
227 m_adj(dtom(ip), i);
228 ip->ip_off += i;
229 ip->ip_len -= i;
e1d82856 230 }
d10bd5b7 231 }
e1d82856 232
e6dd2097
BJ
233 /*
234 * While we overlap succeeding segments trim them or,
235 * if they are completely covered, dequeue them.
236 */
eb44bfb2 237 while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
e6dd2097
BJ
238 i = (ip->ip_off + ip->ip_len) - q->ip_off;
239 if (i < q->ip_len) {
240 q->ip_len -= i;
241 m_adj(dtom(q), i);
242 break;
243 }
eb44bfb2
BJ
244 q = q->ipf_next;
245 m_freem(dtom(q->ipf_prev));
246 ip_deq(q->ipf_prev);
e6dd2097 247 }
e1d82856 248
e6dd2097
BJ
249 /*
250 * Stick new segment in its place;
251 * check for complete reassembly.
252 */
eb44bfb2 253 ip_enq(ip, q->ipf_prev);
e6dd2097 254 next = 0;
eb44bfb2 255 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
e6dd2097
BJ
256 if (q->ip_off != next)
257 return (0);
258 next += q->ip_len;
259 }
eb44bfb2 260 if (q->ipf_prev->ipf_mff)
e6dd2097 261 return (0);
e1d82856 262
e6dd2097
BJ
263 /*
264 * Reassembly is complete; concatenate fragments.
265 */
266 q = fp->ipq_next;
267 m = dtom(q);
268 t = m->m_next;
269 m->m_next = 0;
270 m_cat(m, t);
eb44bfb2 271 while ((q = q->ipf_next) != (struct ipasfrag *)fp)
e6dd2097 272 m_cat(m, dtom(q));
e1d82856 273
e6dd2097
BJ
274 /*
275 * Create header for new ip packet by
276 * modifying header of first packet;
277 * dequeue and discard fragment reassembly header.
278 * Make header visible.
279 */
280 ip = fp->ipq_next;
281 ip->ip_len = next;
eb44bfb2
BJ
282 ((struct ip *)ip)->ip_src = fp->ipq_src;
283 ((struct ip *)ip)->ip_dst = fp->ipq_dst;
e6dd2097 284 remque(fp);
cdad2eb1 285 (void) m_free(dtom(fp));
e6dd2097 286 m = dtom(ip);
eb44bfb2
BJ
287 m->m_len += sizeof (struct ipasfrag);
288 m->m_off -= sizeof (struct ipasfrag);
289 return ((struct ip *)ip);
e6dd2097
BJ
290
291dropfrag:
292 m_freem(m);
293 return (0);
e1d82856
BJ
294}
295
e6dd2097
BJ
296/*
297 * Free a fragment reassembly header and all
298 * associated datagrams.
299 */
300struct ipq *
301ip_freef(fp)
302 struct ipq *fp;
e1d82856 303{
eb44bfb2 304 register struct ipasfrag *q;
e6dd2097 305 struct mbuf *m;
4ad99bae 306COUNT(IP_FREEF);
e6dd2097 307
eb44bfb2 308 for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
e6dd2097
BJ
309 m_freem(dtom(q));
310 m = dtom(fp);
311 fp = fp->next;
312 remque(fp->prev);
cdad2eb1 313 (void) m_free(m);
e6dd2097 314 return (fp);
e1d82856
BJ
315}
316
e6dd2097
BJ
317/*
318 * Put an ip fragment on a reassembly chain.
319 * Like insque, but pointers in middle of structure.
320 */
321ip_enq(p, prev)
eb44bfb2 322 register struct ipasfrag *p, *prev;
e1d82856 323{
e1d82856 324
4ad99bae 325COUNT(IP_ENQ);
eb44bfb2
BJ
326 p->ipf_prev = prev;
327 p->ipf_next = prev->ipf_next;
328 prev->ipf_next->ipf_prev = p;
329 prev->ipf_next = p;
e1d82856
BJ
330}
331
e6dd2097
BJ
332/*
333 * To ip_enq as remque is to insque.
334 */
335ip_deq(p)
eb44bfb2 336 register struct ipasfrag *p;
e1d82856 337{
e6dd2097 338
4ad99bae 339COUNT(IP_DEQ);
eb44bfb2
BJ
340 p->ipf_prev->ipf_next = p->ipf_next;
341 p->ipf_next->ipf_prev = p->ipf_prev;
e1d82856
BJ
342}
343
e6dd2097
BJ
344/*
345 * IP timer processing;
346 * if a timer expires on a reassembly
347 * queue, discard it.
348 */
d52566dd 349ip_slowtimo()
e1d82856
BJ
350{
351 register struct ipq *fp;
e6dd2097 352 int s = splnet();
e1d82856 353
4ad99bae 354COUNT(IP_SLOWTIMO);
905758fb 355 for (fp = ipq.next; fp != &ipq; )
e6dd2097
BJ
356 if (--fp->ipq_ttl == 0)
357 fp = ip_freef(fp);
358 else
359 fp = fp->next;
e6dd2097 360 splx(s);
e1d82856
BJ
361}
362
4ad99bae
BJ
363/*
364 * Drain off all datagram fragments.
365 */
d52566dd
BJ
366ip_drain()
367{
368
4ad99bae
BJ
369COUNT(IP_DRAIN);
370 while (ipq.next != &ipq)
371 (void) ip_freef(ipq.next);
d52566dd 372}
2b4b57cd 373
e6dd2097
BJ
374/*
375 * Do option processing on a datagram,
376 * possibly discarding it if bad options
377 * are encountered.
378 */
379ip_dooptions(ip)
380 struct ip *ip;
e1d82856 381{
e6dd2097 382 register u_char *cp;
cdad2eb1 383 int opt, optlen, cnt;
2b4b57cd 384 struct in_addr *sin;
d52566dd 385 register struct ip_timestamp *ipt;
4ad99bae
BJ
386 register struct ifnet *ifp;
387 struct in_addr t;
e6dd2097 388
4ad99bae 389COUNT(IP_DOOPTIONS);
e6dd2097
BJ
390 cp = (u_char *)(ip + 1);
391 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
392 for (; cnt > 0; cnt -= optlen, cp += optlen) {
393 opt = cp[0];
394 if (opt == IPOPT_EOL)
395 break;
396 if (opt == IPOPT_NOP)
397 optlen = 1;
398 else
399 optlen = cp[1];
400 switch (opt) {
e1d82856 401
e6dd2097
BJ
402 default:
403 break;
e1d82856 404
4ad99bae
BJ
405 /*
406 * Source routing with record.
407 * Find interface with current destination address.
408 * If none on this machine then drop if strictly routed,
409 * or do nothing if loosely routed.
410 * Record interface address and bring up next address
411 * component. If strictly routed make sure next
412 * address on directly accessible net.
413 */
e6dd2097 414 case IPOPT_LSRR:
d52566dd 415 if (cp[2] < 4 || cp[2] > optlen - (sizeof (long) - 1))
e6dd2097 416 break;
2b4b57cd 417 sin = (struct in_addr *)(cp + cp[2]);
4ad99bae
BJ
418 ifp = if_ifwithaddr(*sin);
419 if (ifp == 0) {
420 if (opt == IPOPT_SSRR)
421 goto bad;
422 break;
e6dd2097 423 }
4ad99bae
BJ
424 t = ip->ip_dst; ip->ip_dst = *sin; *sin = t;
425 cp[2] += 4;
426 if (cp[2] > optlen - (sizeof (long) - 1))
427 break;
428 ip->ip_dst = sin[1];
429 if (opt == IPOPT_SSRR && if_ifonnetof(ip->ip_dst)==0)
430 goto bad;
e6dd2097
BJ
431 break;
432
433 case IPOPT_TS:
d52566dd
BJ
434 ipt = (struct ip_timestamp *)cp;
435 if (ipt->ipt_len < 5)
e6dd2097 436 goto bad;
d52566dd
BJ
437 if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
438 if (++ipt->ipt_oflw == 0)
e6dd2097 439 goto bad;
e6dd2097
BJ
440 break;
441 }
2b4b57cd 442 sin = (struct in_addr *)(cp+cp[2]);
d52566dd 443 switch (ipt->ipt_flg) {
e1d82856 444
e6dd2097
BJ
445 case IPOPT_TS_TSONLY:
446 break;
e1d82856 447
e6dd2097 448 case IPOPT_TS_TSANDADDR:
d52566dd 449 if (ipt->ipt_ptr + 8 > ipt->ipt_len)
e6dd2097 450 goto bad;
4ad99bae
BJ
451 /* stamp with ``first'' interface address */
452 *sin++ = ifnet->if_addr;
e6dd2097
BJ
453 break;
454
455 case IPOPT_TS_PRESPEC:
4ad99bae
BJ
456 if (if_ifwithaddr(*sin) == 0)
457 continue;
d52566dd 458 if (ipt->ipt_ptr + 8 > ipt->ipt_len)
e6dd2097 459 goto bad;
d52566dd 460 ipt->ipt_ptr += 4;
e1d82856
BJ
461 break;
462
463 default:
e6dd2097 464 goto bad;
e1d82856 465 }
2b4b57cd 466 *(n_time *)sin = iptime();
d52566dd 467 ipt->ipt_ptr += 4;
e6dd2097 468 }
e1d82856 469 }
cdad2eb1 470 return;
e6dd2097
BJ
471bad:
472 /* SHOULD FORCE ICMP MESSAGE */
cdad2eb1 473 return;
e1d82856
BJ
474}
475
e6dd2097 476/*
4ad99bae
BJ
477 * Strip out IP options, at higher
478 * level protocol in the kernel.
479 * Second argument is buffer to which options
480 * will be moved, and return value is their length.
e6dd2097 481 */
4ad99bae 482ip_stripoptions(ip, cp)
e6dd2097 483 struct ip *ip;
4ad99bae 484 char *cp;
e1d82856 485{
e6dd2097
BJ
486 register int i;
487 register struct mbuf *m;
e6dd2097 488 int olen;
4ad99bae 489COUNT(IP_STRIPOPTIONS);
e6dd2097
BJ
490
491 olen = (ip->ip_hl<<2) - sizeof (struct ip);
4ad99bae
BJ
492 m = dtom(ip);
493 ip++;
494 if (cp)
495 bcopy((caddr_t)ip, cp, (unsigned)olen);
e6dd2097 496 i = m->m_len - (sizeof (struct ip) + olen);
cdad2eb1 497 bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
e6dd2097 498 m->m_len -= i;
e1d82856 499}