Commit | Line | Data |
---|---|---|
17efd7fe MK |
1 | #ifdef RCSIDENT |
2 | static char rcsident[] = "$Header: ip_output.c,v 1.28 85/07/31 09:32:09 walsh Exp $"; | |
3 | #endif | |
4 | ||
5 | #include "../h/param.h" | |
6 | #include "../h/dir.h" | |
7 | #include "../h/user.h" | |
8 | #include "../h/mbuf.h" | |
9 | #include "../h/socket.h" | |
10 | #include "../h/socketvar.h" | |
11 | #include "../h/protosw.h" | |
12 | #include "../h/domain.h" | |
13 | #include "../h/ioctl.h" | |
14 | #include "../h/syslog.h" | |
15 | ||
16 | #include "../net/if.h" | |
17 | #include "../net/route.h" | |
18 | ||
19 | #include "../bbnnet/in.h" | |
20 | #include "../bbnnet/net.h" | |
21 | #include "../bbnnet/in_pcb.h" | |
22 | #include "../bbnnet/in_var.h" | |
23 | #include "../bbnnet/ip.h" | |
24 | #include "../bbnnet/icmp.h" | |
25 | ||
26 | /* | |
27 | * If you're going to a specific host or via a gateway, the routing | |
28 | * entry gateway field holds the best way to get there. Otherwise, | |
29 | * the routing entry tells you how to get onto that net -- it has | |
30 | * the net address portion of our local host: | |
31 | * | |
32 | * On bbn-labs-b: | |
33 | * | |
34 | * rt_dst rt_gateway flags | |
35 | * il0 => 0x00000b80 0x2010b80 UP | |
36 | * imp0 => 0x00000008 0x2000708 UP | |
37 | * loop => 0x0000007f 0x100007f UP | |
38 | * | |
39 | * So you can see that the rt_gateway is our local address, and the | |
40 | * rt_dst may be the net number of the media. If it's a route | |
41 | * to a net, the other guy is on this net and you want to route the | |
42 | * packet to him anyway. | |
43 | * | |
44 | * gateway 0 0x1000b80 UP, RTF_GATEWAY | |
45 | */ | |
46 | ||
/*
 * IF_SEND: hand the packet chain mp to interface ifp using route rt and
 * store the resulting error code in retval.
 *
 *  - If the interface is not marked IFF_UP the chain is freed and
 *    retval is set to ENETUNREACH.
 *  - If the route is a gateway or host route, the packet is addressed
 *    to the route's rt_gateway.
 *  - Otherwise (route to a directly attached net) the packet is
 *    addressed to the IP destination found in the packet header.
 *
 * The body is wrapped in do { ... } while (0) so that the macro behaves
 * as a single statement and can be used safely in an unbraced if/else.
 * Arguments are evaluated more than once; do not pass expressions with
 * side effects.
 */
#define IF_SEND(ifp, mp, rt, retval) \
do { \
	static struct sockaddr_in tmproute = {AF_INET}; \
	\
	if (! ((ifp)->if_flags & IFF_UP)) { \
		/* goes with PRC_IFDOWN in in.c */ \
		m_freem(mp); \
		(retval) = ENETUNREACH; \
	} else if ((rt)->rt_flags & (RTF_GATEWAY|RTF_HOST)) \
		(retval) = (*(ifp)->if_output)((ifp), (mp), &(rt)->rt_gateway); \
	else { \
		tmproute.sin_addr.s_addr = (mtod((mp), struct ip *))->ip_dst.s_addr; \
		(retval) = (*(ifp)->if_output)((ifp), (mp), (struct sockaddr *) &tmproute); \
	} \
} while (0)
61 | ||
/*
 * Function form of the IF_SEND macro: pass the chain mp to interface
 * ifp along route rt and return whatever error code IF_SEND produced.
 */
if_send(ifp, mp, rt)
register struct ifnet *ifp;
register struct mbuf *mp;
register struct rtentry *rt;
{
	int error;

	IF_SEND(ifp, mp, rt, error);
	return (error);
}
72 | ||
73 | ||
74 | /* | |
75 | * Find a route to this destination. Given the source and destination | |
76 | * addresses, it returns a local net address | |
77 | * to send to (either the address of the destination itself or a gateway). | |
78 | * Taken mostly from rtalloc; expanded to route according to | |
79 | * both ends of the connection. | |
80 | */ | |
81 | ||
82 | ||
83 | struct rtentry *ip_route(src, dst) | |
84 | struct in_addr *src; | |
85 | struct in_addr *dst; | |
86 | { | |
87 | register struct rtentry *rt; | |
88 | register struct mbuf *m; | |
89 | register unsigned hash; | |
90 | net_t snet, dnet; | |
91 | int doinghost; | |
92 | struct rtentry *rtmin; | |
93 | struct mbuf **table; | |
94 | static struct in_addr wildcard; | |
95 | ||
96 | /* get network parts of src and dest addresses */ | |
97 | ||
98 | snet = iptonet(*src); | |
99 | dnet = iptonet(*dst); | |
100 | ||
101 | rtmin = NULL; | |
102 | hash = HOSTHASH(dst->s_addr); | |
103 | table = rthost; | |
104 | doinghost = TRUE; | |
105 | again : | |
106 | for (m = table[hash % RTHASHSIZ]; m; m = m->m_next) | |
107 | { | |
108 | rt = mtod(m, struct rtentry *); | |
109 | if (rt->rt_hash != hash) | |
110 | continue; | |
111 | if (! (rt->rt_flags & RTF_UP)) | |
112 | continue; | |
113 | if (! (rt->rt_ifp->if_flags & IFF_UP)) | |
114 | continue; | |
115 | if (rt->rt_dst.sa_family != AF_INET) | |
116 | continue; | |
117 | ||
118 | /* packets go out an interface with our local IP address */ | |
119 | if (iptonet(((struct sockaddr_in *)&(rt->rt_gateway))->sin_addr) != snet) | |
120 | continue; | |
121 | ||
122 | /* does this route get us there? */ | |
123 | if (doinghost) | |
124 | { | |
125 | if (((struct sockaddr_in *)&(rt->rt_dst))->sin_addr.s_addr != | |
126 | dst->s_addr) | |
127 | continue; | |
128 | } | |
129 | else | |
130 | { | |
131 | /* | |
132 | * iptonet == 0 => smart gateway (route to anywhere) | |
133 | * iptonet != 0 => gateway to another net (route to net) | |
134 | */ | |
135 | if (iptonet(((struct sockaddr_in *)&(rt->rt_dst))->sin_addr) != dnet) | |
136 | continue; | |
137 | } | |
138 | ||
139 | /* and try to share load across gateways */ | |
140 | if (rtmin == NULL) | |
141 | rtmin = rt; | |
142 | else if (rt->rt_use < rtmin->rt_use) | |
143 | rtmin = rt; | |
144 | } | |
145 | ||
146 | if (rtmin == NULL) | |
147 | { | |
148 | if (doinghost) | |
149 | { | |
150 | doinghost = FALSE; | |
151 | hash = NETHASH(*dst), table = rtnet; | |
152 | goto again; | |
153 | } | |
154 | /* | |
155 | * Check for wildcard gateway, by convention network 0. | |
156 | */ | |
157 | if (dst != &wildcard) | |
158 | { | |
159 | hash = 0; | |
160 | dst = &wildcard; | |
161 | dnet = 0; | |
162 | goto again; | |
163 | } | |
164 | rtstat.rts_unreach++; | |
165 | return(NULL); | |
166 | } | |
167 | ||
168 | rtmin->rt_refcnt++; | |
169 | if (dst == &wildcard) | |
170 | rtstat.rts_wildcard++; | |
171 | return(rtmin); | |
172 | } | |
173 | ||
174 | ||
175 | /* | |
176 | * Ip_send is called from the higher protocol layer (TCP/RDP/UDP) and is passed | |
177 | * an mbuf chain containing a packet to send to the local network. The first | |
178 | * mbuf contains the protocol header and an IP header which is partially | |
179 | * filled in. After determining a route (outgoing interface + first hop) for | |
180 | * the packet, it is fragmented (if necessary) and sent to the local net | |
181 | * through the local net send routine. | |
182 | * | |
183 | * For non-raw output, caller should have stuffed: | |
184 | * ip protocol type, type of service, source addr, destin addr | |
185 | * | |
186 | * ip_tos is left to caller so that people using raw sockets can do whatever | |
187 | * they please. (They don't have an inpcb in which to store such info.) | |
188 | * | |
189 | * The asis argument is TRUE for raw output and the gateway (packet forwarding) | |
190 | * code. It indicates that the IP header is fully constructed. | |
191 | * | |
192 | * Errors at the IP layer and below occur synchronously, and can be reported | |
193 | * back via subroutine return values. Higher level protocols should remember | |
194 | * that if they do things asynchronous to a system call (ie., packet | |
195 | * retransmission) that they should post error back to user via advise_user() | |
196 | * so that user gets error next time he rendezvous with the kernel. | |
197 | */ | |
198 | ip_send(inp, mp, len, asis) | |
199 | struct inpcb *inp; | |
200 | register struct mbuf *mp; | |
201 | register int len; | |
202 | int asis; | |
203 | { | |
204 | register struct ip *p; | |
205 | register struct ifnet *ifp; | |
206 | register struct rtentry *rt; | |
207 | register int hlen; | |
208 | int free_route = FALSE; | |
209 | int retval; | |
210 | ||
211 | p = mtod(mp, struct ip *); /* -> ip header */ | |
212 | /* | |
213 | * Find route for datagram if one has not been assigned. | |
214 | */ | |
215 | if ((rt = inp->inp_route.ro_rt) == NULL) | |
216 | { | |
217 | if ((rt = ip_route(&p->ip_src, &p->ip_dst)) == NULL) | |
218 | { | |
219 | if (asis || (p->ip_src.s_addr == INADDR_ANY)) | |
220 | { | |
221 | /* | |
222 | * asis: forwarding a packet not sourced by us | |
223 | * eg., by raw interface and user level repeater process | |
224 | * INADDR_ANY: sending icmp packet for which | |
225 | * we're trying to avoid routing twice. | |
226 | */ | |
227 | struct route tmproute; | |
228 | struct sockaddr_in *sin; | |
229 | ||
230 | bzero ((caddr_t) &tmproute, sizeof(tmproute)); | |
231 | sin = (struct sockaddr_in *) &tmproute.ro_dst; | |
232 | sin->sin_family = AF_INET; | |
233 | sin->sin_addr.s_addr = p->ip_dst.s_addr; | |
234 | rtalloc (&tmproute); | |
235 | rt = tmproute.ro_rt; | |
236 | ||
237 | if (rt && (p->ip_src.s_addr == INADDR_ANY)) | |
238 | p->ip_src = IA_INADDR(in_iafromif(rt->rt_ifp)); | |
239 | } | |
240 | ||
241 | if (rt == NULL) | |
242 | { | |
243 | m_freem(mp); | |
244 | return(ENETUNREACH); | |
245 | } | |
246 | } | |
247 | free_route = TRUE; | |
248 | } | |
249 | ifp = rt->rt_ifp; | |
250 | ||
251 | /* | |
252 | * Copy ip source route to header. Know asis must be FALSE, if do. | |
253 | */ | |
254 | if (inp->inp_optlen > 0) | |
255 | { | |
256 | char *q; | |
257 | ||
258 | if (mp->m_off - inp->inp_optlen >= MMINOFF) | |
259 | { | |
260 | struct in_addr *ipa; | |
261 | ||
262 | mp->m_off -= inp->inp_optlen; | |
263 | mp->m_len += inp->inp_optlen; | |
264 | q = (char *) p; | |
265 | p = (struct ip *) (q - inp->inp_optlen); | |
266 | bcopy(q, (caddr_t)p, sizeof(struct ip)); | |
267 | bcopy(inp->inp_options, (caddr_t)(p+1), (unsigned)inp->inp_optlen); | |
268 | /* | |
269 | * And replate eventual destination with first hop. | |
270 | * Eventual destination is in source route just | |
271 | * copied in. | |
272 | */ | |
273 | ipa = (struct in_addr *) (&inp->inp_options[0]); | |
274 | p->ip_dst = ipa[inp->inp_optlen/sizeof(struct in_addr)]; | |
275 | } | |
276 | else | |
8902c2d0 | 277 | log(LOG_INFO, "ip_send: optlen %d inpcb 0x%x\n", |
17efd7fe MK |
278 | (int)inp->inp_optlen, inp); |
279 | } | |
280 | ||
281 | /* | |
282 | * fill in ip header fields | |
283 | */ | |
284 | if (asis) | |
285 | { | |
286 | /* | |
287 | * RAW OUTPUT. Must get len, hlen, off from packet header. | |
288 | * Byte swap is ugly (since we must swap back below), but | |
289 | * necessary in case we must fragment. | |
290 | */ | |
291 | hlen = p->ip_hl << IP_HLSHIFT; | |
292 | len = ntohs(p->ip_len); | |
293 | p->ip_off = ntohs(p->ip_off); | |
294 | } | |
295 | else | |
296 | { | |
297 | static u_short next_ip_id; /* some day RDP may want to force for rxmit */ | |
298 | ||
299 | hlen = sizeof(struct ip) + inp->inp_optlen; | |
300 | len += hlen; | |
301 | p->ip_v = IPVERSION; | |
302 | p->ip_hl = hlen >> IP_HLSHIFT; | |
303 | p->ip_off = 0; | |
304 | p->ip_ttl = MAXTTL; /* ### should come from route */ | |
305 | p->ip_id = htons(next_ip_id++); | |
306 | } | |
307 | ||
308 | /* | |
309 | * let ip_frag do the send if needed, otherwise do it directly. | |
310 | */ | |
311 | ||
312 | /* for testing IP reassembly code */ | |
313 | #ifdef FORCE_FRAG | |
314 | #define MTU(ifp) (((ifp)->if_mtu >> FORCE_FRAG) & (~3)) | |
315 | #else | |
316 | #define MTU(ifp) (ifp)->if_mtu | |
317 | #endif | |
318 | ||
319 | if (len > MTU(ifp)) | |
320 | { | |
321 | p->ip_len = len; | |
322 | retval = ip_frag(p, ifp, rt, hlen); | |
323 | } | |
324 | else | |
325 | { | |
326 | /* | |
327 | * complete header, byte swap, and send to local net | |
328 | */ | |
329 | p->ip_len = htons((u_short)len); | |
330 | p->ip_off = htons(p->ip_off); | |
331 | /* | |
332 | * No reason not to have kernel checksum, even for raw packets. | |
333 | */ | |
334 | p->ip_sum = 0; | |
335 | p->ip_sum = in_cksum(dtom(p), hlen); | |
336 | IF_SEND (ifp, mp, rt, retval); | |
337 | } | |
338 | ||
339 | rt->rt_use ++; /* Yet another IP packet sent away */ | |
340 | ||
341 | if (free_route) | |
342 | { | |
343 | struct socket *so; | |
344 | ||
345 | if ((so = inp->inp_socket) && | |
346 | (so->so_proto->pr_flags & PR_CONNREQUIRED)) | |
347 | /* | |
348 | * Found a new route after old one pinged out. | |
349 | */ | |
350 | inp->inp_route.ro_rt = rt; | |
351 | else | |
352 | rtfree(rt); | |
353 | } | |
354 | ||
355 | return(retval); | |
356 | } | |
357 | ||
358 | /* | |
359 | * Ip_frag is called with a packet with a completed ip header | |
360 | * (except for checksum). It fragments the packet, inserts the | |
361 | * IP checksum, and calls the appropriate local net output routine | |
362 | * to send it to the net. | |
363 | * | |
364 | * Previously, when there was only one kind of mbuf, it tried to | |
365 | * reduce space requirements by recycling the chain to be fragmented. | |
366 | * Preserving this approach is overly complicated, and should mbufs | |
367 | * change again, cause problems. Therefore, have switched to copying | |
368 | * the chain to be fragmented. | |
369 | */ | |
370 | ip_frag(p, ifp, rt, hlen) | |
371 | register struct ip *p; | |
372 | struct ifnet *ifp; | |
373 | struct rtentry *rt; | |
374 | register int hlen; | |
375 | { | |
376 | register struct mbuf *m; /* original chunk */ | |
377 | register struct mbuf *mhdr; /* fragment */ | |
378 | register struct ip *fip; /* the fragment IP header */ | |
379 | int off; /* offset into entire IP datagram */ | |
380 | int here; /* offset into this chunk of it */ | |
381 | register int len; /* length of data in this chunk */ | |
382 | int flags; /* of this chunk to fragment */ | |
383 | int max; /* max data length in a fragment */ | |
384 | int fdlen; /* actual fragment data length */ | |
385 | int error; | |
386 | ||
387 | m = dtom(p); | |
388 | ||
389 | if (p->ip_off & ip_df) | |
390 | { /* can't fragment */ | |
391 | m_freem(m); | |
392 | return(EMSGSIZE); | |
393 | } | |
394 | max = MTU(ifp) - hlen; /* max data length in frag */ | |
395 | len = p->ip_len - hlen; /* data length */ | |
396 | ||
397 | /* | |
398 | * this only needs to be this complicated if we are handed | |
399 | * an already-fragmented packet | |
400 | */ | |
401 | flags = p->ip_off&(ip_mf|ip_df); /* save old flags */ | |
402 | p->ip_off &= ~flags; /* take them out of ip_off */ | |
403 | off = p->ip_off << IP_OFFSHIFT; /* fragment offset */ | |
404 | here = hlen; | |
405 | error = 0; | |
406 | ||
407 | while (len > 0) | |
408 | { | |
409 | /* | |
410 | * Allocate mbuf for fragment IP header | |
411 | */ | |
412 | mhdr = m_get(M_DONTWAIT, MT_HEADER); | |
413 | if (mhdr == NULL) | |
414 | { | |
415 | error = ENOBUFS; | |
416 | break; | |
417 | } | |
418 | /* | |
419 | * get copy of data for fragment | |
420 | */ | |
421 | if (len < max) | |
422 | fdlen = len; | |
423 | else | |
424 | fdlen = max & (~7); /* 7 == 2^IP_OFFSHIFT -1 */ | |
425 | mhdr->m_next = m_copy(m, here, fdlen); | |
426 | if (mhdr->m_next == NULL) | |
427 | { | |
428 | m_free(mhdr); | |
429 | error = ENOBUFS; | |
430 | break; | |
431 | } | |
432 | /* | |
433 | * build the header for this fragment and ship it off. | |
434 | */ | |
435 | mhdr->m_len = hlen; | |
436 | mhdr->m_off = MMAXOFF - hlen; | |
437 | fip = mtod(mhdr, struct ip *); | |
438 | bcopy((caddr_t)p, (caddr_t)fip, (unsigned)hlen); | |
439 | fip->ip_off = off >> IP_OFFSHIFT; | |
440 | if (fdlen >= len) | |
441 | /* it's the last fragment */ | |
442 | fip->ip_off |= flags; | |
443 | else | |
444 | fip->ip_off |= ip_mf; | |
445 | fip->ip_off = htons((u_short)fip->ip_off); | |
446 | fip->ip_len = htons((u_short)fdlen + hlen); | |
447 | fip->ip_sum = 0; | |
448 | fip->ip_sum = in_cksum(mhdr, hlen); | |
449 | if (error = if_send (ifp, mhdr, rt)) | |
450 | break; | |
451 | ||
452 | /* | |
453 | * and get ready for next pass through the loop | |
454 | */ | |
455 | len -= fdlen; | |
456 | off += fdlen; | |
457 | here += fdlen; | |
458 | } | |
459 | ||
460 | m_freem(m); | |
461 | return (error); | |
462 | } | |
463 | ||
464 | /* | |
465 | * Current connection should use a new path. | |
466 | */ | |
467 | struct rtentry *ip_reroute(inp) | |
468 | register struct inpcb *inp; | |
469 | { | |
470 | register struct route *ro = &inp->inp_route; | |
471 | ||
472 | rtfree(ro->ro_rt); | |
473 | return(ro->ro_rt = ip_route(&inp->inp_laddr, &inp->inp_faddr)); | |
474 | } | |
475 | ||
476 | /* | |
477 | * A gateway has gone down. Change route used by all connections currently | |
478 | * using it. | |
479 | */ | |
480 | ip_gdown(addr) | |
481 | u_long addr; | |
482 | { | |
483 | register struct protosw *psw; | |
484 | ||
485 | for(psw=inetdomain.dom_protosw; psw < inetdomain.dom_protoswNPROTOSW; psw++) | |
486 | if (psw->pr_type != SOCK_RAW) | |
487 | if (psw->pr_ctlinput) | |
488 | (*(psw->pr_ctlinput)) (PRC_GWDOWN, addr); | |
489 | } | |
490 | ||
491 | /* | |
492 | * Called from protocol ctlinput routine. This way, IP/ICMP don't need to know | |
493 | * about protocol's head of inpcbs... for all the protocols. | |
494 | */ | |
495 | in_gdown (head, addr) | |
496 | register struct inpcb *head; | |
497 | u_long addr; | |
498 | { | |
499 | register struct inpcb *inp; | |
500 | register struct rtentry *rt; | |
501 | ||
502 | if (head == NULL) | |
503 | return; | |
504 | ||
505 | for(inp = head->inp_next; inp != head; inp = inp->inp_next) | |
506 | { | |
507 | if (rt = inp->inp_route.ro_rt) | |
508 | { | |
509 | if (rt->rt_flags & RTF_GATEWAY) | |
510 | { | |
511 | if (((struct sockaddr_in *) &rt->rt_gateway)->sin_addr.s_addr == addr) | |
512 | { | |
513 | /* | |
514 | * Don't remove route permanently, since want to catch | |
515 | * the gateway when it reboots: | |
516 | * -- rtrequest (SIOCDELRT, rt) -- | |
517 | * | |
518 | * make sure rtfree() not remove route mbuf | |
519 | * incrementing reference count here, and decrementing | |
520 | * when timeout on reinstatement goes off. Cannot call | |
521 | * rtfree with zero reference count when have not done | |
522 | * SIOCDELRT. | |
523 | */ | |
524 | if (rt->rt_flags & RTF_UP) | |
525 | { | |
526 | rt->rt_flags &= ~RTF_UP; | |
527 | rt->rt_flags |= RTF_REINSTATE; | |
528 | rt->irt_gdown = RT_REINSTATE; | |
529 | rt->rt_refcnt ++; | |
530 | } | |
531 | ||
532 | if (!ip_reroute(inp)) | |
533 | advise_user(inp->inp_socket, ENETUNREACH); | |
534 | ||
535 | } | |
536 | } | |
537 | } | |
538 | } | |
539 | } |