have to leave instrs.adb around, kernel uses it for kdb
[unix-history] / usr / src / sys / deprecated / bbnnet / ip_output.c
CommitLineData
17efd7fe
MK
1#ifdef RCSIDENT
2static char rcsident[] = "$Header: ip_output.c,v 1.28 85/07/31 09:32:09 walsh Exp $";
3#endif
4
5#include "../h/param.h"
6#include "../h/dir.h"
7#include "../h/user.h"
8#include "../h/mbuf.h"
9#include "../h/socket.h"
10#include "../h/socketvar.h"
11#include "../h/protosw.h"
12#include "../h/domain.h"
13#include "../h/ioctl.h"
14#include "../h/syslog.h"
15
16#include "../net/if.h"
17#include "../net/route.h"
18
19#include "../bbnnet/in.h"
20#include "../bbnnet/net.h"
21#include "../bbnnet/in_pcb.h"
22#include "../bbnnet/in_var.h"
23#include "../bbnnet/ip.h"
24#include "../bbnnet/icmp.h"
25
26/*
27 * If you're going to a specific host or via a gateway, the routing
28 * entry gateway field holds the best way to get there. Otherwise,
29 * the routing entry tells you how to get onto that net -- it has
30 * the net address portion of our local host:
31 *
32 * On bbn-labs-b:
33 *
34 * rt_dst rt_gateway flags
35 * il0 => 0x00000b80 0x2010b80 UP
36 * imp0 => 0x00000008 0x2000708 UP
37 * loop => 0x0000007f 0x100007f UP
38 *
39 * So you can see that the rt_gateway is our local address, and the
40 * rt_dst may be the net number of the media. If it's a route
41 * to a net, the other guy is on this net and you want to route the
42 * packet to him anyway.
43 *
44 * gateway 0 0x1000b80 UP, RTF_GATEWAY
45 */
46
/*
 * IF_SEND hands the packet in mbuf chain mp to interface ifp according to
 * routing entry rt, and stores the resulting error code in retval.
 *
 *  - If the interface is not marked IFF_UP, the chain is freed and retval
 *    is set to ENETUNREACH.
 *  - For host or gateway routes the next hop handed to the interface is
 *    the route's rt_gateway.
 *  - Otherwise the next hop is the packet's own IP destination, so mp
 *    must begin with a struct ip.
 *  In the last two cases retval is whatever the interface's if_output
 *  routine returns.
 *
 * The expansion is wrapped in do { } while (0) so that "IF_SEND(...);"
 * is exactly one statement and remains correct as the body of an
 * unbraced if/else.  Every argument is evaluated more than once: pass
 * only side-effect-free expressions.  retval must be an lvalue.
 */
#define IF_SEND(ifp, mp, rt, retval) \
do { \
	static struct sockaddr_in tmproute = {AF_INET}; \
\
	if (! ((ifp)->if_flags & IFF_UP)) { \
		/* goes with PRC_IFDOWN in in.c */ \
		m_freem(mp); \
		(retval) = ENETUNREACH; \
	} else if ((rt)->rt_flags & (RTF_GATEWAY|RTF_HOST)) \
		(retval) = (*(ifp)->if_output)((ifp), (mp), &(rt)->rt_gateway); \
	else { \
		tmproute.sin_addr.s_addr = (mtod((mp), struct ip *))->ip_dst.s_addr; \
		(retval) = (*(ifp)->if_output)((ifp), (mp), (struct sockaddr *) &tmproute); \
	} \
} while (0)
61
/*
 * Function form of the IF_SEND macro, for callers that want the error
 * code as a return value.
 *
 * ifp	interface to transmit on
 * mp	mbuf chain holding the packet
 * rt	routing entry that selected ifp
 *
 * Returns the error code produced by IF_SEND.
 */
if_send(ifp, mp, rt)
register struct ifnet *ifp;
register struct mbuf *mp;
register struct rtentry *rt;
{
	int error;

	IF_SEND(ifp, mp, rt, error);

	return (error);
}
72
73
74/*
75 * Find a route to this destination. Given the source and destination
76 * addresses, it returns a local net address
77 * to send to (either the address of the destination itself or a gateway).
78 * Taken mostly from rtalloc; expanded to route according to
79 * both ends of the connection.
80 */
81
82
83struct rtentry *ip_route(src, dst)
84struct in_addr *src;
85struct in_addr *dst;
86{
87 register struct rtentry *rt;
88 register struct mbuf *m;
89 register unsigned hash;
90 net_t snet, dnet;
91 int doinghost;
92 struct rtentry *rtmin;
93 struct mbuf **table;
94 static struct in_addr wildcard;
95
96 /* get network parts of src and dest addresses */
97
98 snet = iptonet(*src);
99 dnet = iptonet(*dst);
100
101 rtmin = NULL;
102 hash = HOSTHASH(dst->s_addr);
103 table = rthost;
104 doinghost = TRUE;
105again :
106 for (m = table[hash % RTHASHSIZ]; m; m = m->m_next)
107 {
108 rt = mtod(m, struct rtentry *);
109 if (rt->rt_hash != hash)
110 continue;
111 if (! (rt->rt_flags & RTF_UP))
112 continue;
113 if (! (rt->rt_ifp->if_flags & IFF_UP))
114 continue;
115 if (rt->rt_dst.sa_family != AF_INET)
116 continue;
117
118 /* packets go out an interface with our local IP address */
119 if (iptonet(((struct sockaddr_in *)&(rt->rt_gateway))->sin_addr) != snet)
120 continue;
121
122 /* does this route get us there? */
123 if (doinghost)
124 {
125 if (((struct sockaddr_in *)&(rt->rt_dst))->sin_addr.s_addr !=
126 dst->s_addr)
127 continue;
128 }
129 else
130 {
131 /*
132 * iptonet == 0 => smart gateway (route to anywhere)
133 * iptonet != 0 => gateway to another net (route to net)
134 */
135 if (iptonet(((struct sockaddr_in *)&(rt->rt_dst))->sin_addr) != dnet)
136 continue;
137 }
138
139 /* and try to share load across gateways */
140 if (rtmin == NULL)
141 rtmin = rt;
142 else if (rt->rt_use < rtmin->rt_use)
143 rtmin = rt;
144 }
145
146 if (rtmin == NULL)
147 {
148 if (doinghost)
149 {
150 doinghost = FALSE;
151 hash = NETHASH(*dst), table = rtnet;
152 goto again;
153 }
154 /*
155 * Check for wildcard gateway, by convention network 0.
156 */
157 if (dst != &wildcard)
158 {
159 hash = 0;
160 dst = &wildcard;
161 dnet = 0;
162 goto again;
163 }
164 rtstat.rts_unreach++;
165 return(NULL);
166 }
167
168 rtmin->rt_refcnt++;
169 if (dst == &wildcard)
170 rtstat.rts_wildcard++;
171 return(rtmin);
172}
173
174
175/*
176 * Ip_send is called from the higher protocol layer (TCP/RDP/UDP) and is passed
177 * an mbuf chain containing a packet to send to the local network. The first
178 * mbuf contains the protocol header and an IP header which is partially
179 * filled in. After determining a route (outgoing interface + first hop) for
180 * the packet, it is fragmented (if necessary) and sent to the local net
181 * through the local net send routine.
182 *
183 * For non-raw output, caller should have stuffed:
184 * ip protocol type, type of service, source addr, destin addr
185 *
186 * ip_tos is left to caller so that people using raw sockets can do whatever
187 * they please. (They don't have an inpcb in which to store such info.)
188 *
189 * The asis argument is TRUE for raw output and the gateway (packet forwarding)
190 * code. It indicates that the IP header is fully constructed.
191 *
192 * Errors at the IP layer and below occur synchronously, and can be reported
193 * back via subroutine return values. Higher level protocols should remember
194 * that if they do things asynchronous to a system call (ie., packet
195 * retransmission) that they should post error back to user via advise_user()
196 * so that user gets error next time he rendezvous with the kernel.
197 */
198ip_send(inp, mp, len, asis)
199struct inpcb *inp;
200register struct mbuf *mp;
201register int len;
202int asis;
203{
204 register struct ip *p;
205 register struct ifnet *ifp;
206 register struct rtentry *rt;
207 register int hlen;
208 int free_route = FALSE;
209 int retval;
210
211 p = mtod(mp, struct ip *); /* -> ip header */
212 /*
213 * Find route for datagram if one has not been assigned.
214 */
215 if ((rt = inp->inp_route.ro_rt) == NULL)
216 {
217 if ((rt = ip_route(&p->ip_src, &p->ip_dst)) == NULL)
218 {
219 if (asis || (p->ip_src.s_addr == INADDR_ANY))
220 {
221 /*
222 * asis: forwarding a packet not sourced by us
223 * eg., by raw interface and user level repeater process
224 * INADDR_ANY: sending icmp packet for which
225 * we're trying to avoid routing twice.
226 */
227 struct route tmproute;
228 struct sockaddr_in *sin;
229
230 bzero ((caddr_t) &tmproute, sizeof(tmproute));
231 sin = (struct sockaddr_in *) &tmproute.ro_dst;
232 sin->sin_family = AF_INET;
233 sin->sin_addr.s_addr = p->ip_dst.s_addr;
234 rtalloc (&tmproute);
235 rt = tmproute.ro_rt;
236
237 if (rt && (p->ip_src.s_addr == INADDR_ANY))
238 p->ip_src = IA_INADDR(in_iafromif(rt->rt_ifp));
239 }
240
241 if (rt == NULL)
242 {
243 m_freem(mp);
244 return(ENETUNREACH);
245 }
246 }
247 free_route = TRUE;
248 }
249 ifp = rt->rt_ifp;
250
251 /*
252 * Copy ip source route to header. Know asis must be FALSE, if do.
253 */
254 if (inp->inp_optlen > 0)
255 {
256 char *q;
257
258 if (mp->m_off - inp->inp_optlen >= MMINOFF)
259 {
260 struct in_addr *ipa;
261
262 mp->m_off -= inp->inp_optlen;
263 mp->m_len += inp->inp_optlen;
264 q = (char *) p;
265 p = (struct ip *) (q - inp->inp_optlen);
266 bcopy(q, (caddr_t)p, sizeof(struct ip));
267 bcopy(inp->inp_options, (caddr_t)(p+1), (unsigned)inp->inp_optlen);
268 /*
269 * And replate eventual destination with first hop.
270 * Eventual destination is in source route just
271 * copied in.
272 */
273 ipa = (struct in_addr *) (&inp->inp_options[0]);
274 p->ip_dst = ipa[inp->inp_optlen/sizeof(struct in_addr)];
275 }
276 else
8902c2d0 277 log(LOG_INFO, "ip_send: optlen %d inpcb 0x%x\n",
17efd7fe
MK
278 (int)inp->inp_optlen, inp);
279 }
280
281 /*
282 * fill in ip header fields
283 */
284 if (asis)
285 {
286 /*
287 * RAW OUTPUT. Must get len, hlen, off from packet header.
288 * Byte swap is ugly (since we must swap back below), but
289 * necessary in case we must fragment.
290 */
291 hlen = p->ip_hl << IP_HLSHIFT;
292 len = ntohs(p->ip_len);
293 p->ip_off = ntohs(p->ip_off);
294 }
295 else
296 {
297 static u_short next_ip_id; /* some day RDP may want to force for rxmit */
298
299 hlen = sizeof(struct ip) + inp->inp_optlen;
300 len += hlen;
301 p->ip_v = IPVERSION;
302 p->ip_hl = hlen >> IP_HLSHIFT;
303 p->ip_off = 0;
304 p->ip_ttl = MAXTTL; /* ### should come from route */
305 p->ip_id = htons(next_ip_id++);
306 }
307
308 /*
309 * let ip_frag do the send if needed, otherwise do it directly.
310 */
311
312 /* for testing IP reassembly code */
313#ifdef FORCE_FRAG
314#define MTU(ifp) (((ifp)->if_mtu >> FORCE_FRAG) & (~3))
315#else
316#define MTU(ifp) (ifp)->if_mtu
317#endif
318
319 if (len > MTU(ifp))
320 {
321 p->ip_len = len;
322 retval = ip_frag(p, ifp, rt, hlen);
323 }
324 else
325 {
326 /*
327 * complete header, byte swap, and send to local net
328 */
329 p->ip_len = htons((u_short)len);
330 p->ip_off = htons(p->ip_off);
331 /*
332 * No reason not to have kernel checksum, even for raw packets.
333 */
334 p->ip_sum = 0;
335 p->ip_sum = in_cksum(dtom(p), hlen);
336 IF_SEND (ifp, mp, rt, retval);
337 }
338
339 rt->rt_use ++; /* Yet another IP packet sent away */
340
341 if (free_route)
342 {
343 struct socket *so;
344
345 if ((so = inp->inp_socket) &&
346 (so->so_proto->pr_flags & PR_CONNREQUIRED))
347 /*
348 * Found a new route after old one pinged out.
349 */
350 inp->inp_route.ro_rt = rt;
351 else
352 rtfree(rt);
353 }
354
355 return(retval);
356}
357
358/*
359 * Ip_frag is called with a packet with a completed ip header
360 * (except for checksum). It fragments the packet, inserts the
361 * IP checksum, and calls the appropriate local net output routine
362 * to send it to the net.
363 *
364 * Previously, when there was only one kind of mbuf, it tried to
365 * reduce space requirements by recycling the chain to be fragmented.
366 * Preserving this approach is overly complicated, and should mbufs
367 * change again, cause problems. Therefore, have switched to copying
368 * the chain to be fragmented.
369 */
370ip_frag(p, ifp, rt, hlen)
371register struct ip *p;
372struct ifnet *ifp;
373struct rtentry *rt;
374register int hlen;
375{
376 register struct mbuf *m; /* original chunk */
377 register struct mbuf *mhdr; /* fragment */
378 register struct ip *fip; /* the fragment IP header */
379 int off; /* offset into entire IP datagram */
380 int here; /* offset into this chunk of it */
381 register int len; /* length of data in this chunk */
382 int flags; /* of this chunk to fragment */
383 int max; /* max data length in a fragment */
384 int fdlen; /* actual fragment data length */
385 int error;
386
387 m = dtom(p);
388
389 if (p->ip_off & ip_df)
390 { /* can't fragment */
391 m_freem(m);
392 return(EMSGSIZE);
393 }
394 max = MTU(ifp) - hlen; /* max data length in frag */
395 len = p->ip_len - hlen; /* data length */
396
397 /*
398 * this only needs to be this complicated if we are handed
399 * an already-fragmented packet
400 */
401 flags = p->ip_off&(ip_mf|ip_df); /* save old flags */
402 p->ip_off &= ~flags; /* take them out of ip_off */
403 off = p->ip_off << IP_OFFSHIFT; /* fragment offset */
404 here = hlen;
405 error = 0;
406
407 while (len > 0)
408 {
409 /*
410 * Allocate mbuf for fragment IP header
411 */
412 mhdr = m_get(M_DONTWAIT, MT_HEADER);
413 if (mhdr == NULL)
414 {
415 error = ENOBUFS;
416 break;
417 }
418 /*
419 * get copy of data for fragment
420 */
421 if (len < max)
422 fdlen = len;
423 else
424 fdlen = max & (~7); /* 7 == 2^IP_OFFSHIFT -1 */
425 mhdr->m_next = m_copy(m, here, fdlen);
426 if (mhdr->m_next == NULL)
427 {
428 m_free(mhdr);
429 error = ENOBUFS;
430 break;
431 }
432 /*
433 * build the header for this fragment and ship it off.
434 */
435 mhdr->m_len = hlen;
436 mhdr->m_off = MMAXOFF - hlen;
437 fip = mtod(mhdr, struct ip *);
438 bcopy((caddr_t)p, (caddr_t)fip, (unsigned)hlen);
439 fip->ip_off = off >> IP_OFFSHIFT;
440 if (fdlen >= len)
441 /* it's the last fragment */
442 fip->ip_off |= flags;
443 else
444 fip->ip_off |= ip_mf;
445 fip->ip_off = htons((u_short)fip->ip_off);
446 fip->ip_len = htons((u_short)fdlen + hlen);
447 fip->ip_sum = 0;
448 fip->ip_sum = in_cksum(mhdr, hlen);
449 if (error = if_send (ifp, mhdr, rt))
450 break;
451
452 /*
453 * and get ready for next pass through the loop
454 */
455 len -= fdlen;
456 off += fdlen;
457 here += fdlen;
458 }
459
460 m_freem(m);
461 return (error);
462}
463
464/*
465 * Current connection should use a new path.
466 */
467struct rtentry *ip_reroute(inp)
468register struct inpcb *inp;
469{
470 register struct route *ro = &inp->inp_route;
471
472 rtfree(ro->ro_rt);
473 return(ro->ro_rt = ip_route(&inp->inp_laddr, &inp->inp_faddr));
474}
475
476/*
477 * A gateway has gone down. Change route used by all connections currently
478 * using it.
479 */
480ip_gdown(addr)
481u_long addr;
482{
483 register struct protosw *psw;
484
485 for(psw=inetdomain.dom_protosw; psw < inetdomain.dom_protoswNPROTOSW; psw++)
486 if (psw->pr_type != SOCK_RAW)
487 if (psw->pr_ctlinput)
488 (*(psw->pr_ctlinput)) (PRC_GWDOWN, addr);
489}
490
491/*
492 * Called from protocol ctlinput routine. This way, IP/ICMP don't need to know
493 * about protocol's head of inpcbs... for all the protocols.
494 */
495in_gdown (head, addr)
496register struct inpcb *head;
497u_long addr;
498{
499 register struct inpcb *inp;
500 register struct rtentry *rt;
501
502 if (head == NULL)
503 return;
504
505 for(inp = head->inp_next; inp != head; inp = inp->inp_next)
506 {
507 if (rt = inp->inp_route.ro_rt)
508 {
509 if (rt->rt_flags & RTF_GATEWAY)
510 {
511 if (((struct sockaddr_in *) &rt->rt_gateway)->sin_addr.s_addr == addr)
512 {
513 /*
514 * Don't remove route permanently, since want to catch
515 * the gateway when it reboots:
516 * -- rtrequest (SIOCDELRT, rt) --
517 *
518 * make sure rtfree() not remove route mbuf
519 * incrementing reference count here, and decrementing
520 * when timeout on reinstatement goes off. Cannot call
521 * rtfree with zero reference count when have not done
522 * SIOCDELRT.
523 */
524 if (rt->rt_flags & RTF_UP)
525 {
526 rt->rt_flags &= ~RTF_UP;
527 rt->rt_flags |= RTF_REINSTATE;
528 rt->irt_gdown = RT_REINSTATE;
529 rt->rt_refcnt ++;
530 }
531
532 if (!ip_reroute(inp))
533 advise_user(inp->inp_socket, ENETUNREACH);
534
535 }
536 }
537 }
538 }
539}