deprecate ENETUNREACH, always use EHOSTUNREACH; support for new pcb options
[unix-history] / usr / src / sys / netinet / ip_output.c
CommitLineData
/*
 * Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley.  The name of the
 * University may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *	@(#)ip_output.c	7.19 (Berkeley) %G%
 */
6e8b2eca 19
20666ad3 20#include "param.h"
d1f75e79 21#include "malloc.h"
20666ad3
JB
22#include "mbuf.h"
23#include "errno.h"
53ba4464 24#include "protosw.h"
20666ad3
JB
25#include "socket.h"
26#include "socketvar.h"
f4d55810
SL
27
28#include "../net/if.h"
29#include "../net/route.h"
30
20666ad3
JB
31#include "in.h"
32#include "in_systm.h"
33#include "ip.h"
60c0bb09
KS
34#include "in_pcb.h"
35#include "in_var.h"
20666ad3 36#include "ip_var.h"
f4d55810 37
b152d3c4 38#ifdef vax
c18e27ed 39#include "machine/mtpr.h"
b152d3c4 40#endif
ac904f92 41
53ba4464
MK
42struct mbuf *ip_insertoptions();
43
44/*
45 * IP output. The packet in mbuf chain m contains a skeletal IP
99fe25ae
MK
46 * header (with len, off, ttl, proto, tos, src, dst).
47 * The mbuf chain containing the packet will be freed.
48 * The mbuf opt, if present, will not be freed.
53ba4464 49 */
07136d15
MK
50ip_output(m0, opt, ro, flags)
51 struct mbuf *m0;
8a13b737 52 struct mbuf *opt;
ee787340 53 struct route *ro;
0e3f761f 54 int flags;
ac904f92 55{
07136d15 56 register struct ip *ip, *mhip;
8a13b737 57 register struct ifnet *ifp;
07136d15
MK
58 register struct mbuf *m = m0;
59 register int hlen = sizeof (struct ip);
60 int len, off, error = 0;
ee787340 61 struct route iproute;
a8671e7e 62 struct sockaddr_in *dst;
60c0bb09 63 struct in_ifaddr *ia;
ac904f92 64
d1f75e79
MK
65if ((m->m_flags & M_PKTHDR) == 0)
66panic("ip_output no HDR");
07136d15
MK
67 if (opt) {
68 m = ip_insertoptions(m, opt, &len);
69 hlen = len;
70 }
53ba4464 71 ip = mtod(m, struct ip *);
e8d11875 72 /*
2b4b57cd 73 * Fill in IP header.
e8d11875 74 */
0e3f761f
SL
75 if ((flags & IP_FORWARDING) == 0) {
76 ip->ip_v = IPVERSION;
77 ip->ip_off &= IP_DF;
78 ip->ip_id = htons(ip_id++);
4adcd589 79 ip->ip_hl = hlen >> 2;
ea9a9897 80 } else {
53ba4464 81 hlen = ip->ip_hl << 2;
ea9a9897
KS
82 ipstat.ips_localout++;
83 }
8a13b737 84 /*
a13c006d 85 * Route packet.
8a13b737 86 */
ee787340
SL
87 if (ro == 0) {
88 ro = &iproute;
89 bzero((caddr_t)ro, sizeof (*ro));
90 }
a8671e7e 91 dst = (struct sockaddr_in *)&ro->ro_dst;
ccb87262
MK
92 /*
93 * If there is a cached route,
94 * check that it is to the same destination
95 * and is still up. If not, free it and try again.
96 */
97 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
98 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
99 RTFREE(ro->ro_rt);
100 ro->ro_rt = (struct rtentry *)0;
101 }
ee787340 102 if (ro->ro_rt == 0) {
a8671e7e 103 dst->sin_family = AF_INET;
d1f75e79 104 dst->sin_len = sizeof(*dst);
a8671e7e 105 dst->sin_addr = ip->ip_dst;
d55475b1
MK
106 }
107 /*
108 * If routing to interface only,
109 * short circuit routing lookup.
110 */
111 if (flags & IP_ROUTETOIF) {
52c6a991 112
9340d736 113 ia = (struct in_ifaddr *)ifa_ifwithdstaddr((struct sockaddr *)dst);
52c6a991
MK
114 if (ia == 0)
115 ia = in_iaonnetof(in_netof(ip->ip_dst));
d55475b1
MK
116 if (ia == 0) {
117 error = ENETUNREACH;
118 goto bad;
a13c006d 119 }
d55475b1
MK
120 ifp = ia->ia_ifp;
121 } else {
d55475b1
MK
122 if (ro->ro_rt == 0)
123 rtalloc(ro);
76ea132e
MK
124 if (ro->ro_rt == 0) {
125 error = EHOSTUNREACH;
d55475b1
MK
126 goto bad;
127 }
60c0bb09 128 ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa;
76ea132e 129 ifp = ro->ro_rt->rt_ifp;
d55475b1 130 ro->ro_rt->rt_use++;
39e4cc50 131 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
d1f75e79 132 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
0e3f761f 133 }
5afc289d
MK
134#ifndef notdef
135 /*
136 * If source address not specified yet, use address
137 * of outgoing interface.
138 */
60c0bb09
KS
139 if (ip->ip_src.s_addr == INADDR_ANY)
140 ip->ip_src = IA_SIN(ia)->sin_addr;
5afc289d 141#endif
a13c006d 142 /*
af099287
SL
143 * Look for broadcast address and
144 * and verify user is allowed to send
19f96414 145 * such a packet.
a13c006d 146 */
dfce8240 147 if (in_broadcast(dst->sin_addr)) {
19f96414
SL
148 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
149 error = EADDRNOTAVAIL;
150 goto bad;
151 }
0e3f761f 152 if ((flags & IP_ALLOWBROADCAST) == 0) {
a13c006d 153 error = EACCES;
ee787340 154 goto bad;
8a2f82db 155 }
19f96414
SL
156 /* don't allow broadcast messages to be fragmented */
157 if (ip->ip_len > ifp->if_mtu) {
158 error = EMSGSIZE;
159 goto bad;
160 }
d1f75e79 161 m->m_flags |= M_BCAST;
ee787340 162 }
ac904f92 163
2b4b57cd
BJ
164 /*
165 * If small enough for interface, can just send directly.
166 */
8a13b737
BJ
167 if (ip->ip_len <= ifp->if_mtu) {
168 ip->ip_len = htons((u_short)ip->ip_len);
169 ip->ip_off = htons((u_short)ip->ip_off);
170 ip->ip_sum = 0;
171 ip->ip_sum = in_cksum(m, hlen);
60c0bb09
KS
172 error = (*ifp->if_output)(ifp, m,
173 (struct sockaddr *)dst, ro->ro_rt);
a13c006d 174 goto done;
cdad2eb1 175 }
ea9a9897 176 ipstat.ips_fragmented++;
2b4b57cd
BJ
177 /*
178 * Too large for interface; fragment if possible.
179 * Must be able to put at least 8 bytes per fragment.
180 */
8a2f82db
SL
181 if (ip->ip_off & IP_DF) {
182 error = EMSGSIZE;
2b4b57cd 183 goto bad;
8a2f82db 184 }
8a13b737 185 len = (ifp->if_mtu - hlen) &~ 7;
8a2f82db
SL
186 if (len < 8) {
187 error = EMSGSIZE;
2b4b57cd 188 goto bad;
8a2f82db 189 }
2b4b57cd 190
e537fed1
MK
191 {
192 int mhlen, firstlen = len;
d1f75e79 193 struct mbuf **mnext = &m->m_nextpkt;
e537fed1 194
2b4b57cd 195 /*
e537fed1
MK
196 * Loop through length of segment after first fragment,
197 * make new header and copy data of each part and link onto chain.
2b4b57cd 198 */
07136d15 199 m0 = m;
e537fed1
MK
200 mhlen = sizeof (struct ip);
201 for (off = hlen + len; off < ip->ip_len; off += len) {
d1f75e79 202 MGETHDR(m, M_DONTWAIT, MT_HEADER);
07136d15 203 if (m == 0) {
8a2f82db 204 error = ENOBUFS;
f688491d 205 goto sendorfree;
8a2f82db 206 }
d1f75e79 207 m->m_data += max_linkhdr;
07136d15 208 mhip = mtod(m, struct ip *);
2b4b57cd 209 *mhip = *ip;
4ad99bae 210 if (hlen > sizeof (struct ip)) {
e537fed1 211 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
07136d15 212 mhip->ip_hl = mhlen >> 2;
e537fed1 213 }
07136d15 214 m->m_len = mhlen;
e537fed1 215 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
4adcd589
MK
216 if (ip->ip_off & IP_MF)
217 mhip->ip_off |= IP_MF;
e537fed1
MK
218 if (off + len >= ip->ip_len)
219 len = ip->ip_len - off;
07136d15 220 else
2b4b57cd 221 mhip->ip_off |= IP_MF;
07136d15
MK
222 mhip->ip_len = htons((u_short)(len + mhlen));
223 m->m_next = m_copy(m0, off, len);
224 if (m->m_next == 0) {
8a2f82db 225 error = ENOBUFS; /* ??? */
e537fed1 226 goto sendorfree;
98444525 227 }
d1f75e79
MK
228 m->m_pkthdr.len = mhlen + len;
229 m->m_pkthdr.rcvif = (struct ifnet *)0;
0b49870f 230 mhip->ip_off = htons((u_short)mhip->ip_off);
0b49870f 231 mhip->ip_sum = 0;
07136d15 232 mhip->ip_sum = in_cksum(m, mhlen);
e537fed1 233 *mnext = m;
d1f75e79 234 mnext = &m->m_nextpkt;
ea9a9897 235 ipstat.ips_ofragments++;
2b4b57cd 236 }
e537fed1
MK
237 /*
238 * Update first fragment by trimming what's been copied out
239 * and updating header, then send each fragment (in order).
240 */
9d14dd83
KM
241 m = m0;
242 m_adj(m, hlen + firstlen - ip->ip_len);
d1f75e79
MK
243 m->m_pkthdr.len = hlen + firstlen;
244 ip->ip_len = htons((u_short)m->m_pkthdr.len);
4ee97dd8 245 ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
e537fed1 246 ip->ip_sum = 0;
9d14dd83 247 ip->ip_sum = in_cksum(m, hlen);
e537fed1
MK
248sendorfree:
249 for (m = m0; m; m = m0) {
d1f75e79
MK
250 m0 = m->m_nextpkt;
251 m->m_nextpkt = 0;
e537fed1
MK
252 if (error == 0)
253 error = (*ifp->if_output)(ifp, m,
60c0bb09 254 (struct sockaddr *)dst, ro->ro_rt);
e537fed1
MK
255 else
256 m_freem(m);
257 }
258 }
a13c006d 259done:
0e3f761f 260 if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt)
a13c006d 261 RTFREE(ro->ro_rt);
8a2f82db 262 return (error);
e537fed1
MK
263bad:
264 m_freem(m0);
265 goto done;
2b4b57cd
BJ
266}
267
53ba4464
MK
268/*
269 * Insert IP options into preformed packet.
270 * Adjust IP destination as required for IP source routing,
271 * as indicated by a non-zero in_addr at the start of the options.
272 */
273struct mbuf *
274ip_insertoptions(m, opt, phlen)
275 register struct mbuf *m;
276 struct mbuf *opt;
277 int *phlen;
278{
279 register struct ipoption *p = mtod(opt, struct ipoption *);
280 struct mbuf *n;
281 register struct ip *ip = mtod(m, struct ip *);
8011f5df 282 unsigned optlen;
53ba4464
MK
283
284 optlen = opt->m_len - sizeof(p->ipopt_dst);
285 if (p->ipopt_dst.s_addr)
286 ip->ip_dst = p->ipopt_dst;
d1f75e79
MK
287 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
288 MGETHDR(n, M_DONTWAIT, MT_HEADER);
53ba4464
MK
289 if (n == 0)
290 return (m);
d1f75e79 291 n->m_pkthdr.len = m->m_pkthdr.len + optlen;
53ba4464 292 m->m_len -= sizeof(struct ip);
d1f75e79 293 m->m_data += sizeof(struct ip);
53ba4464
MK
294 n->m_next = m;
295 m = n;
53ba4464 296 m->m_len = optlen + sizeof(struct ip);
d1f75e79 297 m->m_data += max_linkhdr;
53ba4464
MK
298 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
299 } else {
d1f75e79 300 m->m_data -= optlen;
53ba4464 301 m->m_len += optlen;
d1f75e79 302 m->m_pkthdr.len += optlen;
53ba4464
MK
303 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
304 }
305 ip = mtod(m, struct ip *);
8011f5df 306 bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen);
53ba4464
MK
307 *phlen = sizeof(struct ip) + optlen;
308 ip->ip_len += optlen;
309 return (m);
310}
311
2b4b57cd 312/*
e537fed1
MK
313 * Copy options from ip to jp,
314 * omitting those not copied during fragmentation.
2b4b57cd 315 */
e537fed1 316ip_optcopy(ip, jp)
2b4b57cd 317 struct ip *ip, *jp;
2b4b57cd
BJ
318{
319 register u_char *cp, *dp;
320 int opt, optlen, cnt;
321
322 cp = (u_char *)(ip + 1);
323 dp = (u_char *)(jp + 1);
324 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
325 for (; cnt > 0; cnt -= optlen, cp += optlen) {
326 opt = cp[0];
327 if (opt == IPOPT_EOL)
328 break;
329 if (opt == IPOPT_NOP)
330 optlen = 1;
331 else
53ba4464 332 optlen = cp[IPOPT_OLEN];
e537fed1
MK
333 /* bogus lengths should have been caught by ip_dooptions */
334 if (optlen > cnt)
335 optlen = cnt;
336 if (IPOPT_COPIED(opt)) {
4ad99bae 337 bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen);
2b4b57cd 338 dp += optlen;
ac904f92 339 }
e8d11875 340 }
2b4b57cd
BJ
341 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
342 *dp++ = IPOPT_EOL;
343 return (optlen);
ac904f92 344}
53ba4464
MK
345
346/*
347 * IP socket option processing.
348 */
76ea132e 349ip_ctloutput(op, so, level, optname, mp)
53ba4464
MK
350 int op;
351 struct socket *so;
352 int level, optname;
76ea132e 353 struct mbuf **mp;
53ba4464 354{
76ea132e
MK
355 register struct inpcb *inp = sotoinpcb(so);
356 register struct mbuf *m = *mp;
357 register int optval;
53ba4464 358 int error = 0;
53ba4464
MK
359
360 if (level != IPPROTO_IP)
361 error = EINVAL;
362 else switch (op) {
363
364 case PRCO_SETOPT:
365 switch (optname) {
366 case IP_OPTIONS:
76ea132e
MK
367 case IP_RETOPTS:
368 return (ip_pcbopts(optname, &inp->inp_options, m));
369
370 case IP_TOS:
371 case IP_TTL:
372 case IP_RECVOPTS:
373 case IP_RECVRETOPTS:
374 case IP_RECVDSTADDR:
375 if (m->m_len != sizeof(int))
376 error = EINVAL;
377 else {
378 optval = *mtod(m, int *);
379 switch (op) {
380
381 case IP_TOS:
382 inp->inp_ip.ip_tos = optval;
383 break;
384
385 case IP_TTL:
386 inp->inp_ip.ip_tos = optval;
387 break;
388#define OPTSET(bit) \
389 if (optval) \
390 inp->inp_flags |= bit; \
391 else \
392 inp->inp_flags &= ~bit;
393
394 case IP_RECVOPTS:
395 OPTSET(INP_RECVOPTS);
396 break;
397
398 case IP_RECVRETOPTS:
399 OPTSET(INP_RECVRETOPTS);
400 break;
401
402 case IP_RECVDSTADDR:
403 OPTSET(INP_RECVDSTADDR);
404 break;
405 }
406 }
407 break;
408#undef OPTSET
53ba4464
MK
409
410 default:
411 error = EINVAL;
412 break;
413 }
76ea132e
MK
414 if (m)
415 (void)m_free(m);
53ba4464
MK
416 break;
417
418 case PRCO_GETOPT:
419 switch (optname) {
420 case IP_OPTIONS:
76ea132e 421 *mp = m = m_get(M_WAIT, MT_SOOPTS);
53ba4464 422 if (inp->inp_options) {
76ea132e 423 m->m_len = inp->inp_options->m_len;
53ba4464 424 bcopy(mtod(inp->inp_options, caddr_t),
76ea132e 425 mtod(m, caddr_t), (unsigned)m->m_len);
53ba4464 426 } else
76ea132e
MK
427 m->m_len = 0;
428 break;
429
430 case IP_TOS:
431 case IP_TTL:
432 case IP_RECVOPTS:
433 case IP_RECVRETOPTS:
434 case IP_RECVDSTADDR:
435 *mp = m = m_get(M_WAIT, MT_SOOPTS);
436 m->m_len = sizeof(int);
437 switch (op) {
438
439 case IP_TOS:
440 optval = inp->inp_ip.ip_tos;
441 break;
442
443 case IP_TTL:
444 optval = inp->inp_ip.ip_tos;
445 break;
446
447#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
448
449 case IP_RECVOPTS:
450 optval = OPTBIT(INP_RECVOPTS);
451 break;
452
453 case IP_RECVRETOPTS:
454 optval = OPTBIT(INP_RECVRETOPTS);
455 break;
456
457 case IP_RECVDSTADDR:
458 optval = OPTBIT(INP_RECVDSTADDR);
459 break;
460 }
461 *mtod(m, int *) = optval;
53ba4464 462 break;
76ea132e 463
53ba4464
MK
464 default:
465 error = EINVAL;
466 break;
467 }
468 break;
469 }
53ba4464
MK
470 return (error);
471}
472
473/*
0c3fb1b4
MK
474 * Set up IP options in pcb for insertion in output packets.
475 * Store in mbuf with pointer in pcbopt, adding pseudo-option
476 * with destination address if source routed.
53ba4464 477 */
0c3fb1b4
MK
478ip_pcbopts(pcbopt, m)
479 struct mbuf **pcbopt;
480 register struct mbuf *m;
53ba4464
MK
481{
482 register cnt, optlen;
483 register u_char *cp;
484 u_char opt;
485
486 /* turn off any old options */
0c3fb1b4 487 if (*pcbopt)
8011f5df 488 (void)m_free(*pcbopt);
0c3fb1b4 489 *pcbopt = 0;
53ba4464
MK
490 if (m == (struct mbuf *)0 || m->m_len == 0) {
491 /*
492 * Only turning off any previous options.
493 */
494 if (m)
8011f5df 495 (void)m_free(m);
53ba4464
MK
496 return (0);
497 }
498
499#ifndef vax
500 if (m->m_len % sizeof(long))
501 goto bad;
502#endif
503 /*
504 * IP first-hop destination address will be stored before
505 * actual options; move other options back
506 * and clear it when none present.
507 */
d1f75e79 508 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
53ba4464 509 goto bad;
53ba4464
MK
510 cnt = m->m_len;
511 m->m_len += sizeof(struct in_addr);
512 cp = mtod(m, u_char *) + sizeof(struct in_addr);
8011f5df 513 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
53ba4464
MK
514 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
515
516 for (; cnt > 0; cnt -= optlen, cp += optlen) {
517 opt = cp[IPOPT_OPTVAL];
518 if (opt == IPOPT_EOL)
519 break;
520 if (opt == IPOPT_NOP)
521 optlen = 1;
522 else {
523 optlen = cp[IPOPT_OLEN];
524 if (optlen <= IPOPT_OLEN || optlen > cnt)
525 goto bad;
526 }
527 switch (opt) {
528
529 default:
530 break;
531
532 case IPOPT_LSRR:
533 case IPOPT_SSRR:
534 /*
535 * user process specifies route as:
536 * ->A->B->C->D
537 * D must be our final destination (but we can't
538 * check that since we may not have connected yet).
539 * A is first hop destination, which doesn't appear in
540 * actual IP option, but is stored before the options.
541 */
542 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
543 goto bad;
544 m->m_len -= sizeof(struct in_addr);
545 cnt -= sizeof(struct in_addr);
546 optlen -= sizeof(struct in_addr);
547 cp[IPOPT_OLEN] = optlen;
548 /*
549 * Move first hop before start of options.
550 */
8011f5df 551 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
53ba4464
MK
552 sizeof(struct in_addr));
553 /*
554 * Then copy rest of options back
555 * to close up the deleted entry.
556 */
8011f5df
MK
557 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
558 sizeof(struct in_addr)),
559 (caddr_t)&cp[IPOPT_OFFSET+1],
560 (unsigned)cnt + sizeof(struct in_addr));
53ba4464
MK
561 break;
562 }
563 }
d1f75e79
MK
564 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
565 goto bad;
0c3fb1b4 566 *pcbopt = m;
53ba4464
MK
567 return (0);
568
569bad:
8011f5df 570 (void)m_free(m);
53ba4464
MK
571 return (EINVAL);
572}