1. Remove a rather strangely gratuitous bit of profanity
[unix-history] / sys / netinet / in_mtudisc.c
CommitLineData
2cb63509
GW
1/*-
2 * Copyright (c) 1993, University of Vermont and State
3 * Agricultural College.
4 * Copyright (c) 1993, Garrett A. Wollman.
5 *
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY AND AUTHOR ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR AUTHORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
fde1aeb2 29 * $Id: in_mtudisc.c,v 1.1 1993/11/18 00:08:14 wollman Exp $
2cb63509
GW
30 */
31
32#ifdef MTUDISC
33
34#include "param.h"
35#include "systm.h"
36#include "kernel.h"
37#include "mbuf.h"
38#include "socket.h"
39#include "socketvar.h"
40#include "in_systm.h"
41#include "net/if.h"
42#include "net/route.h"
43#include "in.h"
44#include "in_var.h"
45#include "ip.h"
46#include "protosw.h"
47#include "in_pcb.h"
48
49#ifdef INET
50
/*
 * Zero-terminated list of the PCB list heads whose members may make
 * use of MTU discovery; in_mtutimer() walks every head named here.
 * XXX: this arrangement is clumsy, because an upper-layer protocol has
 * to get its head added to this table *and* set INP_DISCOVERMTU in each
 * of its PCBs.
 */
extern struct inpcb tcb;	/* XXX belongs in a header file */

struct inpcb *checkpcbs[] = {
	&tcb,
	(struct inpcb *)0	/* end-of-list sentinel */
};
63
2cb63509
GW
64
/*
 * Table of likely MTU values, courtesy of RFC 1191.
 * This MUST remain sorted in strictly descending order; the search in
 * in_nextmtu() depends on it.
 */
static const unsigned short in_mtus[] = {
	65535,	/* maximum */
	32767,	/* convenient power of 2 - 1 */
	17914,	/* 16Mb Token Ring */
	16383,	/* convenient power of 2 - 1 */
	8166,	/* IEEE 802.4 */
	6288,	/* convenient stopping point */
	4352,	/* FDDI */
	3144,	/* convenient stopping point */
	2002,	/* IEEE 802.5 */
	1492,	/* IEEE 802.3 */
	1006,	/* BBN 1822 */
	508,	/* ARCNET */
	296,	/* SLIP, PPP */
	128	/* minimum we'll accept */
};

#define NMTUS ((sizeof in_mtus)/(sizeof in_mtus[0]))

/*
 * Find the next MTU in the sequence from CURRENT.
 * If HIGHER is non-zero, move to the next larger table entry; otherwise
 * move to the next smaller one.
 *
 * Returns the new MTU, or zero when going lower is impossible (CURRENT
 * is already at or below the smallest table entry).  When going higher
 * from at or above the largest entry, the largest entry is returned.
 *
 * NB: We might be called with a CURRENT MTU that's not in the
 * table (as, for example, when an ICMP tells us there's a problem
 * and reports a max path MTU value).
 */
unsigned
in_nextmtu(unsigned current, int higher) {
	unsigned i;

	/*
	 * Locate the largest table entry that does not exceed CURRENT.
	 * The comparison is done at full width: narrowing CURRENT to 16
	 * bits first would make an out-of-range value such as 67036 look
	 * like 1500.
	 */
	for(i = 0; i < NMTUS; i++) {
		if(in_mtus[i] <= current)
			break;
	}

	/* CURRENT is smaller than every entry in the table. */
	if(i == NMTUS) {
		if(higher) return in_mtus[NMTUS - 1];
		else return 0;		/* error return */
	}

	/*
	 * Now we know that in_mtus[i] <= CURRENT, and, when i > 0, that
	 * CURRENT < in_mtus[i - 1].  If we want to go higher, take
	 * in_mtus[i - 1] always (or in_mtus[0] when already at the top).
	 * If we want to go lower, in_mtus[i] is the answer unless CURRENT
	 * is exactly equal to it, in which case we take in_mtus[i + 1],
	 * unless i == NMTUS - 1, in which case we return failure.
	 */
	if(higher)
		return in_mtus[(i >= 1) ? (i - 1) : 0];

	/* now we know it's lower */
	if(current == in_mtus[i]) {
		if(i == NMTUS - 1)
			return 0;
		else
			return in_mtus[i + 1];
	}

	return in_mtus[i];
}
132
133/*
134 * Set up the route to do MTU discovery. This only works for host routes,
135 * not net routes; in any case, ALL systems should have all IP routes
136 * marked with RTF_CLONING (and a genmask of zero), which will do the right
137 * thing, and also arrange for the pre-ARPing code to get called on
138 * on appropriate interfaces.
139 *
140 * We also go to some pains to keep listeners on the routing socket aware
141 * of what's going on when we fiddle the flags or metrics. I don't know
142 * if this is really necessary or not (or even if we're doing it in the
143 * right way).
144 */
145int in_routemtu(struct route *ro) {
146 if(!ro->ro_rt)
147 return 0;
148
149 if((ro->ro_rt->rt_flags & (RTF_HOST | RTF_UP)) != (RTF_HOST | RTF_UP))
150 return 0;
151
152 if(ro->ro_rt->rt_rmx.rmx_mtu) {
153 /*
154 * Let the user know that we've turned on MTU discovery for this
155 * route entry. This doesn't do anything at present, but may
156 * be useful later on.
157 */
158 if(!(ro->ro_rt->rt_flags & RTF_PROTO1)) {
159 ro->ro_rt->rt_flags |= RTF_PROTO1;
160 }
161 return 1;
162 }
163
164 if(ro->ro_rt->rt_ifp && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)) {
165 ro->ro_rt->rt_flags |= RTF_PROTO1;
166 /*
167 * Subtraction is necessary because the interface's MTU includes
168 * the interface's own headers. We subtract the header length
169 * provided and hope for the best.
170 */
171 ro->ro_rt->rt_rmx.rmx_mtu =
172 ro->ro_rt->rt_ifp->if_mtu - ro->ro_rt->rt_ifp->if_hdrlen;
173 return 1;
174 }
175 return 0;
176}
177
178/*
179 * Perform the PCB fiddling necessary when the route changes.
180 * Protect against recursion, since we might get called as a
181 * result of notifying someone else that the MTU is changing.
182 */
183void
184in_pcbmtu(struct inpcb *inp) {
185 static int notifying = 0;
186 static int timerstarted = 0;
187 unsigned oldmtu = inp->inp_pmtu;
188 int oldflags = inp->inp_flags;
189
190 if (!timerstarted) {
191 timeout(in_mtutimer, 0, 60 * hz);
192 timerstarted = 1;
193 }
194
195 if (inp->inp_flags & INP_DISCOVERMTU) {
196 /*
197 * If no route present, get one.
198 * If there is one present, but it's marked as being `down',
199 * try to get another one.
200 */
201 if(!inp->inp_route.ro_rt)
202 rtalloc(&inp->inp_route);
203 else if((inp->inp_route.ro_rt->rt_flags & RTF_UP) == 0) {
204 RTFREE(inp->inp_route.ro_rt);
205 inp->inp_route.ro_rt = 0;
206 rtalloc(&inp->inp_route);
207 }
208
209 if(in_routemtu(&inp->inp_route)) {
210 inp->inp_flags |= INP_MTUDISCOVERED;
211 inp->inp_pmtu = inp->inp_route.ro_rt->rt_rmx.rmx_mtu;
212 inp->inp_ip.ip_off |= IP_DF;
213 } else {
214 inp->inp_flags &= ~INP_MTUDISCOVERED;
215 inp->inp_ip.ip_off &= ~IP_DF;
216 }
217 /*
218 * If nothing has changed since the last value we had,
219 * don't waste any time notifying everybody that nothing
220 * has changed.
221 */
222 if(inp->inp_pmtu != oldmtu
223 || (inp->inp_flags ^ oldflags)) {
224 notifying = 1;
225 /*
226 * If the MTU has decreased, use timer 2.
227 */
228 inp->inp_mtutimer =
229 (inp->inp_pmtu < oldmtu) ? in_mtutimer2 : in_mtutimer1;
230 in_mtunotify(inp);
231 notifying = 0;
232 }
233 }
234}
235
236/*
237 * Tell the clients that have the same destination as INP that they
238 * need to take a new look at the MTU value and flags.
239 */
240void
241in_mtunotify(struct inpcb *inp) {
242 in_pcbnotify(inp->inp_head, &inp->inp_route.ro_dst, 0, zeroin_addr,
243 0, PRC_MTUCHANGED, inp->inp_mtunotify);
244}
245
246/*
247 * Adjust the MTU listed in the route on the basis of an ICMP
248 * Unreachable: Need Fragmentation message.
249 * Note that the PRC_MSGSIZE error is still delivered; this just
250 * makes the adjustment in the route, and depends on the ULPs which
251 * are required to translate PRC_MSGSIZE into an in_pcbmtu() which will
252 * pick up the new size.
253 */
254void
255in_mtureduce(struct in_addr dst, unsigned newsize) {
256 struct route ro;
257
258 ro.ro_dst.sa_family = AF_INET;
259 ro.ro_dst.sa_len = sizeof ro.ro_dst;
260 ((struct sockaddr_in *)&ro.ro_dst)->sin_addr = dst;
261 ro.ro_rt = 0;
262 rtalloc(&ro);
263
264 /*
265 * If there was no route, just forget about it, can't do anything.
266 */
267 if(!ro.ro_rt)
268 return;
269
270 /*
271 * If there was a route, but it's the wrong kind, forget it.
272 */
273 if((ro.ro_rt->rt_flags & (RTF_UP | RTF_HOST)) != (RTF_UP | RTF_HOST)) {
274 RTFREE(ro.ro_rt);
275 return;
276 }
277
278 /*
279 * If the MTU is locked by some outside agency, forget it.
280 */
281 if(ro.ro_rt->rt_rmx.rmx_locks & RTV_MTU) {
282 RTFREE(ro.ro_rt);
283 return;
284 }
285
286 /*
287 * If newsize == 0, then we got an ICMP from a router
288 * which doesn't support the MTU extension, so just go down one.
289 */
290 newsize = in_nextmtu(ro.ro_rt->rt_rmx.rmx_mtu, 0);
291
292 if(!newsize) {
293 ro.ro_rt->rt_rmx.rmx_mtu = 0; /* we can't go any lower */
294 RTFREE(ro.ro_rt);
295 return;
296 }
297 /*
298 * If the new MTU is greater than the old MTU, forget it. (Prevent
299 * denial-of-service attack.) Don't bother if the new MTU is the
300 * same as the old one.
301 */
302 if(ro.ro_rt->rt_rmx.rmx_mtu <= newsize) {
303 RTFREE(ro.ro_rt);
304 return;
305 }
306
307 /*
308 * OK, do it.
309 */
310 ro.ro_rt->rt_rmx.rmx_mtu = newsize;
311 RTFREE(ro.ro_rt);
312}
313
314/*
315 * Walk through all the PCB lists in checkpcbs[] and decrement the
316 * timers on the ones still participating in MTU discovery.
317 * If the timers reach zero, bump the MTU (clamped to the interface
318 * MTU), assuming the route is still good.
319 */
320void
321in_mtutimer(caddr_t dummy1, int dummy2) {
322 int i;
323 struct inpcb *inp;
324 struct rtentry *rt;
325 int s = splnet();
326
327 for(i = 0; checkpcbs[i]; i++) {
328 inp = checkpcbs[i];
329
330 while(inp = inp->inp_next) {
331 if(inp->inp_flags & INP_MTUDISCOVERED) {
332 if(!inp->inp_route.ro_rt
333 || !(inp->inp_route.ro_rt->rt_flags & RTF_UP)) {
334 inp->inp_flags &= ~INP_MTUDISCOVERED;
335 continue; /* we'll notice it later */
336 }
337
338 if(--inp->inp_mtutimer == 0) {
339 in_bumpmtu(inp);
340 inp->inp_mtutimer = in_mtutimer1;
341 if(inp->inp_route.ro_rt->rt_rmx.rmx_rtt
342 && ((in_mtutimer1 * 60)
343 > (inp->inp_route.ro_rt->rt_rmx.rmx_rtt / RTM_RTTUNIT))) {
344 inp->inp_mtutimer =
345 inp->inp_route.ro_rt->rt_rmx.rmx_rtt / RTM_RTTUNIT;
346 }
347 }
348 }
349 }
350 }
351 splx(s);
352 timeout(in_mtutimer, (caddr_t)0, 60 * hz);
353}
354
355/*
356 * Try to increase the MTU and let everyone know that it has changed.
357 * Must be called with a valid route in inp->inp_route. Probably
358 * must be at splnet(), too.
359 */
360void
361in_bumpmtu(struct inpcb *inp) {
362 struct route *ro;
363 unsigned newmtu;
364
365 ro = &inp->inp_route;
366 newmtu = in_nextmtu(inp->inp_pmtu, 1);
367 if(!newmtu) return; /* doing the best we can */
368 if(newmtu <= ro->ro_rt->rt_ifp->if_mtu) {
369 if(!(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)) {
370 ro->ro_rt->rt_rmx.rmx_mtu = newmtu;
371 in_pcbmtu(inp);
372 }
373 }
374}
375
376#endif /* INET */
377#endif /* MTUDISC */