Commit | Line | Data |
---|---|---|
2cb63509 GW |
1 | /*- |
2 | * Copyright (c) 1993, University of Vermont and State | |
3 | * Agricultural College. | |
4 | * Copyright (c) 1993, Garrett A. Wollman. | |
5 | * | |
6 | * All rights reserved. | |
7 | * | |
8 | * Redistribution and use in source and binary forms, with or without | |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * 1. Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * 2. Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
16 | * | |
17 | * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY AND AUTHOR ``AS IS'' AND | |
18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
20 | * ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR AUTHORS BE LIABLE | |
21 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
22 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
23 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
24 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
25 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
26 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
27 | * SUCH DAMAGE. | |
28 | * | |
fde1aeb2 | 29 | * $Id: in_mtudisc.c,v 1.1 1993/11/18 00:08:14 wollman Exp $ |
2cb63509 GW |
30 | */ |
31 | ||
32 | #ifdef MTUDISC | |
33 | ||
34 | #include "param.h" | |
35 | #include "systm.h" | |
36 | #include "kernel.h" | |
37 | #include "mbuf.h" | |
38 | #include "socket.h" | |
39 | #include "socketvar.h" | |
40 | #include "in_systm.h" | |
41 | #include "net/if.h" | |
42 | #include "net/route.h" | |
43 | #include "in.h" | |
44 | #include "in_var.h" | |
45 | #include "ip.h" | |
46 | #include "protosw.h" | |
47 | #include "in_pcb.h" | |
48 | ||
49 | #ifdef INET | |
50 | ||
/*
 * PCB list heads whose members may take part in MTU discovery.
 * in_mtutimer() walks every list named here; the array is terminated
 * by a null pointer.
 *
 * This is really bogus, because an upper-layer protocol has to both
 * get its list head added here /and/ set INP_DISCOVERMTU in each PCB.
 */
extern struct inpcb tcb;	/* XXX move to header file */

struct inpcb *checkpcbs[] = { &tcb, 0 };
63 | ||
2cb63509 GW |
64 | |
/*
 * Table of likely MTU values, courtesy of RFC 1191.
 * This MUST remain sorted in descending order; in_nextmtu() relies on it.
 */
static const u_short in_mtus[] = {
	65535,	/* maximum */
	32767,	/* convenient power of 2 - 1 */
	17914,	/* 16Mb Token Ring */
	16383,	/* convenient power of 2 - 1 */
	8166,	/* IEEE 802.4 */
	6288,	/* convenient stopping point */
	4352,	/* FDDI */
	3144,	/* convenient stopping point */
	2002,	/* IEEE 802.5 */
	1492,	/* IEEE 802.3 */
	1006,	/* BBN 1822 */
	508,	/* ARCNET */
	296,	/* SLIP, PPP */
	128	/* minimum we'll accept */
};

#define NMTUS ((sizeof in_mtus)/(sizeof in_mtus[0]))

/*
 * Find the next MTU in the sequence from CURRENT.
 * If HIGHER is non-zero, move to a larger size; otherwise to a smaller one.
 *
 * Returns the chosen table entry, or zero when asked to go lower and
 * there is nothing lower to offer.  When asked to go higher from a value
 * at or above the largest entry, the largest entry itself is returned.
 *
 * NB: We might be called with a CURRENT MTU that's not in the
 * table (as, for example, when an ICMP tells us there's a problem
 * and reports a max path MTU value).  CURRENT is compared at its full
 * width; it is deliberately NOT narrowed to u_short, since that would
 * fold values above 65535 back into the middle of the table.
 */
unsigned
in_nextmtu(unsigned current, int higher) {
	unsigned i;

	/* Locate the largest table entry that does not exceed CURRENT. */
	for (i = 0; i < NMTUS; i++) {
		if (in_mtus[i] <= current)
			break;
	}

	/* CURRENT is smaller than every entry in the table. */
	if (i == NMTUS)
		return higher ? in_mtus[NMTUS - 1] : 0;

	/*
	 * Now we know that CURRENT lies in the half-open interval
	 * [in_mtus[i], in_mtus[i - 1]) (with no upper bound when i == 0).
	 * Going higher always takes in_mtus[i - 1], or in_mtus[0] when
	 * there is no larger entry.
	 */
	if (higher)
		return in_mtus[(i >= 1) ? (i - 1) : 0];

	/*
	 * Going lower: if CURRENT sits strictly inside the interval, its
	 * lower bound in_mtus[i] is already a step down.  If CURRENT is
	 * exactly that bound, step to in_mtus[i + 1], and fail when we are
	 * already on the last entry.
	 */
	if (current != in_mtus[i])
		return in_mtus[i];

	if (i == NMTUS - 1)
		return 0;

	return in_mtus[i + 1];
}
132 | ||
133 | /* | |
134 | * Set up the route to do MTU discovery. This only works for host routes, | |
135 | * not net routes; in any case, ALL systems should have all IP routes | |
136 | * marked with RTF_CLONING (and a genmask of zero), which will do the right | |
137 | * thing, and also arrange for the pre-ARPing code to get called on | |
138 | * on appropriate interfaces. | |
139 | * | |
140 | * We also go to some pains to keep listeners on the routing socket aware | |
141 | * of what's going on when we fiddle the flags or metrics. I don't know | |
142 | * if this is really necessary or not (or even if we're doing it in the | |
143 | * right way). | |
144 | */ | |
145 | int in_routemtu(struct route *ro) { | |
146 | if(!ro->ro_rt) | |
147 | return 0; | |
148 | ||
149 | if((ro->ro_rt->rt_flags & (RTF_HOST | RTF_UP)) != (RTF_HOST | RTF_UP)) | |
150 | return 0; | |
151 | ||
152 | if(ro->ro_rt->rt_rmx.rmx_mtu) { | |
153 | /* | |
154 | * Let the user know that we've turned on MTU discovery for this | |
155 | * route entry. This doesn't do anything at present, but may | |
156 | * be useful later on. | |
157 | */ | |
158 | if(!(ro->ro_rt->rt_flags & RTF_PROTO1)) { | |
159 | ro->ro_rt->rt_flags |= RTF_PROTO1; | |
160 | } | |
161 | return 1; | |
162 | } | |
163 | ||
164 | if(ro->ro_rt->rt_ifp && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)) { | |
165 | ro->ro_rt->rt_flags |= RTF_PROTO1; | |
166 | /* | |
167 | * Subtraction is necessary because the interface's MTU includes | |
168 | * the interface's own headers. We subtract the header length | |
169 | * provided and hope for the best. | |
170 | */ | |
171 | ro->ro_rt->rt_rmx.rmx_mtu = | |
172 | ro->ro_rt->rt_ifp->if_mtu - ro->ro_rt->rt_ifp->if_hdrlen; | |
173 | return 1; | |
174 | } | |
175 | return 0; | |
176 | } | |
177 | ||
178 | /* | |
179 | * Perform the PCB fiddling necessary when the route changes. | |
180 | * Protect against recursion, since we might get called as a | |
181 | * result of notifying someone else that the MTU is changing. | |
182 | */ | |
183 | void | |
184 | in_pcbmtu(struct inpcb *inp) { | |
185 | static int notifying = 0; | |
186 | static int timerstarted = 0; | |
187 | unsigned oldmtu = inp->inp_pmtu; | |
188 | int oldflags = inp->inp_flags; | |
189 | ||
190 | if (!timerstarted) { | |
191 | timeout(in_mtutimer, 0, 60 * hz); | |
192 | timerstarted = 1; | |
193 | } | |
194 | ||
195 | if (inp->inp_flags & INP_DISCOVERMTU) { | |
196 | /* | |
197 | * If no route present, get one. | |
198 | * If there is one present, but it's marked as being `down', | |
199 | * try to get another one. | |
200 | */ | |
201 | if(!inp->inp_route.ro_rt) | |
202 | rtalloc(&inp->inp_route); | |
203 | else if((inp->inp_route.ro_rt->rt_flags & RTF_UP) == 0) { | |
204 | RTFREE(inp->inp_route.ro_rt); | |
205 | inp->inp_route.ro_rt = 0; | |
206 | rtalloc(&inp->inp_route); | |
207 | } | |
208 | ||
209 | if(in_routemtu(&inp->inp_route)) { | |
210 | inp->inp_flags |= INP_MTUDISCOVERED; | |
211 | inp->inp_pmtu = inp->inp_route.ro_rt->rt_rmx.rmx_mtu; | |
212 | inp->inp_ip.ip_off |= IP_DF; | |
213 | } else { | |
214 | inp->inp_flags &= ~INP_MTUDISCOVERED; | |
215 | inp->inp_ip.ip_off &= ~IP_DF; | |
216 | } | |
217 | /* | |
218 | * If nothing has changed since the last value we had, | |
219 | * don't waste any time notifying everybody that nothing | |
220 | * has changed. | |
221 | */ | |
222 | if(inp->inp_pmtu != oldmtu | |
223 | || (inp->inp_flags ^ oldflags)) { | |
224 | notifying = 1; | |
225 | /* | |
226 | * If the MTU has decreased, use timer 2. | |
227 | */ | |
228 | inp->inp_mtutimer = | |
229 | (inp->inp_pmtu < oldmtu) ? in_mtutimer2 : in_mtutimer1; | |
230 | in_mtunotify(inp); | |
231 | notifying = 0; | |
232 | } | |
233 | } | |
234 | } | |
235 | ||
236 | /* | |
237 | * Tell the clients that have the same destination as INP that they | |
238 | * need to take a new look at the MTU value and flags. | |
239 | */ | |
240 | void | |
241 | in_mtunotify(struct inpcb *inp) { | |
242 | in_pcbnotify(inp->inp_head, &inp->inp_route.ro_dst, 0, zeroin_addr, | |
243 | 0, PRC_MTUCHANGED, inp->inp_mtunotify); | |
244 | } | |
245 | ||
246 | /* | |
247 | * Adjust the MTU listed in the route on the basis of an ICMP | |
248 | * Unreachable: Need Fragmentation message. | |
249 | * Note that the PRC_MSGSIZE error is still delivered; this just | |
250 | * makes the adjustment in the route, and depends on the ULPs which | |
251 | * are required to translate PRC_MSGSIZE into an in_pcbmtu() which will | |
252 | * pick up the new size. | |
253 | */ | |
254 | void | |
255 | in_mtureduce(struct in_addr dst, unsigned newsize) { | |
256 | struct route ro; | |
257 | ||
258 | ro.ro_dst.sa_family = AF_INET; | |
259 | ro.ro_dst.sa_len = sizeof ro.ro_dst; | |
260 | ((struct sockaddr_in *)&ro.ro_dst)->sin_addr = dst; | |
261 | ro.ro_rt = 0; | |
262 | rtalloc(&ro); | |
263 | ||
264 | /* | |
265 | * If there was no route, just forget about it, can't do anything. | |
266 | */ | |
267 | if(!ro.ro_rt) | |
268 | return; | |
269 | ||
270 | /* | |
271 | * If there was a route, but it's the wrong kind, forget it. | |
272 | */ | |
273 | if((ro.ro_rt->rt_flags & (RTF_UP | RTF_HOST)) != (RTF_UP | RTF_HOST)) { | |
274 | RTFREE(ro.ro_rt); | |
275 | return; | |
276 | } | |
277 | ||
278 | /* | |
279 | * If the MTU is locked by some outside agency, forget it. | |
280 | */ | |
281 | if(ro.ro_rt->rt_rmx.rmx_locks & RTV_MTU) { | |
282 | RTFREE(ro.ro_rt); | |
283 | return; | |
284 | } | |
285 | ||
286 | /* | |
287 | * If newsize == 0, then we got an ICMP from a router | |
288 | * which doesn't support the MTU extension, so just go down one. | |
289 | */ | |
290 | newsize = in_nextmtu(ro.ro_rt->rt_rmx.rmx_mtu, 0); | |
291 | ||
292 | if(!newsize) { | |
293 | ro.ro_rt->rt_rmx.rmx_mtu = 0; /* we can't go any lower */ | |
294 | RTFREE(ro.ro_rt); | |
295 | return; | |
296 | } | |
297 | /* | |
298 | * If the new MTU is greater than the old MTU, forget it. (Prevent | |
299 | * denial-of-service attack.) Don't bother if the new MTU is the | |
300 | * same as the old one. | |
301 | */ | |
302 | if(ro.ro_rt->rt_rmx.rmx_mtu <= newsize) { | |
303 | RTFREE(ro.ro_rt); | |
304 | return; | |
305 | } | |
306 | ||
307 | /* | |
308 | * OK, do it. | |
309 | */ | |
310 | ro.ro_rt->rt_rmx.rmx_mtu = newsize; | |
311 | RTFREE(ro.ro_rt); | |
312 | } | |
313 | ||
314 | /* | |
315 | * Walk through all the PCB lists in checkpcbs[] and decrement the | |
316 | * timers on the ones still participating in MTU discovery. | |
317 | * If the timers reach zero, bump the MTU (clamped to the interface | |
318 | * MTU), assuming the route is still good. | |
319 | */ | |
320 | void | |
321 | in_mtutimer(caddr_t dummy1, int dummy2) { | |
322 | int i; | |
323 | struct inpcb *inp; | |
324 | struct rtentry *rt; | |
325 | int s = splnet(); | |
326 | ||
327 | for(i = 0; checkpcbs[i]; i++) { | |
328 | inp = checkpcbs[i]; | |
329 | ||
330 | while(inp = inp->inp_next) { | |
331 | if(inp->inp_flags & INP_MTUDISCOVERED) { | |
332 | if(!inp->inp_route.ro_rt | |
333 | || !(inp->inp_route.ro_rt->rt_flags & RTF_UP)) { | |
334 | inp->inp_flags &= ~INP_MTUDISCOVERED; | |
335 | continue; /* we'll notice it later */ | |
336 | } | |
337 | ||
338 | if(--inp->inp_mtutimer == 0) { | |
339 | in_bumpmtu(inp); | |
340 | inp->inp_mtutimer = in_mtutimer1; | |
341 | if(inp->inp_route.ro_rt->rt_rmx.rmx_rtt | |
342 | && ((in_mtutimer1 * 60) | |
343 | > (inp->inp_route.ro_rt->rt_rmx.rmx_rtt / RTM_RTTUNIT))) { | |
344 | inp->inp_mtutimer = | |
345 | inp->inp_route.ro_rt->rt_rmx.rmx_rtt / RTM_RTTUNIT; | |
346 | } | |
347 | } | |
348 | } | |
349 | } | |
350 | } | |
351 | splx(s); | |
352 | timeout(in_mtutimer, (caddr_t)0, 60 * hz); | |
353 | } | |
354 | ||
355 | /* | |
356 | * Try to increase the MTU and let everyone know that it has changed. | |
357 | * Must be called with a valid route in inp->inp_route. Probably | |
358 | * must be at splnet(), too. | |
359 | */ | |
360 | void | |
361 | in_bumpmtu(struct inpcb *inp) { | |
362 | struct route *ro; | |
363 | unsigned newmtu; | |
364 | ||
365 | ro = &inp->inp_route; | |
366 | newmtu = in_nextmtu(inp->inp_pmtu, 1); | |
367 | if(!newmtu) return; /* doing the best we can */ | |
368 | if(newmtu <= ro->ro_rt->rt_ifp->if_mtu) { | |
369 | if(!(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)) { | |
370 | ro->ro_rt->rt_rmx.rmx_mtu = newmtu; | |
371 | in_pcbmtu(inp); | |
372 | } | |
373 | } | |
374 | } | |
375 | ||
376 | #endif /* INET */ | |
377 | #endif /* MTUDISC */ |