Commit | Line | Data |
---|---|---|
17efd7fe MK |
/*
 * RCS identification string, compiled in only when RCSIDENT is defined.
 * The label after #endif is written as a comment: bare tokens after the
 * directive are not valid ISO C.
 */
#ifdef RCSIDENT
static char rcsident[] = "$Header: tcp_usrreq.c,v 1.30 85/07/31 09:43:43 walsh Exp $";
#endif /* RCSIDENT */
4 | ||
5 | #include "../h/param.h" | |
6 | #include "../h/systm.h" | |
7 | #include "../h/mbuf.h" | |
8 | #include "../h/socket.h" | |
9 | #include "../h/socketvar.h" | |
10 | #include "../h/protosw.h" | |
11 | #include "../h/errno.h" | |
12 | #include "../h/ioctl.h" | |
13 | #include "../h/time.h" | |
14 | #include "../h/kernel.h" | |
15 | ||
16 | #include "../net/if.h" | |
17 | #include "../net/route.h" | |
18 | ||
19 | #include "../bbnnet/in.h" | |
20 | #include "../bbnnet/in_var.h" | |
21 | #include "../bbnnet/in_pcb.h" | |
22 | #include "../bbnnet/net.h" | |
23 | #include "../bbnnet/fsm.h" | |
24 | #include "../bbnnet/tcp.h" | |
25 | #include "../bbnnet/ip.h" | |
26 | #include "../bbnnet/icmp.h" | |
27 | #include "../bbnnet/macros.h" | |
28 | #include "../bbnnet/sws.h" | |
29 | ||
30 | /* | |
31 | * TCP protocol interface to socket abstraction. | |
32 | */ | |
33 | ||
34 | #ifdef GPROF | |
35 | int tcp_acounts[TCP_NSTATES][PRU_NREQ]; | |
36 | #endif | |
37 | ||
38 | extern tcp_pcbdisconnect(); | |
39 | extern tcp_binding_used(); | |
40 | ||
41 | struct inpcb tcp; | |
42 | struct tcp_stat tcpstat; | |
43 | sequence tcp_iss; /* tcp initial send seq # */ | |
44 | ||
45 | struct dfilter tcp_dfilter; | |
46 | ||
47 | struct pr_advice tcp_advice = | |
48 | { | |
49 | TCP_RESERVED, /* application reserved */ | |
50 | TCP_USERRESERVED, /* user reserved */ | |
51 | TCP_MAXPORT, /* max port */ | |
52 | TCP_USERRESERVED+1, /* random last used */ | |
53 | sizeof(u_short), /* port size */ | |
54 | tcp_binding_used, /* confirmation routine */ | |
55 | } ; | |
56 | ||
57 | dowedebug(inp, so, filter) | |
58 | register struct inpcb *inp; | |
59 | struct socket *so; | |
60 | register struct dfilter *filter; | |
61 | { | |
62 | register int count; | |
63 | ||
64 | count = 0; | |
65 | if (inp->inp_faddr.s_addr == filter->foreign_host.s_addr) | |
66 | count ++; | |
67 | if (inp->inp_fport == filter->foreign_port) | |
68 | count ++; | |
69 | if (inp->inp_laddr.s_addr == filter->local_host.s_addr) | |
70 | count ++; | |
71 | if (inp->inp_lport == filter->local_port) | |
72 | count ++; | |
73 | ||
74 | if (count >= filter->matches) | |
75 | so->so_options |= SO_DEBUG; | |
76 | } | |
77 | ||
/*
 * Default no-activity timer value given to every new tcpcb by
 * tcp_newtcpcb().  Zero leaves the feature off; patchable.
 */
int	tcp_noact = 0;
79 | ||
80 | /* | |
81 | * Allocate and initialize a new TCB | |
82 | * tcp_usrreq calls tcp_attach calls us. tcp_usrreq splnet()'s | |
83 | */ | |
84 | struct tcpcb *tcp_newtcpcb(inp) | |
85 | register struct inpcb *inp; | |
86 | { | |
87 | register struct tcpcb *tp; | |
88 | register struct mbuf *m; | |
89 | ||
90 | m = m_getclr(M_WAIT, MT_PCB); | |
91 | if (m == NULL) | |
92 | return(NULL); | |
93 | tp = mtod(m, struct tcpcb *); | |
94 | ||
95 | /* initialize non-zero tcb fields */ | |
96 | ||
97 | tp->t_rcv_next = (struct th *)tp; | |
98 | tp->t_rcv_prev = (struct th *)tp; | |
99 | /* | |
100 | * Don't start off assuming minimum srtt/rxmitime. If we do, and | |
101 | * TCP_tvRXMIN is small and we decide to communicate over a | |
102 | * reliable, but slow, network then we may not find true values for | |
103 | * these. We may assume an ACK was for a retransmission that | |
104 | * we're measuring the srtt of, not the original packet. | |
105 | * | |
106 | * Instead, start high and approach from above in a deterministic | |
107 | * fashion. We should get close to the right values fairly rapidly. | |
108 | * | |
109 | * 7/85: start from above by special casing first round trip time | |
110 | * measurement. If srtt == 0, do not reset rtt, and do not use | |
111 | * weighted averaging. srtt starts as time to ack(xmit [+ rxmit...]) | |
112 | * and then gets smoothed with new round trip times. This compromise | |
113 | * for getting to long-term srtt more quickly on LANs should work | |
114 | * on the Internet as well. It will only hurt Internet connections | |
115 | * if packet loss is high, and even then would only slow getting | |
116 | * to long term srtt. | |
117 | * This method can be turned off by initializing srtt with a non-zero | |
118 | * value. | |
119 | */ | |
120 | /* tp->t_srtt = TCP_tvMAXSRTT; */ | |
121 | tp->t_rxmitime = TCP_tvMAXSRTT + 1; | |
122 | tp->t_rttltimeo = TCP_tvRTTL; | |
123 | tp->t_xmt_val = tp->snd_end = tp->seq_fin = tp->snd_nxt = | |
124 | tp->snd_hi = tp->snd_una = tp->iss = tcp_iss; | |
125 | tcp_iss += ISSINCR; | |
126 | ||
127 | /* | |
128 | * Imitate Berkeley code by setting push as a default. This should | |
129 | * increase compatibility at the user code level. | |
130 | */ | |
131 | tp->t_push = TRUE; | |
132 | ||
133 | /* | |
134 | * Berkeley 4.2 code sends a data byte beyond the window's edge to see | |
135 | * if the other end is up. If other end does not respond, connection | |
136 | * times out and aborts. This is dangerous since the byte may make its | |
137 | * way into the input stream if the recipient is coded keeping in mind | |
138 | * how expensive packets are. | |
139 | * | |
140 | * We'll provide for an optional method to send a well formed ack that | |
141 | * will catch remote failure and generate a tcp reset. Note that we | |
142 | * don't care if the other end ignores the ack; we only hope for a well | |
143 | * coded tcp to respond with a reset in the right circumstances. This | |
144 | * sort of handshaking/probing should really be done at the application | |
145 | * level, but not all specs (eg., SMTP) provide for such a noop. | |
146 | * | |
147 | * Optional, since some networks charge for packets and since some might | |
148 | * see this as unecessary traffic. | |
149 | * | |
150 | * also see tcp_ioctl() | |
151 | */ | |
152 | if (tp->t_noact = tcp_noact) | |
153 | tp->t_noactprobe = TRUE; | |
154 | ||
155 | /* attach the tcpcb to the in_pcb */ | |
156 | ||
157 | inp->inp_ppcb = (caddr_t)tp; | |
158 | tp->t_in_pcb = inp; | |
159 | ||
160 | return(tp); | |
161 | } | |
162 | ||
163 | /* | |
164 | * Is a tcp port/address pair already in use by some socket on this machine? | |
165 | * Passed to in_pcbbind() to help it find a port/address binding | |
166 | * that is unique for tcp. | |
167 | */ | |
168 | int tcp_binding_used(inp, lport, lsaddr, reuselocal) | |
169 | struct inpcb *inp; | |
170 | u_short lport; | |
171 | u_long lsaddr; | |
172 | { | |
173 | register struct inpcb *i; | |
174 | ||
175 | for (i = tcp.inp_next; i != &tcp; i = i->inp_next) | |
176 | { | |
177 | /* | |
178 | * Since our inpcb is in this linked list, don't want to know | |
179 | * if we, ourselves, are already using this binding. | |
180 | */ | |
181 | if (i != inp) | |
182 | if (i->inp_lport == lport) | |
183 | /* | |
184 | * Our/His address is unbound (INADDR_ANY) iff | |
185 | * not yet connected to foreign host. | |
186 | */ | |
187 | if ((i->inp_laddr.s_addr == lsaddr) || | |
188 | (i->inp_laddr.s_addr == INADDR_ANY) || | |
189 | (lsaddr == INADDR_ANY)) | |
190 | { | |
191 | if (!reuselocal) | |
192 | break; | |
193 | if (i->inp_faddr.s_addr == INADDR_ANY) | |
194 | /* | |
195 | * We're both waiting for foreign | |
196 | * connection. Could only re-use if | |
197 | * he was already connected. | |
198 | */ | |
199 | break; | |
200 | } | |
201 | } | |
202 | return (i != &tcp); | |
203 | } | |
204 | ||
205 | /* | |
206 | * returns a (struct tcpcb *) cast to a (char *). This is | |
207 | * so in_pcbconnect() can correctly handle return value. All | |
208 | * other uses promptly cast back. | |
209 | */ | |
210 | ||
211 | char *tcp_conn_used(inp, lport, lsaddr, fport, fsaddr) | |
212 | struct inpcb *inp; | |
213 | u_short lport; | |
214 | u_long lsaddr; | |
215 | u_short fport; | |
216 | u_long fsaddr; | |
217 | { | |
218 | register struct inpcb *i; | |
219 | ||
220 | for (i = tcp.inp_next; i != &tcp; i = i->inp_next) | |
221 | { | |
222 | /* | |
223 | * Since our inpcb is in this linked list, don't want to know | |
224 | * if we, ourselves, are already using this connetion. | |
225 | */ | |
226 | if (i != inp) | |
227 | if ((i->inp_lport == lport) && | |
228 | (i->inp_fport == fport) && | |
229 | (i->inp_laddr.s_addr == lsaddr) && | |
230 | (i->inp_faddr.s_addr == fsaddr)) | |
231 | return((char *)i->inp_ppcb); | |
232 | } | |
233 | return ((char *) NULL); | |
234 | } | |
235 | ||
236 | tcp_ioctl (tp, command, data) | |
237 | struct tcpcb *tp; | |
238 | int command; | |
239 | caddr_t data; | |
240 | { | |
241 | switch (command) | |
242 | { | |
243 | /* push */ | |
244 | case SIOCSPUSH: | |
245 | tp->t_push = TRUE; | |
246 | break; | |
247 | ||
248 | case SIOCCPUSH: | |
249 | tp->t_push = FALSE; | |
250 | break; | |
251 | ||
252 | /* no activity timer */ | |
253 | case SIOCSNOACT: | |
254 | { | |
255 | u_long value; | |
256 | ||
257 | value = *((u_long *) data); | |
258 | /* | |
259 | * A shutdown socket should still be able to request some sort of | |
260 | * check on the status of the remote end. Also see tcp_newtcpcb(). | |
261 | */ | |
262 | tp->t_noactprobe = (value & TCP_NOACTPROBE) ? TRUE : FALSE; | |
263 | tp->t_noactsig = (value & TCP_NOACTSIG) ? TRUE : FALSE; | |
264 | ||
265 | if ((tp->t_state <= ESTAB) || (tp->t_state == CLOSE_WAIT)) | |
266 | { | |
267 | /* don't interfere with system use of timer */ | |
268 | value &= ~(TCP_NOACTPROBE|TCP_NOACTSIG); | |
269 | tp->t_noact = MIN (MAX_TCPTIMERVAL, value); | |
270 | tp->t_timers[TNOACT] = tp->t_noact; | |
271 | } | |
272 | } | |
273 | break; | |
274 | ||
275 | case SIOCGNOACT: | |
276 | { | |
277 | u_long value; | |
278 | ||
279 | value = tp->t_noact; | |
280 | if (tp->t_noactprobe) | |
281 | value |= TCP_NOACTPROBE; | |
282 | if (tp->t_noactsig) | |
283 | value |= TCP_NOACTSIG; | |
284 | ||
285 | *((u_long *) data) = value; | |
286 | } | |
287 | break; | |
288 | ||
289 | /* init timer */ | |
290 | case SIOCSINIT: | |
291 | tp->t_itimeo = MIN (MAX_TCPTIMERVAL, *((unsigned *) data)); | |
292 | break; | |
293 | ||
294 | case SIOCGINIT: | |
295 | *((int *) data) = tp->t_itimeo; | |
296 | break; | |
297 | ||
298 | /* retransmit took too long timer */ | |
299 | case SIOCSRTTL: | |
300 | tp->t_rttltimeo = MIN (MAX_TCPTIMERVAL, *((unsigned *) data)); | |
301 | break; | |
302 | ||
303 | case SIOCGRTTL: | |
304 | *((int *) data) = tp->t_rttltimeo; | |
305 | break; | |
306 | ||
307 | case SIOCABORT: | |
308 | { | |
309 | struct socket *so; | |
310 | ||
311 | /* there really should be a generic way for | |
312 | * a user to get to soabort() | |
313 | */ | |
314 | ||
315 | tp->usr_abort = TRUE; | |
316 | /* | |
317 | * Just in case asked to abort a LISTENing socket, | |
318 | * Don't leave unattached, unaccepted connections. | |
319 | */ | |
320 | so = tp->t_in_pcb->inp_socket; | |
321 | while (so->so_q0 && (so->so_q0 != so)) | |
322 | (void) soabort(so->so_q0); | |
323 | while (so->so_q && (so->so_q != so)) | |
324 | (void) soabort(so->so_q); | |
325 | ||
326 | w_alloc(IUABORT, 0, tp, tp->t_in_pcb); | |
327 | } | |
328 | break; | |
329 | ||
330 | default: | |
331 | /* not our ioctl, let lower level try ioctl */ | |
332 | return ip_ioctl (tp->t_in_pcb, command, data); | |
333 | } | |
334 | ||
335 | return (0); | |
336 | } | |
337 | ||
338 | ||
339 | /* | |
340 | * Process a TCP user request for TCP tb. If this is a send request | |
341 | * then m is the mbuf chain of send data. If this is a timer expiration | |
342 | * (called from the software clock routine), then timertype tells which timer. | |
343 | */ | |
344 | /*ARGSUSED*/ | |
345 | tcp_usrreq(so, req, m, nam, rights) | |
346 | struct socket *so; | |
347 | int req; | |
348 | struct mbuf *m, *nam, *rights; | |
349 | { | |
350 | register struct inpcb *inp; | |
351 | register struct tcpcb *tp; | |
352 | register int s; | |
353 | register int act, newstate; | |
354 | int error = 0; | |
355 | ||
356 | s = splnet(); | |
357 | inp = sotoinpcb(so); | |
358 | ||
359 | /* keep in mind call from ifioctl() */ | |
360 | if (rights && req != PRU_CONTROL) | |
361 | { | |
362 | if (rights->m_len) | |
363 | { | |
364 | splx(s); | |
365 | return (EINVAL); | |
366 | } | |
367 | } | |
368 | /* | |
369 | * When a TCP is attached to a socket, then there will be | |
370 | * a (struct inpcb) pointed at by the socket, and this | |
371 | * structure will point at a subsidary (struct tcpcb). | |
372 | */ | |
373 | if (inp == NULL && req != PRU_ATTACH) | |
374 | { | |
375 | splx(s); | |
376 | return (EINVAL); /* XXX */ | |
377 | } | |
378 | if (inp) | |
379 | { | |
380 | tp = inptotcpcb(inp); | |
381 | /* WHAT IF TP IS 0? */ | |
382 | #ifdef GPROF | |
383 | tcp_acounts[tp->t_state][req]++; | |
384 | #endif | |
385 | } | |
386 | ||
387 | /* | |
388 | * This switch becomes a 'caseb', so put common ones at top. | |
389 | */ | |
390 | switch (req) | |
391 | { | |
392 | ||
393 | case PRU_RCVD: | |
394 | /* | |
395 | * After a receive, possibly send window update to peer. | |
396 | */ | |
397 | W_ALLOC(IURECV, 0, tp, NULL, so, act, newstate); | |
398 | break; | |
399 | ||
400 | case PRU_SEND: | |
401 | /* | |
402 | * Do a send by initiating the proper entry to the FSM. | |
403 | * Don't let urgent continue. | |
404 | */ | |
405 | tp->t_urg = FALSE; | |
406 | W_ALLOC(IUSEND, 0, tp, m, so, act, newstate); | |
407 | break; | |
408 | ||
409 | /* | |
410 | * TCP attaches to socket via PRU_ATTACH, reserving space, | |
411 | * and an internet control block. | |
412 | */ | |
413 | case PRU_ATTACH: | |
414 | if (inp) | |
415 | { | |
416 | error = EISCONN; | |
417 | break; | |
418 | } | |
419 | error = tcp_attach(so); | |
420 | if (error) | |
421 | break; | |
422 | if ((so->so_options & SO_LINGER) && so->so_linger == 0) | |
423 | so->so_linger = T_LINGERTIME; | |
424 | tp = sototcpcb(so); | |
425 | break; | |
426 | ||
427 | /* | |
428 | * PRU_DETACH detaches the TCP protocol from the socket. | |
429 | * This is only done after SO_ISCONNECTED has been cleared. | |
430 | */ | |
431 | case PRU_DETACH: | |
432 | tcp_disconnect(tp); | |
433 | break; | |
434 | ||
435 | /* | |
436 | * Give the socket an address. | |
437 | */ | |
438 | case PRU_BIND: | |
439 | error = in_pcbbind(inp, nam, &tcp_advice); | |
440 | break; | |
441 | ||
442 | /* | |
443 | * Prepare to accept connections. | |
444 | */ | |
445 | case PRU_LISTEN: | |
446 | if (inp->inp_lport == 0) | |
447 | error = in_pcbbind(inp, (struct mbuf *)0, &tcp_advice); | |
448 | if (error == 0) | |
449 | w_alloc(IUOPENA, 0, tp, NULL); | |
450 | break; | |
451 | ||
452 | /* | |
453 | * Initiate connection to peer. | |
454 | * Bind the local end if not already. | |
455 | * Set the routing. | |
456 | * Crank up the TCP state machine. | |
457 | */ | |
458 | case PRU_CONNECT: | |
459 | { | |
460 | struct in_addr laddr; | |
461 | ||
462 | laddr = inp->inp_laddr; | |
463 | if (inp->inp_lport == 0) | |
464 | { | |
465 | error = in_pcbbind(inp, (struct mbuf *)0, &tcp_advice); | |
466 | if (error) | |
467 | break; | |
468 | } | |
469 | error = in_pcbconnect(inp, nam, tcp_conn_used); | |
470 | if (error) | |
471 | break; | |
472 | ||
473 | if (in_broadcast(inp->inp_faddr)) | |
474 | { | |
475 | in_pcbdisconnect (inp, tcp_pcbdisconnect); | |
476 | inp->inp_laddr = laddr; | |
477 | error = EADDRNOTAVAIL; | |
478 | break; | |
479 | } | |
480 | ||
481 | if (! (tp->t_template = tcp_template(tp))) | |
482 | { | |
483 | in_pcbdisconnect (inp, tcp_pcbdisconnect); | |
484 | inp->inp_laddr = laddr; | |
485 | error = ENOBUFS; | |
486 | break; | |
487 | } | |
488 | ||
489 | tp->sws_qff = SWS_QFF_DEF; | |
490 | ||
491 | /* | |
492 | * So can debug connection problems without having to change | |
493 | * every program or apply debugging flag to each program every | |
494 | * time run it. | |
495 | */ | |
496 | dowedebug(inp, so, &tcp_dfilter); | |
497 | ||
498 | soisconnecting(so); | |
499 | w_alloc(IUOPENR, 0, tp, NULL); | |
500 | } | |
501 | break; | |
502 | ||
503 | /* | |
504 | * Create a TCP connection between two sockets. | |
505 | */ | |
506 | case PRU_CONNECT2: | |
507 | error = EOPNOTSUPP; | |
508 | break; | |
509 | ||
510 | /* | |
511 | * Initiate disconnect from peer. | |
512 | * If connection never passed embryonic stage, just drop; | |
513 | * else if don't need to let data drain, then can just drop anyways, | |
514 | * else have to begin TCP shutdown process: mark socket disconnecting, | |
515 | * drain unread data, state switch to reflect user close, and | |
516 | * send segment (e.g. FIN) to peer. Socket will be really disconnected | |
517 | * when peer sends FIN and acks ours. | |
518 | */ | |
519 | case PRU_DISCONNECT: | |
520 | tcp_disconnect(tp); | |
521 | break; | |
522 | ||
523 | /* | |
524 | * Accept a connection. Essentially all the work is | |
525 | * done at higher levels; just return the address | |
526 | * of the peer, storing through addr. | |
527 | * | |
528 | * BBN-NOTE: upper levels do all the waiting; this stays the same. | |
529 | */ | |
530 | case PRU_ACCEPT: | |
531 | { | |
532 | struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *); | |
533 | ||
534 | nam->m_len = sizeof (struct sockaddr_in); | |
535 | sin->sin_family = AF_INET; | |
536 | sin->sin_port = inp->inp_fport; | |
537 | sin->sin_addr = inp->inp_faddr; | |
538 | break; | |
539 | } | |
540 | ||
541 | /* | |
542 | * Mark the connection as being incapable of further output. | |
543 | */ | |
544 | case PRU_SHUTDOWN: | |
545 | socantsendmore(so); | |
546 | if (! tp->usr_closed) | |
547 | w_alloc(IUCLOSE, 0, tp, inp); | |
548 | break; | |
549 | ||
550 | /* | |
551 | * Abort the TCP. | |
552 | */ | |
553 | case PRU_ABORT: | |
554 | w_alloc(IUABORT, 0, tp, inp); | |
555 | break; | |
556 | ||
557 | case PRU_CONTROL: | |
558 | error = tcp_ioctl(tp, (int) m, (caddr_t) nam); | |
559 | break; | |
560 | ||
561 | ||
562 | /* SOME AS YET UNIMPLEMENTED HOOKS */ | |
563 | case PRU_SENSE: | |
564 | error = EOPNOTSUPP; | |
565 | break; | |
566 | /* END UNIMPLEMENTED HOOKS */ | |
567 | ||
568 | case PRU_RCVOOB: | |
569 | ||
570 | { | |
571 | int desired; | |
572 | ||
573 | if (so->so_oobmark == 0 && (so->so_state & SS_RCVATMARK) == 0) | |
574 | { | |
575 | error = EINVAL; | |
576 | break; | |
577 | } | |
578 | if (tp->oob_data == NULL) | |
579 | { | |
580 | error = EWOULDBLOCK; | |
581 | break; | |
582 | } | |
583 | desired = *(mtod(m, int *)); | |
584 | ||
585 | while ((desired > 0) && (tp->oob_data)) | |
586 | { | |
587 | char *p; | |
588 | unsigned count; | |
589 | ||
590 | p = mtod(m, caddr_t); | |
591 | count = MIN(desired, tp->oob_data->m_len); | |
592 | count = MIN(count, MLEN); | |
593 | bcopy(mtod(tp->oob_data, caddr_t), p, count); | |
594 | m->m_len = count; | |
595 | desired -= count; | |
596 | ||
597 | tp->oob_data->m_len -= count; | |
598 | tp->oob_data->m_off += count; | |
599 | if (tp->oob_data->m_len <= 0) | |
600 | tp->oob_data = m_free(tp->oob_data); | |
601 | ||
602 | if ((desired > 0) && (tp->oob_data)) | |
603 | { | |
604 | m->m_next = m_get(M_WAIT, MT_DATA); | |
605 | m = m->m_next; | |
606 | } | |
607 | } | |
608 | ||
609 | } | |
610 | break; | |
611 | ||
612 | case PRU_SENDOOB: | |
613 | /* | |
614 | * allows up to MAX_TCPOOB bytes of out of band data | |
615 | * even if user has used up all his allocated space. | |
616 | */ | |
617 | if (sbspace(&so->so_snd) < (- MAX_TCPOOB)) | |
618 | { | |
619 | m_freem(m); | |
620 | error = ENOBUFS; | |
621 | break; | |
622 | ||
623 | } | |
624 | tp->t_urg = TRUE; | |
625 | w_alloc(IUSEND, 0, tp, m); | |
626 | break; | |
627 | ||
628 | /* | |
629 | * Return the address of this socket (local-side binding) | |
630 | */ | |
631 | case PRU_SOCKADDR: | |
632 | in_setsockaddr(inp, nam); | |
633 | break; | |
634 | ||
635 | case PRU_PEERADDR: | |
636 | in_setpeeraddr(inp, nam); | |
637 | break; | |
638 | ||
639 | /* | |
640 | * TCP slow timer went off; run down all those timers. | |
641 | */ | |
642 | case PRU_SLOWTIMO: | |
643 | tcp_timeo(); | |
644 | break; | |
645 | ||
646 | default: | |
647 | panic("tcp_usrreq"); | |
648 | } | |
649 | splx(s); | |
650 | return (error); | |
651 | } | |
652 | ||
653 | /* | |
654 | * getsockopt() / setsockopt() | |
655 | */ | |
656 | tcp_ctloutput (req,so,level,optname,optval) | |
657 | int req; | |
658 | struct socket *so; | |
659 | int level, optname; | |
660 | struct mbuf **optval; | |
661 | { | |
662 | int s = splnet(); /* like PRU/packet/timer entry into net code */ | |
663 | int error; | |
664 | struct inpcb *inp; | |
665 | ||
666 | /* | |
667 | * possibly for us? | |
668 | * Follow Berkeley methods: level is protocol number if meant for the | |
669 | * protocol layer. (Why not say if=0, arp=1, ip=2, udp/tcp/rdp=3....?) | |
670 | * | |
671 | * Problem: tcp needs to know about IP options in order to use right | |
672 | * maxseg. This doesn't quite work with the layering. | |
673 | * | |
674 | * Why not combine ioctl/setsockopt/getsockopt paths, since ioctl can be | |
675 | * seen as fixed size sockopt- tried at BBN; removed for 4.3 | |
676 | */ | |
677 | ||
678 | /* should be "mature" socket so pointers all valid... */ | |
679 | inp = sotoinpcb(so); | |
680 | ||
681 | switch(req) | |
682 | { | |
683 | case PRCO_GETOPT: | |
684 | error = tcp_getopt (inp, optname, optval); | |
685 | break; | |
686 | ||
687 | case PRCO_SETOPT: | |
688 | error = tcp_setopt (inp, optname, optval); | |
689 | break; | |
690 | ||
691 | default: | |
692 | panic("tcp_ctloutput"); | |
693 | } | |
694 | ||
695 | splx(s); | |
696 | return (error); | |
697 | } | |
698 | ||
/*
 * getsockopt() for TCP.
 *
 *	inp	pcb of the socket
 *	command	option name
 *	data	where the option value mbuf is returned
 *
 * There are no TCP specific options accessed by getsockopt() as yet, so
 * the request goes straight to ip_getopt() and its result is returned.
 */
tcp_getopt (inp, command, data)
struct inpcb *inp;
int command;		/* was undeclared; made explicit, same type as before */
struct mbuf **data;
{
	return ip_getopt (inp, command, data);
}
709 | ||
710 | tcp_setopt (inp, command, data) | |
711 | struct inpcb *inp; | |
712 | struct mbuf **data; | |
713 | { | |
714 | int error; | |
715 | struct tcpcb *tp; | |
716 | ||
717 | /* no TCP specific options accessed by setsockopt() as yet */ | |
718 | tp = inptotcpcb(inp); | |
719 | ||
720 | if (command == SO_IPROUTE) | |
721 | tp->t_maxseg += inp->inp_optlen; | |
722 | ||
723 | error = ip_setopt(inp, command, data); | |
724 | ||
725 | if (command == SO_IPROUTE) | |
726 | tp->t_maxseg -= inp->inp_optlen; | |
727 | ||
728 | return (error); | |
729 | } | |
730 | ||
/*
 * Default socket buffer reservations made by tcp_attach().
 *
 * The figures come from the measurements described in the paper
 * "Converting the BBN TCP/IP to 4.2BSD" (S.L.C. USENIX).  If your
 * network handles packets larger than an ethernet frame, tcp_init could
 * be changed back to find the largest net's packet size, multiply that
 * by some number, and round up to a multiple of a CLSIZE.
 */
int	tcp_sendspace = 4096;
int	tcp_recvspace = 4096;
740 | ||
741 | /* | |
742 | * Attach TCP protocol to socket, allocating | |
743 | * internet protocol control block, tcp control block, buffer space. | |
744 | */ | |
745 | tcp_attach(so) | |
746 | struct socket *so; | |
747 | { | |
748 | register struct tcpcb *tp; | |
749 | struct inpcb *inp; | |
750 | int error; | |
751 | ||
752 | if (! (error = soreserve(so, tcp_sendspace, tcp_recvspace))) | |
753 | { | |
754 | if (! (error = in_pcballoc(so, &tcp))) | |
755 | { | |
756 | inp = sotoinpcb(so); | |
757 | if (tp = tcp_newtcpcb(inp)) | |
758 | { | |
759 | /* | |
760 | * Should change state tables to have an UNOPENED state like | |
761 | * the butterfly's which is different from SAME. | |
762 | */ | |
763 | tp->t_state = 0; | |
764 | return (0); | |
765 | } | |
766 | error = ENOBUFS; | |
767 | in_pcbdetach(inp, (int (*)())0); | |
768 | } | |
769 | } | |
770 | return (error); | |
771 | } | |
772 | ||
773 | /* | |
774 | * Initiate (or continue) disconnect. | |
775 | * If embryonic state, just send reset (once). | |
776 | * If not in ``let data drain'' option, just drop. | |
777 | * Otherwise (hard), mark socket disconnecting and drop | |
778 | * current input data; switch states based on user close, and | |
779 | * send segment to peer (with FIN). | |
780 | */ | |
781 | ||
782 | tcp_disconnect(tp) | |
783 | register struct tcpcb *tp; | |
784 | { | |
785 | struct socket *so = tp->t_in_pcb->inp_socket; | |
786 | ||
787 | soisdisconnecting(so); | |
788 | sbflush(&so->so_rcv); | |
789 | tp->usr_abort = TRUE; | |
790 | if (!tp->usr_closed) | |
791 | w_alloc(IUCLOSE, 0, tp, tp->t_in_pcb); | |
792 | } | |
793 | ||
794 | tcp_init() | |
795 | { | |
796 | /* | |
797 | * Leave these checks in! It's a pain in the ass to find out | |
798 | * problems caused by too small mbufs if someone changes the | |
799 | * size of an mbuf. | |
800 | */ | |
801 | if (sizeof(struct inpcb) > MLEN) | |
802 | panic("inpcb too big"); | |
803 | ||
804 | if (sizeof(struct socket) > MLEN) | |
805 | panic("socket too big"); | |
806 | ||
807 | if (sizeof(struct th) > MLEN) | |
808 | panic("th too big"); | |
809 | ||
810 | if (sizeof(struct tcpcb) > MLEN) | |
811 | panic("tcpcb too big"); | |
812 | ||
813 | if (sizeof(struct t_debug) > MLEN) | |
814 | panic("t_debug too big"); | |
815 | ||
816 | /* init queue */ | |
817 | tcp.inp_next = tcp.inp_prev = &tcp; | |
818 | ||
819 | /* are only 4 things to match. turn off for now */ | |
820 | tcp_dfilter.matches = 5; | |
821 | ||
822 | tcp_iss = time.tv_sec; | |
823 | ||
824 | ipsw[IPPROTO_TCP].ipsw_hlen = sizeof(struct th); | |
825 | } | |
826 | ||
827 | tcp_ctlinput (prc_code, arg) | |
828 | caddr_t arg; | |
829 | { | |
830 | int error; | |
831 | ||
832 | error = inetctlerrmap[prc_code]; | |
833 | ||
834 | switch (prc_code) | |
835 | { | |
836 | case PRC_UNREACH_PROTOCOL: /* icmp message */ | |
837 | case PRC_UNREACH_PORT: | |
838 | case PRC_MSGSIZE: | |
839 | { | |
840 | register struct th *tp; | |
841 | struct tcpcb *t; | |
842 | ||
843 | tp = (struct th *) (&((struct icmp *) arg)->ic_iphdr); | |
844 | t = (struct tcpcb *)tcp_conn_used ((struct inpcb *) 0, | |
845 | tp->t_src, tp->t_s.s_addr, | |
846 | tp->t_dst, tp->t_d.s_addr); | |
847 | if (t) | |
848 | t_close(t, error); | |
849 | } | |
850 | break; | |
851 | ||
852 | case PRC_UNREACH_NET: | |
853 | case PRC_UNREACH_HOST: | |
854 | { | |
855 | register struct th *tp; | |
856 | struct tcpcb *t; | |
857 | ||
858 | tp = (struct th *) (&((struct icmp *) arg)->ic_iphdr); | |
859 | t = (struct tcpcb *)tcp_conn_used ((struct inpcb *) 0, | |
860 | tp->t_src, tp->t_s.s_addr, | |
861 | tp->t_dst, tp->t_d.s_addr); | |
862 | if (t) | |
863 | { | |
864 | struct socket *so; | |
865 | ||
866 | so = t->t_in_pcb->inp_socket; | |
867 | if ((so->so_state & SS_NOFDREF) == 0) | |
868 | advise_user(so, error); | |
869 | else | |
870 | t_close(t, error); | |
871 | } | |
872 | } | |
873 | break; | |
874 | ||
875 | case PRC_GWDOWN: | |
876 | in_gdown (&tcp, (u_long) arg); | |
877 | break; | |
878 | ||
879 | case PRC_REDIRECT_NET: /* icmp message */ | |
880 | case PRC_REDIRECT_HOST: | |
881 | { | |
882 | struct tcpcb *t; | |
883 | register struct th *tp; | |
884 | ||
885 | tp = (struct th *) (&((struct icmp *) arg)->ic_iphdr); | |
886 | t = (struct tcpcb *)tcp_conn_used ((struct inpcb *) 0, | |
887 | tp->t_src, tp->t_s.s_addr, | |
888 | tp->t_dst, tp->t_d.s_addr); | |
889 | if (t) | |
890 | icmp_redirect_inp(t->t_in_pcb, (struct icmp *) arg, | |
891 | prc_code == PRC_REDIRECT_NET ? rtnet : rthost); | |
892 | } | |
893 | break; | |
894 | ||
895 | case PRC_TIMXCEED_INTRANS: /* icmp message */ | |
896 | case PRC_TIMXCEED_REASS: | |
897 | case PRC_PARAMPROB: | |
898 | break; | |
899 | ||
900 | case PRC_QUENCH: /* icmp message */ | |
901 | /* | |
902 | * See RFC 896. The idea is, when we get a source quench message on | |
903 | * a connection we should send fewer packets. This ties in with the | |
904 | * silly window syndrome whose solution is to send fewer, larger packets. | |
905 | * Deal with quenches by altering threshold used by silly window | |
906 | * syndrome. This is similar to acting as if the window is smaller | |
907 | * than it actually is for deciding when to send, except that when we | |
908 | * do, we use as much as there really is. | |
909 | */ | |
910 | { | |
911 | register struct th *tp; | |
912 | struct tcpcb *t; | |
913 | ||
914 | tp = (struct th *) (&((struct icmp *) arg)->ic_iphdr); | |
915 | t = (struct tcpcb *)tcp_conn_used ((struct inpcb *) 0, | |
916 | tp->t_src, tp->t_s.s_addr, | |
917 | tp->t_dst, tp->t_d.s_addr); | |
918 | if (t) | |
919 | { | |
920 | t->sws_qff -= SWS_QFF_DEC; | |
921 | if (t->sws_qff < SWS_QFF_MIN) | |
922 | t->sws_qff = SWS_QFF_MIN; | |
923 | } | |
924 | } | |
925 | break; | |
926 | ||
927 | case PRC_IFDOWN: | |
928 | { | |
929 | u_long addr; | |
930 | ||
931 | addr = ((struct sockaddr_in *)(arg))->sin_addr.s_addr; | |
932 | inpcb_notify(&tcp, addr, (u_long) 0, error); | |
933 | inpcb_notify(&tcp, (u_long) 0, addr, error); | |
934 | } | |
935 | break; | |
936 | ||
937 | case PRC_HOSTDEAD: /* from imp interface */ | |
938 | case PRC_HOSTUNREACH: | |
939 | /* | |
940 | * get same message for destination hosts and gateways. | |
941 | */ | |
942 | { | |
943 | u_long addr; | |
944 | ||
945 | addr = ((struct sockaddr_in *)arg)->sin_addr.s_addr; | |
946 | in_gdown (&tcp, addr); | |
947 | inpcb_notify(&tcp, (u_long) 0, addr, error); | |
948 | } | |
949 | break; | |
950 | ||
951 | default: | |
952 | panic("tcp_ctlinput"); | |
953 | } | |
954 | } |