Commit | Line | Data |
---|---|---|
17efd7fe MK |
1 | #ifdef RCSIDENT |
2 | static char rcsident[] = "$Header: tcp_input.c,v 1.25 85/07/31 09:33:47 walsh Exp $"; | |
3 | #endif | |
4 | ||
5 | #include "../h/param.h" | |
6 | #include "../h/dir.h" | |
7 | #include "../h/user.h" | |
8 | #include "../h/kernel.h" | |
9 | #include "../h/inode.h" | |
10 | #include "../h/mbuf.h" | |
11 | #include "../h/socket.h" | |
12 | #include "../h/socketvar.h" | |
13 | #include "../h/syslog.h" | |
14 | ||
15 | #include "../net/if.h" | |
16 | #include "../net/route.h" | |
17 | ||
18 | #include "../bbnnet/in.h" | |
19 | #include "../bbnnet/net.h" | |
20 | #include "../bbnnet/in_pcb.h" | |
21 | #include "../bbnnet/in_var.h" | |
22 | #include "../bbnnet/fsm.h" | |
23 | #include "../bbnnet/tcp.h" | |
24 | #include "../bbnnet/seq.h" | |
25 | #include "../bbnnet/ip.h" | |
26 | #include "../bbnnet/fsmdef.h" | |
27 | #include "../bbnnet/macros.h" | |
28 | #include "../bbnnet/nopcb.h" | |
29 | #ifdef HMPTRAPS | |
30 | #include "../bbnnet/hmp_traps.h" | |
31 | #endif | |
32 | ||
/*
 * HMP_TRAP(a,b,c) forwards to hmp_trap() when the kernel is built with
 * HMPTRAPS; otherwise it expands to nothing, so its arguments are never
 * evaluated.
 */
#ifndef HMPTRAPS
#define HMP_TRAP(a,b,c)
#else
#define HMP_TRAP(a,b,c)	hmp_trap(a,b,c)
#endif

extern int nosum;		/* non-zero: accept segments whose checksum is bad */
extern struct inpcb tcp;	/* pcb list head handed to in_pcblookup() */
41 | ||
/*
 * net preproc (66,67,68,69,70,71,72,73,74,75,76)
 *
 * macro form of former function netprepr()
 *
 * Decides whether an incoming segment is acceptable for the connection's
 * current state, doing the RST / bad-ACK handling on the way.
 *
 *	tp	valid tcpcb
 *	n	valid th
 *	inp	valid inpcb ( == tp->t_in_pcb )
 *	retval	lvalue that receives the verdict:
 *		  SAME    segment is acceptable, go on and process it
 *		  CLOSED  t_close() was called on tp
 *		  -1      unacceptable, caller should eat the segment
 *
 * Wrapped in do { } while (0) so that "NETPREPR(...);" is a single
 * statement wherever it is used.  Arguments are evaluated more than once,
 * so they must be free of side effects.
 */
#define NETPREPR(tp, n, inp, retval)                                        \
do {                                                                        \
    (retval) = (-1);    /* assume bad */                                    \
    /* tell caller to eat segment (unacceptable) */                         \
                                                                            \
    switch ((tp)->t_state) {                                                \
    case LISTEN:                                                            \
        /* Ignore resets, ACKs cause resets, must have SYN. */              \
        if ((n)->t_flags&T_RST) {                                           \
            break;                                                          \
        } else if ((n)->t_flags&T_ACK) {                                    \
            send_rst((tp), (n));                                            \
        } else if ((n)->t_flags&T_SYN) {                                    \
            (retval) = SAME;                                                \
        }                                                                   \
        break;                                                              \
                                                                            \
    case SYN_SENT:                                                          \
        /* Bad ACKs cause resets, good resets close, must have SYN. */      \
        if (((n)->t_flags&T_ACK) &&                                         \
            (SEQ_GEQ((tp)->iss, (n)->t_ackno) ||                            \
             SEQ_GT((n)->t_ackno, (tp)->snd_hi))) {                         \
            send_rst((tp), (n));                                            \
        } else if ((n)->t_flags&T_RST) {                                    \
            /* a reset only counts if it acknowledges our SYN */            \
            if ((n)->t_flags&T_ACK) {                                       \
                t_close((tp), ECONNREFUSED);                                \
                (retval) = CLOSED;                                          \
            }                                                               \
        } else if ((n)->t_flags&T_SYN) {                                    \
            (retval) = SAME;                                                \
        }                                                                   \
        break;                                                              \
                                                                            \
    case 0:                                                                 \
        /*                                                                  \
         * after bind, but before we've had a chance to                     \
         * listen or connect                                                \
         */                                                                 \
        break;                                                              \
                                                                            \
    default:                                                                \
      { struct sockbuf *sorcv; sequence xend;                               \
        /*                                                                  \
         * Part of packet must fall in window.                              \
         * This allows for segments that are partially retransmits          \
         * and partially new.                                               \
         * otherwise just ACK and drop.                                     \
         */                                                                 \
        sorcv = &(inp)->inp_socket->so_rcv;                                 \
        xend = (n)->t_seq;                                                  \
        if ((n)->t_len) {                                                   \
            /* remember, could be an ACK-only packet */                     \
            xend += (n)->t_len -1;                                          \
        }                                                                   \
        if ((n)->t_flags & T_FIN) {                                         \
            xend ++;    /* in case FIN + rxmitted data (TOPS-20) */         \
        }                                                                   \
        if (SEQ_LT(xend, (tp)->rcv_nxt) ||                                  \
            SEQ_GEQ((n)->t_seq, (tp)->rcv_nxt + sbspace(sorcv))) {          \
            /* entirely outside the receive window: count, ACK, drop */     \
            (tp)->t_preproc++;                                              \
            send_tcp((tp), TCP_CTL);                                        \
            HMP_TRAP(T_TCP_WINDOW, (caddr_t)0,0);                           \
            /*                                                              \
             * Due to 4.2BSD net architecture, don't need to send           \
             * L_SYN_RCVD socket back to LISTEN on reset since server       \
             * socket and communication paths are separate.                 \
             */                                                             \
        } else if ((n)->t_flags&T_RST) {                                    \
            t_close((tp), ENETRESET);                                       \
            (retval) = CLOSED;                                              \
        /* No SYNs allowed unless *SYN_RCVD */                              \
        } else if (((n)->t_flags&T_SYN) && ((tp)->t_state >= ESTAB)) {      \
            send_rst((tp), (n));                                            \
            t_close((tp), ENETRESET);                                       \
            (retval) = CLOSED;                                              \
        /*                                                                  \
         * Must have good ACK.  Bad ACKs cause resets only in               \
         * SYN_RCVD states.  In other states, this may be a slow pkt?       \
         */                                                                 \
        } else if ((n)->t_flags&T_ACK) {                                    \
            if (SEQ_GT((tp)->snd_una, (n)->t_ackno) ||                      \
                SEQ_GT((n)->t_ackno, (tp)->snd_hi)) {                       \
                if ((tp)->t_state == SYN_RCVD ||                            \
                    (tp)->t_state == L_SYN_RCVD) {                          \
                    send_rst((tp), (n));                                    \
                }                                                           \
            } else {                                                        \
                /*                                                          \
                 * Acceptable segment:                                      \
                 * Reset no activity timer on established and               \
                 * closing connections.                                     \
                 */                                                         \
                if ((tp)->t_state >= ESTAB) {                               \
                    (tp)->t_timers[TNOACT] = (tp)->t_noact;                 \
                }                                                           \
                (retval) = SAME;                                            \
            }                                                               \
        }                                                                   \
        /* a segment without ACK falls through with retval still -1 */      \
      }                                                                     \
    }                                                                       \
} while (0)
141 | ||
142 | ||
143 | int tcp_net_keep; | |
144 | ||
145 | /* | |
146 | * This is the scheduler for the tcp machine. It is called | |
147 | * from the lower network levels, either directly from the | |
148 | * internet level, in case of input from the network; or | |
149 | * indirectly from netmain, in case of user or timer events | |
150 | * which awaken the main loop. | |
151 | */ | |
152 | tcp_input(mp, fragsize) | |
153 | register struct mbuf *mp; | |
154 | int fragsize; | |
155 | { | |
156 | register struct th *tp; | |
157 | register int hlen; | |
158 | register struct tcpcb *t; | |
159 | register struct inpcb *inp; | |
160 | struct mbuf *m; | |
161 | int i, tlen; | |
162 | struct work w; | |
163 | u_short cks; | |
164 | ||
165 | tcpstat.t_total ++; | |
166 | ||
167 | /* | |
168 | * see ip_input() | |
169 | */ | |
170 | if ((mp->m_off > MMAXOFF) || (mp->m_len < sizeof(struct th))) | |
171 | { | |
172 | if ((mp = m_pullup(mp, sizeof(struct th))) == NULL) | |
173 | { | |
174 | tcpstat.t_tooshort ++; | |
175 | return; | |
176 | } | |
177 | } | |
178 | ||
179 | /* set up needed info from ip header, note that beginning | |
180 | of tcp header struct overlaps ip header. ip options | |
181 | have been removed by ip level option processing */ | |
182 | ||
183 | tp = mtod(mp, struct th *); | |
184 | ||
185 | /* make sure header does not overflow mbuf */ | |
186 | ||
187 | hlen = tp->t_off << TCP_OFFSHIFT; | |
188 | if (hlen < TCPSIZE) | |
189 | { | |
190 | ip_log ((struct ip *) tp, "tcp t_off too small"); | |
191 | netlog(mp); | |
192 | return; | |
193 | } | |
194 | if (hlen > mp->m_len) | |
195 | { | |
196 | if ((mp = m_pullup(mp, hlen)) == NULL) | |
197 | { | |
198 | ip_log((struct ip *) tp, "tcp header overflow"); | |
199 | #ifdef HMPTRAPS | |
200 | /* hmp_trap(T_TCP_OVFLO, (caddr_t)0, 0); */ | |
201 | #else | |
202 | /* netlog(mp); */ | |
203 | #endif | |
204 | return; | |
205 | } | |
206 | tp = mtod(mp, struct th *); | |
207 | } | |
208 | ||
209 | tlen = ((struct ip *)tp)->ip_len; | |
210 | tp->t_len = htons((u_short)tlen); | |
211 | tp->t_next = NULL; | |
212 | tp->t_prev = NULL; | |
213 | tp->t_x1 = 0; | |
214 | ||
215 | /* | |
216 | * do checksum calculation, drop seg if bad | |
217 | */ | |
218 | i = (u_short)tp->t_sum; | |
219 | tp->t_sum = 0; | |
220 | if (i != (cks = (u_short)in_cksum(mp, tlen + sizeof(struct ip)))) | |
221 | { | |
222 | tcpstat.t_badsum++; | |
223 | if (! nosum) | |
224 | { | |
225 | #ifdef HMPTRAPS | |
226 | /* hmp_trap(T_TCP_CKSUM, (caddr_t)0,0); */ | |
227 | #endif | |
228 | inet_cksum_err ("tcp", (struct ip *) tp, (u_long) i, (u_long) cks); | |
229 | netlog(mp); | |
230 | return; | |
231 | } | |
232 | } | |
233 | ||
234 | /* find a tcb for incoming message */ | |
235 | inp = in_pcblookup(&tcp, tp->t_s.s_addr, tp->t_src, | |
236 | tp->t_d.s_addr, tp->t_dst, TRUE); | |
237 | ||
238 | if ((inp != NULL) && ((t = (struct tcpcb *)inp->inp_ppcb) != NULL)) | |
239 | { | |
240 | /* found a tcp for message */ | |
241 | /* byte swap header */ | |
242 | ||
243 | if ((int)(tp->t_len = tlen - hlen) < 0) | |
244 | { | |
245 | ip_log((struct ip *) tp, "tcp header length"); | |
246 | #ifdef HMPTRAPS | |
247 | /* hmp_trap(T_TCP_HLEN, (caddr_t)0,0); */ | |
248 | #else | |
249 | netlog(mp); | |
250 | #endif | |
251 | return; | |
252 | } | |
253 | tp->t_seq = ntohl(tp->t_seq); | |
254 | tp->t_ackno = ntohl(tp->t_ackno); | |
255 | tp->t_win = ntohs((u_short)tp->t_win); | |
256 | tp->t_urp = ntohs((u_short)tp->t_urp); | |
257 | ||
258 | /* record the max fragment size */ | |
259 | ||
260 | t->t_maxfrag = MAX(t->t_maxfrag, fragsize); | |
261 | ||
262 | /* do TCP option processing */ | |
263 | ||
264 | if (hlen > TCPSIZE) | |
265 | tcp_opt(t, tp, hlen); | |
266 | ||
267 | /* check seg seq #, do RST processing */ | |
268 | ||
269 | NETPREPR(t, tp, inp, i); | |
270 | if (i != SAME) | |
271 | { | |
272 | /* segment failed preprocessing. Drop it and | |
273 | * possibly enter new state. For now, always | |
274 | * returns SAME/-1/CLOSED | |
275 | */ | |
276 | m_freem(mp); | |
277 | /* | |
278 | if ((i != -1) && (i != CLOSED)) | |
279 | t->t_state = i; | |
280 | */ | |
281 | } | |
282 | else | |
283 | { | |
284 | if (sbspace(&inp->inp_socket->so_rcv) <= 0 && | |
285 | tp->t_len != 0) | |
286 | { | |
287 | /* | |
288 | * The user's receive q is full. Either the | |
289 | * remote TCP is not paying attention to the | |
290 | * window, or this is a persistence packet. | |
291 | * | |
292 | * The first reason was once common with | |
293 | * TOPS-20. Let's conserve network resources | |
294 | * by holding onto the packet in the unack q. | |
295 | * Place it at the end of the list. | |
296 | */ | |
297 | mp->m_act = NULL; | |
298 | if ((m = t->t_rcv_unack) != NULL) | |
299 | { | |
300 | while (m->m_act != NULL) | |
301 | m = m->m_act; | |
302 | m->m_act = mp; | |
303 | } | |
304 | else | |
305 | t->t_rcv_unack = mp; | |
306 | ||
307 | /* | |
308 | * ACK if it was a window probe, just in case | |
309 | * they have a TNOACT timer running. | |
310 | */ | |
311 | send_tcp(t, TCP_CTL); | |
312 | } | |
313 | else | |
314 | { | |
315 | int act, newstate; | |
316 | struct socket *so; | |
317 | ||
318 | /* set up work entry for seg, and call | |
319 | the fsm to process it */ | |
320 | ||
321 | hlen += sizeof(struct ip); | |
322 | mp->m_off += hlen; | |
323 | mp->m_len -= hlen; | |
324 | ||
325 | /** HAND CODED action() CALL **/ | |
326 | ||
327 | w.w_type = INRECV; | |
328 | w.w_tcb = t; | |
329 | w.w_dat = (char *)tp; | |
330 | ||
331 | /* get index of action routine from | |
332 | * transition table | |
333 | */ | |
334 | act = fstab[t->t_state][INRECV]; | |
335 | ||
336 | /* invalid state transition, just | |
337 | * print a message and ignore */ | |
338 | ||
339 | if (act == 0) | |
340 | { | |
8902c2d0 | 341 | log(LOG_INFO, "tcp bad state: tcb=%x state=%d INRECV\n", t, t->t_state); |
17efd7fe MK |
342 | m_freem(mp); |
343 | return; | |
344 | } | |
345 | ||
346 | so = t->t_in_pcb->inp_socket; | |
347 | tcp_net_keep = FALSE; | |
348 | newstate = (*fsactab[act])(&w); | |
349 | ||
350 | /* debugging info */ | |
351 | TCP_DEBUG (so, t, &w, act, newstate); | |
352 | ||
353 | /* if CLOSED, lost tcpcb */ | |
354 | if ((newstate != SAME) && (newstate != CLOSED)) | |
355 | t->t_state = newstate; | |
356 | if (! tcp_net_keep) | |
357 | m_freem(mp); | |
358 | ||
359 | /** END action() **/ | |
360 | } | |
361 | } | |
362 | } | |
363 | else | |
364 | /* nobody wants it */ | |
365 | send_uncon_rst (tp, mp, tlen, hlen); | |
366 | } | |
367 | ||
368 | send_uncon_rst (n, mp, tlen, hlen) | |
369 | register struct th *n; | |
370 | register struct mbuf *mp; | |
371 | { | |
372 | struct in_addr tempinaddr; | |
373 | u_short tempport; | |
374 | int error; | |
375 | ||
376 | /* make sure we don't send a RST in response to an RST */ | |
377 | ||
378 | if (n->t_flags & T_RST) | |
379 | { | |
380 | m_freem(mp); | |
381 | return; | |
382 | } | |
383 | ||
384 | /* free everything but the header */ | |
385 | ||
386 | m_freem(mp->m_next); | |
387 | mp->m_next = NULL; | |
388 | mp->m_len = sizeof(struct th); | |
389 | ||
390 | /* form a reset from the packet and send */ | |
391 | ||
392 | tempinaddr = n->t_d; | |
393 | n->t_d = n->t_s; | |
394 | n->t_s = tempinaddr; | |
395 | ||
396 | tempport = n->t_src; | |
397 | n->t_src = n->t_dst; | |
398 | n->t_dst = tempport; | |
399 | ||
400 | if (n->t_flags&T_ACK) | |
401 | n->t_seq = n->t_ackno; | |
402 | else | |
403 | { | |
404 | n->t_ackno = htonl((u_long) | |
405 | ntohl((u_long)n->t_seq) | |
406 | + tlen - hlen | |
407 | + (n->t_flags&T_SYN ? 1 : 0)); | |
408 | n->t_seq = 0; | |
409 | } | |
410 | n->t_flags = (n->t_flags&T_ACK) ? T_RST : T_RST+T_ACK; | |
411 | n->t_len = htons((u_short)TCPSIZE); | |
412 | n->t_off = TCPSIZE >> TCP_OFFSHIFT; | |
413 | n->t_sum = in_cksum(mp, sizeof(struct th)); | |
414 | ||
415 | NOPCB_IPSEND (mp, TCPSIZE, FALSE, error); | |
416 | tcpstat.t_badsegs++; | |
417 | ||
418 | #ifdef lint | |
419 | error = error; | |
420 | #endif | |
421 | } | |
422 | ||
423 | /* | |
424 | * Entry into TCP finite state machine | |
425 | */ | |
426 | action(wp) | |
427 | register struct work *wp; | |
428 | { | |
429 | register act, newstate; | |
430 | register struct tcpcb *tp; | |
431 | register struct socket *so; | |
432 | ||
433 | tp = wp->w_tcb; | |
434 | so = tp->t_in_pcb->inp_socket; | |
435 | ||
436 | ACTION (tp, so, wp, wp->w_type, wp->w_dat, act, newstate); | |
437 | return(newstate); | |
438 | } | |
439 | ||
440 | ||
441 | struct mbuf *tcpdebuf; | |
442 | int tcprint; | |
443 | ||
444 | /* | |
445 | * Write a record in the tcp debugging log | |
446 | */ | |
447 | tcp_debug(tp, wp, newstate) | |
448 | register struct tcpcb *tp; | |
449 | register struct work *wp; | |
450 | register newstate; | |
451 | { | |
452 | register struct t_debug *dp; | |
453 | register struct mbuf *m; | |
454 | ||
455 | #ifdef TCPDEBUG | |
456 | if (tcprint) | |
457 | { | |
458 | /* | |
459 | * Print debugging info directly on the console (use this for | |
460 | * intial testing only). | |
461 | */ | |
462 | printf("TCP(%x) %s X %s", tp, tcpstates[tp->t_state], | |
463 | tcpinputs[wp->w_type]); | |
464 | ||
465 | if (wp->w_type == ISTIMER) | |
466 | printf("(%s)", tcptimers[wp->w_stype]); | |
467 | ||
468 | printf(" --> %s", | |
469 | tcpstates[ (newstate > 0) ? newstate : tp->t_state]); | |
470 | ||
471 | if (newstate < 0) | |
472 | printf(" (FAILED)\n"); | |
473 | else | |
474 | putchar('\n', 0); | |
475 | } | |
476 | #endif | |
477 | ||
478 | /* | |
479 | * Get an mbuf to write the debugging record into. If we don't already | |
480 | * have one, allocate a new one. | |
481 | */ | |
482 | if ((m = tcpdebuf) == NULL) | |
483 | { | |
484 | register struct mbuf *c; | |
485 | ||
486 | if ((tcpdebuf = m = m_get(M_DONTWAIT, MT_DATA)) == NULL) | |
487 | return; | |
488 | /* | |
489 | * If possible, use a cluster so that we need to wake up the | |
490 | * raw listener less often and reduce likelihood he misses | |
491 | * some information. | |
492 | */ | |
493 | MCLGET(c, 1); | |
494 | if (c) | |
495 | { | |
496 | m->m_off = ((int) c) - ((int) m); | |
497 | m->m_act = (struct mbuf *) TCDBLEN; | |
498 | } | |
499 | else | |
500 | m->m_act = (struct mbuf *) TDBLEN; | |
501 | m->m_len = 0; | |
502 | } | |
503 | ||
504 | dp = (struct t_debug *) (mtod(m, char *) + m->m_len); | |
505 | /* | |
506 | * Set up the debugging record. | |
507 | */ | |
508 | dp->t_iptime = iptime(); | |
509 | dp->t_input = wp->w_type; | |
510 | dp->t_timer = wp->w_stype; | |
511 | dp->t_newstate = newstate; | |
512 | if (tp != NULL) | |
513 | { | |
514 | dp->t_oldstate = tp->t_state; | |
515 | dp->t_tcb = (*tp); /* structure copy */ | |
516 | } | |
517 | else | |
518 | dp->t_oldstate = 0; | |
519 | ||
520 | if (wp->w_type == INRECV) | |
521 | { | |
522 | register struct th *n; | |
523 | ||
524 | n = (struct th *)wp->w_dat; | |
525 | dp->t_hdr = (*n); /* structure copy */ | |
526 | } | |
527 | /* | |
528 | * If the mbuf is full, dispatch it to a raw listener. | |
529 | * Also flush if the connection we're debugging closes so that | |
530 | * packet-printer/systems analyst sees final transitions. | |
531 | */ | |
532 | m->m_len += sizeof(struct t_debug); | |
533 | if ((m->m_len >= ((int) m->m_act)) || (newstate == CLOSED)) | |
534 | { | |
535 | m->m_act = 0; | |
536 | tcpdebuglog(m); | |
537 | tcpdebuf = NULL; | |
538 | } | |
539 | } |