fixes for range locking
[unix-history] / usr / src / sys / deprecated / bbnnet / tcp_input.c
CommitLineData
17efd7fe
MK
1#ifdef RCSIDENT
2static char rcsident[] = "$Header: tcp_input.c,v 1.25 85/07/31 09:33:47 walsh Exp $";
3#endif
4
5#include "../h/param.h"
6#include "../h/dir.h"
7#include "../h/user.h"
8#include "../h/kernel.h"
9#include "../h/inode.h"
10#include "../h/mbuf.h"
11#include "../h/socket.h"
12#include "../h/socketvar.h"
13#include "../h/syslog.h"
14
15#include "../net/if.h"
16#include "../net/route.h"
17
18#include "../bbnnet/in.h"
19#include "../bbnnet/net.h"
20#include "../bbnnet/in_pcb.h"
21#include "../bbnnet/in_var.h"
22#include "../bbnnet/fsm.h"
23#include "../bbnnet/tcp.h"
24#include "../bbnnet/seq.h"
25#include "../bbnnet/ip.h"
26#include "../bbnnet/fsmdef.h"
27#include "../bbnnet/macros.h"
28#include "../bbnnet/nopcb.h"
29#ifdef HMPTRAPS
30#include "../bbnnet/hmp_traps.h"
31#endif
32
/*
 * HMP_TRAP(a,b,c) forwards its arguments to hmp_trap() when the kernel is
 * configured with HMPTRAPS.  Without that option the invocation expands to
 * nothing, so the arguments are never evaluated.
 */
#ifndef HMPTRAPS
#define HMP_TRAP(a,b,c)
#else
#define HMP_TRAP(a,b,c)	hmp_trap(a,b,c)
#endif
38
39extern int nosum;
40extern struct inpcb tcp;
41
/*
 * net preproc (66,67,68,69,70,71,72,73,74,75,76)
 *
 * Macro form of former function netprepr().  Decides whether an incoming
 * segment is acceptable for the connection in its current state and
 * performs the reset handling that goes with that decision.
 *
 *	tp	valid tcpcb
 *	n	valid th
 *	inp	valid inpcb ( == tp->t_in_pcb )
 *	retval	lvalue that receives the verdict:
 *		  SAME	  segment is acceptable
 *		  CLOSED  t_close() was called on tp
 *		  -1	  segment is unacceptable; caller should eat it
 *
 * The expansion is a single do { } while (0) statement, so an invocation
 * followed by a semicolon may be used anywhere a statement is legal
 * (including an unbraced if/else arm).  Every argument is parenthesized
 * where it is used, but tp and n are evaluated several times: do not pass
 * expressions with side effects.
 */
#define NETPREPR(tp, n, inp, retval) \
do { \
	(retval) = (-1);	/* assume bad */ \
	/* tell caller to eat segment (unacceptable) */ \
	\
	switch ((tp)->t_state) { \
	case LISTEN: \
		/* Ignore resets, ACKs cause resets, must have SYN. */ \
		if ((n)->t_flags & T_RST) \
			break; \
		if ((n)->t_flags & T_ACK) \
			send_rst((tp), (n)); \
		else if ((n)->t_flags & T_SYN) \
			(retval) = SAME; \
		break; \
	\
	case SYN_SENT: \
		/* Bad ACKs cause resets, good resets close, must have SYN. */ \
		if (((n)->t_flags & T_ACK) && \
		    (SEQ_GEQ((tp)->iss, (n)->t_ackno) || \
		     SEQ_GT((n)->t_ackno, (tp)->snd_hi))) { \
			send_rst((tp), (n)); \
		} else if ((n)->t_flags & T_RST) { \
			if ((n)->t_flags & T_ACK) { \
				t_close((tp), ECONNREFUSED); \
				(retval) = CLOSED; \
			} \
		} else if ((n)->t_flags & T_SYN) { \
			(retval) = SAME; \
		} \
		break; \
	\
	case 0: \
		/* \
		 * after bind, but before we've had a chance to \
		 * listen or connect \
		 */ \
		break; \
	\
	default: { \
		struct sockbuf *sorcv; \
		sequence xend; \
		\
		/* \
		 * Part of packet must fall in window. \
		 * This allows for segments that are partially retransmits \
		 * and partially new. \
		 * otherwise just ACK and drop. \
		 */ \
		sorcv = &(inp)->inp_socket->so_rcv; \
		xend = (n)->t_seq; \
		if ((n)->t_len) \
			/* remember, could be an ACK-only packet */ \
			xend += (n)->t_len - 1; \
		if ((n)->t_flags & T_FIN) \
			xend++;	/* in case FIN + rxmitted data (TOPS-20) */ \
		if (SEQ_LT(xend, (tp)->rcv_nxt) || \
		    SEQ_GEQ((n)->t_seq, (tp)->rcv_nxt + sbspace(sorcv))) { \
			(tp)->t_preproc++; \
			send_tcp((tp), TCP_CTL); \
			HMP_TRAP(T_TCP_WINDOW, (caddr_t)0, 0); \
		/* \
		 * Due to 4.2BSD net architecture, don't need to send \
		 * L_SYN_RCVD socket back to LISTEN on reset since server \
		 * socket and communication paths are separate. \
		 */ \
		} else if ((n)->t_flags & T_RST) { \
			t_close((tp), ENETRESET); \
			(retval) = CLOSED; \
		/* No SYNs allowed unless *SYN_RCVD */ \
		} else if (((n)->t_flags & T_SYN) && ((tp)->t_state >= ESTAB)) { \
			send_rst((tp), (n)); \
			t_close((tp), ENETRESET); \
			(retval) = CLOSED; \
		/* \
		 * Must have good ACK.  Bad ACKs cause resets only in \
		 * SYN_RCVD states.  In other states, this may be a slow pkt? \
		 */ \
		} else if ((n)->t_flags & T_ACK) { \
			if (SEQ_GT((tp)->snd_una, (n)->t_ackno) || \
			    SEQ_GT((n)->t_ackno, (tp)->snd_hi)) { \
				if ((tp)->t_state == SYN_RCVD || \
				    (tp)->t_state == L_SYN_RCVD) \
					send_rst((tp), (n)); \
			} else { \
				/* \
				 * Acceptable segment: \
				 * Reset no activity timer on established and \
				 * closing connections. \
				 */ \
				if ((tp)->t_state >= ESTAB) \
					(tp)->t_timers[TNOACT] = (tp)->t_noact; \
				(retval) = SAME; \
			} \
		} \
		break; \
	} \
	} \
} while (0)
141
142
143int tcp_net_keep;
144
145/*
146 * This is the scheduler for the tcp machine. It is called
147 * from the lower network levels, either directly from the
148 * internet level, in case of input from the network; or
149 * indirectly from netmain, in case of user or timer events
150 * which awaken the main loop.
151 */
152tcp_input(mp, fragsize)
153register struct mbuf *mp;
154int fragsize;
155{
156 register struct th *tp;
157 register int hlen;
158 register struct tcpcb *t;
159 register struct inpcb *inp;
160 struct mbuf *m;
161 int i, tlen;
162 struct work w;
163 u_short cks;
164
165 tcpstat.t_total ++;
166
167 /*
168 * see ip_input()
169 */
170 if ((mp->m_off > MMAXOFF) || (mp->m_len < sizeof(struct th)))
171 {
172 if ((mp = m_pullup(mp, sizeof(struct th))) == NULL)
173 {
174 tcpstat.t_tooshort ++;
175 return;
176 }
177 }
178
179 /* set up needed info from ip header, note that beginning
180 of tcp header struct overlaps ip header. ip options
181 have been removed by ip level option processing */
182
183 tp = mtod(mp, struct th *);
184
185 /* make sure header does not overflow mbuf */
186
187 hlen = tp->t_off << TCP_OFFSHIFT;
188 if (hlen < TCPSIZE)
189 {
190 ip_log ((struct ip *) tp, "tcp t_off too small");
191 netlog(mp);
192 return;
193 }
194 if (hlen > mp->m_len)
195 {
196 if ((mp = m_pullup(mp, hlen)) == NULL)
197 {
198 ip_log((struct ip *) tp, "tcp header overflow");
199#ifdef HMPTRAPS
200 /* hmp_trap(T_TCP_OVFLO, (caddr_t)0, 0); */
201#else
202 /* netlog(mp); */
203#endif
204 return;
205 }
206 tp = mtod(mp, struct th *);
207 }
208
209 tlen = ((struct ip *)tp)->ip_len;
210 tp->t_len = htons((u_short)tlen);
211 tp->t_next = NULL;
212 tp->t_prev = NULL;
213 tp->t_x1 = 0;
214
215 /*
216 * do checksum calculation, drop seg if bad
217 */
218 i = (u_short)tp->t_sum;
219 tp->t_sum = 0;
220 if (i != (cks = (u_short)in_cksum(mp, tlen + sizeof(struct ip))))
221 {
222 tcpstat.t_badsum++;
223 if (! nosum)
224 {
225#ifdef HMPTRAPS
226 /* hmp_trap(T_TCP_CKSUM, (caddr_t)0,0); */
227#endif
228 inet_cksum_err ("tcp", (struct ip *) tp, (u_long) i, (u_long) cks);
229 netlog(mp);
230 return;
231 }
232 }
233
234 /* find a tcb for incoming message */
235 inp = in_pcblookup(&tcp, tp->t_s.s_addr, tp->t_src,
236 tp->t_d.s_addr, tp->t_dst, TRUE);
237
238 if ((inp != NULL) && ((t = (struct tcpcb *)inp->inp_ppcb) != NULL))
239 {
240 /* found a tcp for message */
241 /* byte swap header */
242
243 if ((int)(tp->t_len = tlen - hlen) < 0)
244 {
245 ip_log((struct ip *) tp, "tcp header length");
246#ifdef HMPTRAPS
247 /* hmp_trap(T_TCP_HLEN, (caddr_t)0,0); */
248#else
249 netlog(mp);
250#endif
251 return;
252 }
253 tp->t_seq = ntohl(tp->t_seq);
254 tp->t_ackno = ntohl(tp->t_ackno);
255 tp->t_win = ntohs((u_short)tp->t_win);
256 tp->t_urp = ntohs((u_short)tp->t_urp);
257
258 /* record the max fragment size */
259
260 t->t_maxfrag = MAX(t->t_maxfrag, fragsize);
261
262 /* do TCP option processing */
263
264 if (hlen > TCPSIZE)
265 tcp_opt(t, tp, hlen);
266
267 /* check seg seq #, do RST processing */
268
269 NETPREPR(t, tp, inp, i);
270 if (i != SAME)
271 {
272 /* segment failed preprocessing. Drop it and
273 * possibly enter new state. For now, always
274 * returns SAME/-1/CLOSED
275 */
276 m_freem(mp);
277/*
278 if ((i != -1) && (i != CLOSED))
279 t->t_state = i;
280*/
281 }
282 else
283 {
284 if (sbspace(&inp->inp_socket->so_rcv) <= 0 &&
285 tp->t_len != 0)
286 {
287 /*
288 * The user's receive q is full. Either the
289 * remote TCP is not paying attention to the
290 * window, or this is a persistence packet.
291 *
292 * The first reason was once common with
293 * TOPS-20. Let's conserve network resources
294 * by holding onto the packet in the unack q.
295 * Place it at the end of the list.
296 */
297 mp->m_act = NULL;
298 if ((m = t->t_rcv_unack) != NULL)
299 {
300 while (m->m_act != NULL)
301 m = m->m_act;
302 m->m_act = mp;
303 }
304 else
305 t->t_rcv_unack = mp;
306
307 /*
308 * ACK if it was a window probe, just in case
309 * they have a TNOACT timer running.
310 */
311 send_tcp(t, TCP_CTL);
312 }
313 else
314 {
315 int act, newstate;
316 struct socket *so;
317
318 /* set up work entry for seg, and call
319 the fsm to process it */
320
321 hlen += sizeof(struct ip);
322 mp->m_off += hlen;
323 mp->m_len -= hlen;
324
325 /** HAND CODED action() CALL **/
326
327 w.w_type = INRECV;
328 w.w_tcb = t;
329 w.w_dat = (char *)tp;
330
331 /* get index of action routine from
332 * transition table
333 */
334 act = fstab[t->t_state][INRECV];
335
336 /* invalid state transition, just
337 * print a message and ignore */
338
339 if (act == 0)
340 {
8902c2d0 341 log(LOG_INFO, "tcp bad state: tcb=%x state=%d INRECV\n", t, t->t_state);
17efd7fe
MK
342 m_freem(mp);
343 return;
344 }
345
346 so = t->t_in_pcb->inp_socket;
347 tcp_net_keep = FALSE;
348 newstate = (*fsactab[act])(&w);
349
350 /* debugging info */
351 TCP_DEBUG (so, t, &w, act, newstate);
352
353 /* if CLOSED, lost tcpcb */
354 if ((newstate != SAME) && (newstate != CLOSED))
355 t->t_state = newstate;
356 if (! tcp_net_keep)
357 m_freem(mp);
358
359 /** END action() **/
360 }
361 }
362 }
363 else
364 /* nobody wants it */
365 send_uncon_rst (tp, mp, tlen, hlen);
366}
367
368send_uncon_rst (n, mp, tlen, hlen)
369register struct th *n;
370register struct mbuf *mp;
371{
372 struct in_addr tempinaddr;
373 u_short tempport;
374 int error;
375
376 /* make sure we don't send a RST in response to an RST */
377
378 if (n->t_flags & T_RST)
379 {
380 m_freem(mp);
381 return;
382 }
383
384 /* free everything but the header */
385
386 m_freem(mp->m_next);
387 mp->m_next = NULL;
388 mp->m_len = sizeof(struct th);
389
390 /* form a reset from the packet and send */
391
392 tempinaddr = n->t_d;
393 n->t_d = n->t_s;
394 n->t_s = tempinaddr;
395
396 tempport = n->t_src;
397 n->t_src = n->t_dst;
398 n->t_dst = tempport;
399
400 if (n->t_flags&T_ACK)
401 n->t_seq = n->t_ackno;
402 else
403 {
404 n->t_ackno = htonl((u_long)
405 ntohl((u_long)n->t_seq)
406 + tlen - hlen
407 + (n->t_flags&T_SYN ? 1 : 0));
408 n->t_seq = 0;
409 }
410 n->t_flags = (n->t_flags&T_ACK) ? T_RST : T_RST+T_ACK;
411 n->t_len = htons((u_short)TCPSIZE);
412 n->t_off = TCPSIZE >> TCP_OFFSHIFT;
413 n->t_sum = in_cksum(mp, sizeof(struct th));
414
415 NOPCB_IPSEND (mp, TCPSIZE, FALSE, error);
416 tcpstat.t_badsegs++;
417
418#ifdef lint
419 error = error;
420#endif
421}
422
423/*
424 * Entry into TCP finite state machine
425 */
426action(wp)
427register struct work *wp;
428{
429 register act, newstate;
430 register struct tcpcb *tp;
431 register struct socket *so;
432
433 tp = wp->w_tcb;
434 so = tp->t_in_pcb->inp_socket;
435
436 ACTION (tp, so, wp, wp->w_type, wp->w_dat, act, newstate);
437 return(newstate);
438}
439
440
441struct mbuf *tcpdebuf;
442int tcprint;
443
444/*
445 * Write a record in the tcp debugging log
446 */
447tcp_debug(tp, wp, newstate)
448register struct tcpcb *tp;
449register struct work *wp;
450register newstate;
451{
452 register struct t_debug *dp;
453 register struct mbuf *m;
454
455#ifdef TCPDEBUG
456 if (tcprint)
457 {
458 /*
459 * Print debugging info directly on the console (use this for
460 * intial testing only).
461 */
462 printf("TCP(%x) %s X %s", tp, tcpstates[tp->t_state],
463 tcpinputs[wp->w_type]);
464
465 if (wp->w_type == ISTIMER)
466 printf("(%s)", tcptimers[wp->w_stype]);
467
468 printf(" --> %s",
469 tcpstates[ (newstate > 0) ? newstate : tp->t_state]);
470
471 if (newstate < 0)
472 printf(" (FAILED)\n");
473 else
474 putchar('\n', 0);
475 }
476#endif
477
478 /*
479 * Get an mbuf to write the debugging record into. If we don't already
480 * have one, allocate a new one.
481 */
482 if ((m = tcpdebuf) == NULL)
483 {
484 register struct mbuf *c;
485
486 if ((tcpdebuf = m = m_get(M_DONTWAIT, MT_DATA)) == NULL)
487 return;
488 /*
489 * If possible, use a cluster so that we need to wake up the
490 * raw listener less often and reduce likelihood he misses
491 * some information.
492 */
493 MCLGET(c, 1);
494 if (c)
495 {
496 m->m_off = ((int) c) - ((int) m);
497 m->m_act = (struct mbuf *) TCDBLEN;
498 }
499 else
500 m->m_act = (struct mbuf *) TDBLEN;
501 m->m_len = 0;
502 }
503
504 dp = (struct t_debug *) (mtod(m, char *) + m->m_len);
505 /*
506 * Set up the debugging record.
507 */
508 dp->t_iptime = iptime();
509 dp->t_input = wp->w_type;
510 dp->t_timer = wp->w_stype;
511 dp->t_newstate = newstate;
512 if (tp != NULL)
513 {
514 dp->t_oldstate = tp->t_state;
515 dp->t_tcb = (*tp); /* structure copy */
516 }
517 else
518 dp->t_oldstate = 0;
519
520 if (wp->w_type == INRECV)
521 {
522 register struct th *n;
523
524 n = (struct th *)wp->w_dat;
525 dp->t_hdr = (*n); /* structure copy */
526 }
527 /*
528 * If the mbuf is full, dispatch it to a raw listener.
529 * Also flush if the connection we're debugging closes so that
530 * packet-printer/systems analyst sees final transitions.
531 */
532 m->m_len += sizeof(struct t_debug);
533 if ((m->m_len >= ((int) m->m_act)) || (newstate == CLOSED))
534 {
535 m->m_act = 0;
536 tcpdebuglog(m);
537 tcpdebuf = NULL;
538 }
539}