drop small numbers of bytes from ends of packets quickly
[unix-history] / usr / src / sys / netinet / tcp_usrreq.c
CommitLineData
c124e997 1/* tcp_usrreq.c 1.54 82/03/29 */
72f24d7d 2
4eb5d593 3#include "../h/param.h"
72f24d7d 4#include "../h/systm.h"
dad64fdf
BJ
5#include "../h/mbuf.h"
6#include "../h/socket.h"
eee3ab16
BJ
7#include "../h/socketvar.h"
8#include "../h/protosw.h"
0974b45c 9#include "../net/in.h"
c124e997 10#include "../net/route.h"
0974b45c
BJ
11#include "../net/in_pcb.h"
12#include "../net/in_systm.h"
4ad99bae 13#include "../net/if.h"
eee3ab16 14#include "../net/ip.h"
eb44bfb2 15#include "../net/ip_var.h"
eee3ab16 16#include "../net/tcp.h"
eee3ab16 17#include "../net/tcp_fsm.h"
0974b45c
BJ
18#include "../net/tcp_seq.h"
19#include "../net/tcp_timer.h"
eee3ab16 20#include "../net/tcp_var.h"
0974b45c 21#include "../net/tcpip.h"
17b82ed4 22#include "../net/tcp_debug.h"
f1b2fa5b 23#include "../errno.h"
eee3ab16 24
290e0b0a
BJ
25/*
26 * TCP protocol interface to socket abstraction.
27 */
28extern char *tcpstates[];
4ad99bae 29struct tcpcb *tcp_newtcpcb();
290e0b0a 30
9c5022e3 31/*
290e0b0a 32 * Process a TCP user request for TCP tb. If this is a send request
9c5022e3
BJ
33 * then m is the mbuf chain of send data. If this is a timer expiration
34 * (called from the software clock routine), then timertype tells which timer.
35 */
eee3ab16
BJ
36tcp_usrreq(so, req, m, addr)
37 struct socket *so;
38 int req;
9c5022e3 39 struct mbuf *m;
eee3ab16 40 caddr_t addr;
4eb5d593 41{
53a5409e 42 register struct inpcb *inp = sotoinpcb(so);
cdad2eb1 43 register struct tcpcb *tp;
72f24d7d 44 int s = splnet();
eee3ab16 45 int error = 0;
17b82ed4 46 int ostate;
72f24d7d
BJ
47COUNT(TCP_USRREQ);
48
53a5409e 49 /*
290e0b0a
BJ
50 * When a TCP is attached to a socket, then there will be
51 * a (struct inpcb) pointed at by the socket, and this
52 * structure will point at a subsidary (struct tcpcb).
53 * The normal sequence of events is:
54 * PRU_ATTACH creating these structures
55 * PRU_CONNECT connecting to a remote peer
56 * (PRU_SEND|PRU_RCVD)* exchanging data
57 * PRU_DISCONNECT disconnecting from remote peer
58 * PRU_DETACH deleting the structures
59 * With the operations from PRU_CONNECT through PRU_DISCONNECT
60 * possible repeated several times.
61 *
62 * MULTIPLE CONNECTS ARE NOT YET IMPLEMENTED.
53a5409e 63 */
0974b45c 64 if (inp == 0 && req != PRU_ATTACH) {
a6503abf 65 splx(s);
290e0b0a 66 return (EINVAL); /* XXX */
a6503abf
BJ
67 }
68 if (inp) {
cdad2eb1 69 tp = intotcpcb(inp);
9c5022e3 70#ifdef KPROF
a6503abf 71 tcp_acounts[tp->t_state][req]++;
9c5022e3 72#endif
17b82ed4 73 ostate = tp->t_state;
cdad2eb1 74 }
eee3ab16 75 switch (req) {
4eb5d593 76
290e0b0a
BJ
77 /*
78 * TCP attaches to socket via PRU_ATTACH, reserving space,
79 * and internet and TCP control blocks.
80 * If the socket is to receive connections,
81 * then the LISTEN state is entered.
82 */
eee3ab16 83 case PRU_ATTACH:
4ad99bae 84 if (inp) {
eee3ab16 85 error = EISCONN;
cdad2eb1 86 break;
53a5409e 87 }
290e0b0a 88 error = tcp_attach(so, (struct sockaddr *)addr);
a6503abf 89 if (error)
4ad99bae 90 break;
8e65fd66
BJ
91 if ((so->so_options & SO_DONTLINGER) == 0)
92 so->so_linger = TCP_LINGERTIME;
290e0b0a 93 tp = sototcpcb(so);
72f24d7d 94 break;
4eb5d593 95
290e0b0a
BJ
96 /*
97 * PRU_DETACH detaches the TCP protocol from the socket.
98 * If the protocol state is non-embryonic, then can't
99 * do this directly: have to initiate a PRU_DISCONNECT,
100 * which may finish later; embryonic TCB's can just
101 * be discarded here.
102 */
eee3ab16 103 case PRU_DETACH:
290e0b0a
BJ
104 if (tp->t_state > TCPS_LISTEN)
105 tcp_disconnect(tp);
106 else {
107 tcp_close(tp);
108 tp = 0;
109 }
eee3ab16
BJ
110 break;
111
290e0b0a
BJ
112 /*
113 * Initiate connection to peer.
114 * Create a template for use in transmissions on this connection.
115 * Enter SYN_SENT state, and mark socket as connecting.
116 * Start keep-alive timer, and seed output sequence space.
117 * Send initial segment on connection.
118 */
eee3ab16 119 case PRU_CONNECT:
405c9168 120 error = in_pcbconnect(inp, (struct sockaddr_in *)addr);
4ad99bae 121 if (error)
53a5409e 122 break;
b454c3ea 123 tp->t_template = tcp_template(tp);
290e0b0a
BJ
124 if (tp->t_template == 0) {
125 in_pcbdisconnect(inp);
126 error = ENOBUFS;
127 break;
128 }
53a5409e 129 soisconnecting(so);
a6503abf 130 tp->t_state = TCPS_SYN_SENT;
4aed14e3
BJ
131 tp->t_timer[TCPT_KEEP] = TCPTV_KEEP;
132 tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
133 tcp_sendseqinit(tp);
f1b2fa5b 134 (void) tcp_output(tp);
72f24d7d 135 break;
4eb5d593 136
290e0b0a
BJ
137 /*
138 * Initiate disconnect from peer.
139 * If connection never passed embryonic stage, just drop;
140 * else if don't need to let data drain, then can just drop anyways,
141 * else have to begin TCP shutdown process: mark socket disconnecting,
142 * drain unread data, state switch to reflect user close, and
143 * send segment (e.g. FIN) to peer. Socket will be really disconnected
144 * when peer sends FIN and acks ours.
145 *
146 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
147 */
148 case PRU_DISCONNECT:
149 tcp_disconnect(tp);
4aed14e3
BJ
150 break;
151
290e0b0a
BJ
152 /*
153 * Accept a connection. Essentially all the work is
154 * done at higher levels; just return the address
155 * of the peer, storing through addr.
156 */
1acff8ec
BJ
157 case PRU_ACCEPT: {
158 struct sockaddr_in *sin = (struct sockaddr_in *)addr;
159
160 if (sin) {
161 bzero((caddr_t)sin, sizeof (*sin));
162 sin->sin_family = AF_INET;
163 sin->sin_port = inp->inp_fport;
164 sin->sin_addr = inp->inp_faddr;
165 }
166 }
eee3ab16
BJ
167 break;
168
290e0b0a
BJ
169 /*
170 * Mark the connection as being incapable of further output.
171 */
eee3ab16 172 case PRU_SHUTDOWN:
0974b45c 173 socantsendmore(so);
4aed14e3
BJ
174 tcp_usrclosed(tp);
175 (void) tcp_output(tp);
72f24d7d
BJ
176 break;
177
290e0b0a
BJ
178 /*
179 * After a receive, possibly send window update to peer.
180 */
eee3ab16 181 case PRU_RCVD:
f1b2fa5b 182 (void) tcp_output(tp);
72f24d7d
BJ
183 break;
184
290e0b0a
BJ
185 /*
186 * Do a send by putting data in output queue and updating urgent
187 * marker if URG set. Possibly send more data.
188 */
eee3ab16 189 case PRU_SEND:
a6503abf 190 sbappend(&so->so_snd, m);
0974b45c
BJ
191/*
192 if (tp->t_flags & TF_PUSH)
a6503abf 193 tp->snd_end = tp->snd_una + so->so_snd.sb_cc;
0974b45c 194 */
f1b2fa5b 195 (void) tcp_output(tp);
72f24d7d
BJ
196 break;
197
290e0b0a
BJ
198 /*
199 * Abort the TCP.
200 */
eee3ab16 201 case PRU_ABORT:
a6503abf 202 tcp_drop(tp, ECONNABORTED);
72f24d7d
BJ
203 break;
204
290e0b0a 205/* SOME AS YET UNIMPLEMENTED HOOKS */
eee3ab16 206 case PRU_CONTROL:
53a5409e 207 error = EOPNOTSUPP;
eee3ab16
BJ
208 break;
209
f1b2fa5b
BJ
210 case PRU_SENSE:
211 error = EOPNOTSUPP;
212 break;
0244dbc7 213/* END UNIMPLEMENTED HOOKS */
f1b2fa5b
BJ
214
215 case PRU_RCVOOB:
8b5a83bb
BJ
216 if (so->so_oobmark == 0 &&
217 (so->so_state & SS_RCVATMARK) == 0) {
0244dbc7
BJ
218 error = EINVAL;
219 break;
220 }
b2db9217 221 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
8b5a83bb 222 error = EWOULDBLOCK;
b2db9217 223 break;
8b5a83bb 224 }
b2db9217 225 *mtod(m, caddr_t) = tp->t_iobc;
f1b2fa5b
BJ
226 break;
227
228 case PRU_SENDOOB:
b2db9217
BJ
229#ifdef TCPTRUEOOB
230 if (tp->t_flags & TF_DOOOB) {
231 tp->t_oobseq++;
232 tp->t_oobc = *mtod(m, caddr_t);
233 tp->t_oobmark = tp->snd_una + so->so_snd.sb_cc;
234printf("sendoob seq now %x oobc %x\n", tp->t_oobseq, tp->t_oobc);
235 tp->t_oobflags |= TCPOOB_NEEDACK;
236 (void) tcp_output(tp);
237 }
238#endif
8b5a83bb
BJ
239 if (sbspace(&so->so_snd) < -512) {
240 error = ENOBUFS;
241 break;
242 }
0244dbc7
BJ
243 tp->snd_up = tp->snd_una + so->so_snd.sb_cc + 1;
244 sbappend(&so->so_snd, m);
245/*
246 if (tp->t_flags & TF_PUSH)
247 tp->snd_end = tp->snd_una + so->so_snd.sb_cc;
248 */
b2db9217
BJ
249 tp->t_force = 1;
250 (void) tcp_output(tp);
251 tp->t_force = 0;
f1b2fa5b
BJ
252 break;
253
290e0b0a
BJ
254 /*
255 * TCP slow timer went off; going through this
256 * routine for tracing's sake.
257 */
eee3ab16 258 case PRU_SLOWTIMO:
a6503abf 259 tcp_timers(tp, (int)addr);
17b82ed4 260 req |= (int)addr << 8; /* for debug's sake */
eee3ab16
BJ
261 break;
262
9c5022e3
BJ
263 default:
264 panic("tcp_usrreq");
72f24d7d 265 }
17b82ed4
BJ
266 if (tp && (so->so_options & SO_DEBUG))
267 tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req);
72f24d7d 268 splx(s);
53a5409e 269 return (error);
4eb5d593 270}
4aed14e3 271
306f91c9
BJ
272int tcp_sendspace = 1024*2;
273int tcp_recvspace = 1024*3;
290e0b0a
BJ
274/*
275 * Attach TCP protocol to socket, allocating
276 * internet protocol control block, tcp control block,
277 * bufer space, and entering LISTEN state if to accept connections.
278 */
279tcp_attach(so, sa)
280 struct socket *so;
281 struct sockaddr *sa;
282{
283 register struct tcpcb *tp;
284 struct inpcb *inp;
285 int error;
286
306f91c9
BJ
287 error = in_pcbattach(so, &tcb,
288 tcp_sendspace, tcp_recvspace, (struct sockaddr_in *)sa);
290e0b0a
BJ
289 if (error)
290 return (error);
291 inp = (struct inpcb *)so->so_pcb;
292 tp = tcp_newtcpcb(inp);
293 if (so->so_options & SO_ACCEPTCONN) {
294 if (tp == 0) {
295 in_pcbdetach(inp);
296 return (ENOBUFS);
297 }
298 tp->t_state = TCPS_LISTEN;
299 } else
300 tp->t_state = TCPS_CLOSED;
301 return (0);
302}
303
304/*
305 * Initiate (or continue) disconnect.
306 * If embryonic state, just send reset (once).
307 * If not in ``let data drain'' option, just drop.
308 * Otherwise (hard), mark socket disconnecting and drop
309 * current input data; switch states based on user close, and
310 * send segment to peer (with FIN).
311 */
312tcp_disconnect(tp)
313 struct tcpcb *tp;
314{
315 struct socket *so = tp->t_inpcb->inp_socket;
316
317 if (tp->t_state < TCPS_ESTABLISHED)
318 tcp_close(tp);
8e65fd66 319 else if (so->so_linger == 0)
290e0b0a
BJ
320 tcp_drop(tp, 0);
321 else {
322 soisdisconnecting(so);
323 sbflush(&so->so_rcv);
324 tcp_usrclosed(tp);
325 (void) tcp_output(tp);
326 }
327}
328
329/*
330 * User issued close, and wish to trail through shutdown states:
331 * if never received SYN, just forget it. If got a SYN from peer,
332 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
333 * If already got a FIN from peer, then almost done; go to LAST_ACK
334 * state. In all other cases, have already sent FIN to peer (e.g.
335 * after PRU_SHUTDOWN), and just have to play tedious game waiting
336 * for peer to send FIN or not respond to keep-alives, etc.
085a0b90 337 * We can let the user exit from the close as soon as the FIN is acked.
290e0b0a 338 */
4aed14e3
BJ
339tcp_usrclosed(tp)
340 struct tcpcb *tp;
341{
342
4aed14e3
BJ
343 switch (tp->t_state) {
344
345 case TCPS_LISTEN:
346 case TCPS_SYN_SENT:
347 tp->t_state = TCPS_CLOSED;
348 tcp_close(tp);
349 break;
350
351 case TCPS_SYN_RECEIVED:
352 case TCPS_ESTABLISHED:
353 tp->t_state = TCPS_FIN_WAIT_1;
354 break;
355
356 case TCPS_CLOSE_WAIT:
357 tp->t_state = TCPS_LAST_ACK;
358 break;
359 }
085a0b90
BJ
360 if (tp->t_state >= TCPS_FIN_WAIT_2)
361 soisdisconnected(tp->t_inpcb->inp_socket);
4aed14e3 362}