include file for per-architecture Makefiles
[unix-history] / usr / src / sys / netiso / tp_subr.c
CommitLineData
7bcd1bb8
KB
1/*-
2 * Copyright (c) 1991 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * %sccs.include.redist.c%
6 *
6ca227ac 7 * @(#)tp_subr.c 7.17 (Berkeley) %G%
7bcd1bb8
KB
8 */
9
a2c3ecd0
KS
10/***********************************************************
11 Copyright IBM Corporation 1987
12
13 All Rights Reserved
14
15Permission to use, copy, modify, and distribute this software and its
16documentation for any purpose and without fee is hereby granted,
17provided that the above copyright notice appear in all copies and that
18both that copyright notice and this permission notice appear in
19supporting documentation, and that the name of IBM not be
20used in advertising or publicity pertaining to distribution of the
21software without specific, written prior permission.
22
23IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
24ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
25IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
26ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
27WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
28ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
29SOFTWARE.
30
31******************************************************************/
32
33/*
34 * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
35 */
36/*
37 * ARGO TP
38 *
39 * $Header: tp_subr.c,v 5.3 88/11/18 17:28:43 nhall Exp $
40 * $Source: /usr/argo/sys/netiso/RCS/tp_subr.c,v $
41 *
42 * The main work of data transfer is done here.
43 * These routines are called from tp.trans.
44 * They include the routines that check the validity of acks and Xacks,
45 * (tp_goodack() and tp_goodXack() )
46 * take packets from socket buffers and send them (tp_send()),
47 * drop the data from the socket buffers (tp_sbdrop()),
48 * and put incoming packet data into socket buffers (tp_stash()).
49 */
50
a2c3ecd0
KS
51#include "param.h"
52#include "mbuf.h"
53#include "socket.h"
54#include "socketvar.h"
55#include "protosw.h"
56#include "errno.h"
57#include "types.h"
58#include "time.h"
38f1e20d 59#include "kernel.h"
a2c3ecd0 60
a50e2bc0
KS
61#include "tp_ip.h"
62#include "iso.h"
63#include "argo_debug.h"
64#include "tp_timer.h"
65#include "tp_param.h"
66#include "tp_stat.h"
67#include "tp_pcb.h"
68#include "tp_tpdu.h"
69#include "tp_trace.h"
70#include "tp_meas.h"
71#include "tp_seq.h"
a2c3ecd0 72
bdf41b09
KS
/*
 * Forward declarations for routines that are called in this file
 * before (or without) a visible definition.
 */
int		tp_emit();
int		tp_sbdrop();
void	tp_send();

/*
 * Number of consecutive duplicate acks (same sequence number and credit,
 * new subsequence) at which tp_goodack() retransmits from tp_snduna.
 */
int		tprexmtthresh = 3;

/* System clock tick counter; used here to timestamp round trip samples. */
extern int ticks;
a2c3ecd0
KS
77
78/*
79 * CALLED FROM:
80 * tp.trans, when an XAK arrives
81 * FUNCTION and ARGUMENTS:
82 * Determines if the sequence number (seq) from the XAK
83 * acks anything new. If so, drop the appropriate tpdu
84 * from the XPD send queue.
85 * RETURN VALUE:
86 * Returns 1 if it did this, 0 if the ack caused no action.
87 */
88int
89tp_goodXack(tpcb, seq)
90 struct tp_pcb *tpcb;
91 SeqNum seq;
92{
93
94 IFTRACE(D_XPD)
95 tptraceTPCB(TPPTgotXack,
bdf41b09 96 seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew,
a2c3ecd0
KS
97 tpcb->tp_snduna);
98 ENDTRACE
99
100 if ( seq == tpcb->tp_Xuna ) {
101 tpcb->tp_Xuna = tpcb->tp_Xsndnxt;
102
103 /* DROP 1 packet from the Xsnd socket buf - just so happens
104 * that only one packet can be there at any time
105 * so drop the whole thing. If you allow > 1 packet
106 * the socket buffer, then you'll have to keep
107 * track of how many characters went w/ each XPD tpdu, so this
108 * will get messier
109 */
110 IFDEBUG(D_XPD)
111 dump_mbuf(tpcb->tp_Xsnd.sb_mb,
112 "tp_goodXack Xsnd before sbdrop");
113 ENDDEBUG
114
115 IFTRACE(D_XPD)
116 tptraceTPCB(TPPTmisc,
117 "goodXack: dropping cc ",
118 (int)(tpcb->tp_Xsnd.sb_cc),
119 0,0,0);
120 ENDTRACE
bdf41b09 121 sbdroprecord(&tpcb->tp_Xsnd);
a2c3ecd0
KS
122 return 1;
123 }
124 return 0;
125}
126
127/*
128 * CALLED FROM:
129 * tp_good_ack()
130 * FUNCTION and ARGUMENTS:
131 * updates
38f1e20d
KS
132 * smoothed average round trip time (*rtt)
133 * roundtrip time variance (*rtv) - actually deviation, not variance
a2c3ecd0
KS
134 * given the new value (diff)
135 * RETURN VALUE:
136 * void
137 */
138
139void
bdf41b09
KS
140tp_rtt_rtv(tpcb)
141register struct tp_pcb *tpcb;
a2c3ecd0 142{
6ca227ac
KS
143 int old = tpcb->tp_rtt;
144 int delta, elapsed = ticks - tpcb->tp_rttemit;
bdf41b09
KS
145
146 if (tpcb->tp_rtt != 0) {
147 /*
148 * rtt is the smoothed round trip time in machine clock ticks (hz).
ef8ed661
KS
149 * It is stored as a fixed point number, unscaled (unlike the tcp
150 * srtt). The rationale here is that it is only significant to the
bdf41b09
KS
151 * nearest unit of slowtimo, which is at least 8 machine clock ticks
152 * so there is no need to scale. The smoothing is done according
153 * to the same formula as TCP (rtt = rtt*7/8 + measured_rtt/8).
154 */
155 delta = elapsed - tpcb->tp_rtt;
156 if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
157 tpcb->tp_rtt = 1;
158 /*
159 * rtv is a smoothed accumulated mean difference, unscaled
160 * for reasons expressed above.
161 * It is smoothed with an alpha of .75, and the round trip timer
162 * will be set to rtt + 4*rtv, also as TCP does.
163 */
164 if (delta < 0)
165 delta = -delta;
166 if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
167 tpcb->tp_rtv = 1;
168 } else {
169 /*
170 * No rtt measurement yet - use the unsmoothed rtt.
171 * Set the variance to half the rtt (so our first
172 * retransmit happens at 3*rtt)
173 */
174 tpcb->tp_rtt = elapsed;
175 tpcb->tp_rtv = elapsed >> 1;
176 }
177 tpcb->tp_rttemit = 0;
178 tpcb->tp_rxtshift = 0;
179 /*
180 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
181 * Because of the way we do the smoothing, srtt and rttvar
182 * will each average +1/2 tick of bias. When we compute
183 * the retransmit timer, we want 1/2 tick of rounding and
184 * 1 extra tick because of +-1/2 tick uncertainty in the
185 * firing of the timer. The bias will give us exactly the
186 * 1.5 tick we need. But, because the bias is
187 * statistical, we have to test that we don't drop below
188 * the minimum feasible timer (which is 2 ticks)."
189 */
ef8ed661
KS
190 TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb),
191 tpcb->tp_peer_acktime, 128 /* XXX */);
6ca227ac
KS
192 IFDEBUG(D_RTT)
193 printf("%s tpcb 0x%x, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n",
194 "tp_rtt_rtv:",tpcb,elapsed,delta,tpcb->tp_rtt,tpcb->tp_rtv,old);
195 ENDDEBUG
ef8ed661 196 tpcb->tp_rxtcur = tpcb->tp_dt_ticks;
a2c3ecd0
KS
197}
198
199/*
200 * CALLED FROM:
201 * tp.trans when an AK arrives
202 * FUNCTION and ARGUMENTS:
203 * Given (cdt), the credit from the AK tpdu, and
204 * (seq), the sequence number from the AK tpdu,
205 * tp_goodack() determines if the AK acknowledges something in the send
206 * window, and if so, drops the appropriate packets from the retransmission
207 * list, computes the round trip time, and updates the retransmission timer
208 * based on the new smoothed round trip time.
209 * RETURN VALUE:
210 * Returns 1 if
211 * EITHER it actually acked something heretofore unacknowledged
212 * OR no news but the credit should be processed.
213 * If something heretofore unacked was acked with this sequence number,
214 * the appropriate tpdus are dropped from the retransmission control list,
215 * by calling tp_sbdrop().
216 * No need to see the tpdu itself.
217 */
218int
219tp_goodack(tpcb, cdt, seq, subseq)
220 register struct tp_pcb *tpcb;
221 u_int cdt;
bdf41b09
KS
222 register SeqNum seq;
223 u_int subseq;
a2c3ecd0 224{
bdf41b09 225 int old_fcredit;
a2c3ecd0 226 int bang = 0; /* bang --> ack for something heretofore unacked */
bdf41b09 227 u_int bytes_acked;
a2c3ecd0
KS
228
229 IFDEBUG(D_ACKRECV)
bdf41b09
KS
230 printf("goodack tpcb 0x%x seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
231 tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
a2c3ecd0
KS
232 ENDDEBUG
233 IFTRACE(D_ACKRECV)
234 tptraceTPCB(TPPTgotack,
bdf41b09 235 seq,cdt, tpcb->tp_snduna,tpcb->tp_sndnew,subseq);
a2c3ecd0
KS
236 ENDTRACE
237
238 IFPERF(tpcb)
a50e2bc0 239 tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *)0, seq, 0, 0);
a2c3ecd0
KS
240 ENDPERF
241
bdf41b09
KS
242 if (seq == tpcb->tp_snduna) {
243 if (subseq < tpcb->tp_r_subseq ||
244 (subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
245 discard_the_ack:
246 IFDEBUG(D_ACKRECV)
247 printf("goodack discard : tpcb 0x%x subseq %d r_subseq %d\n",
248 tpcb, subseq, tpcb->tp_r_subseq);
249 ENDDEBUG
250 goto done;
251 }
252 if (cdt == tpcb->tp_fcredit /*&& thus subseq > tpcb->tp_r_subseq */) {
253 tpcb->tp_r_subseq = subseq;
254 if (tpcb->tp_timer[TM_data_retrans] == 0)
255 tpcb->tp_dupacks = 0;
256 else if (++tpcb->tp_dupacks == tprexmtthresh) {
257 /* partner went out of his way to signal with different
258 subsequences that he has the same lack of an expected
259 packet. This may be an early indiciation of a loss */
260
261 SeqNum onxt = tpcb->tp_sndnxt;
262 struct mbuf *onxt_m = tpcb->tp_sndnxt_m;
263 u_int win = min(tpcb->tp_fcredit,
264 tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
6ca227ac
KS
265 IFDEBUG(D_ACKRECV)
266 printf("%s tpcb 0x%x seq 0x%x rttseq 0x%x onxt 0x%x\n",
267 "goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt);
268 ENDDEBUG
bdf41b09
KS
269 if (win < 2)
270 win = 2;
271 tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
272 tpcb->tp_timer[TM_data_retrans] = 0;
273 tpcb->tp_rttemit = 0;
274 tpcb->tp_sndnxt = tpcb->tp_snduna;
275 tpcb->tp_sndnxt_m = 0;
276 tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
277 tp_send(tpcb);
278 tpcb->tp_cong_win = tpcb->tp_ssthresh +
279 tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
280 if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
281 tpcb->tp_sndnxt = onxt;
282 tpcb->tp_sndnxt_m = onxt_m;
283 }
284
285 } else if (tpcb->tp_dupacks > tprexmtthresh) {
286 tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
287 }
288 goto done;
289 }
290 } else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
291 goto discard_the_ack;
292 /*
293 * If the congestion window was inflated to account
294 * for the other side's cached packets, retract it.
295 */
296 if (tpcb->tp_dupacks > tprexmtthresh &&
297 tpcb->tp_cong_win > tpcb->tp_ssthresh)
298 tpcb->tp_cong_win = tpcb->tp_ssthresh;
299 tpcb->tp_r_subseq = subseq;
300 old_fcredit = tpcb->tp_fcredit;
301 tpcb->tp_fcredit = cdt;
302 if (cdt > tpcb->tp_maxfcredit)
303 tpcb->tp_maxfcredit = cdt;
304 tpcb->tp_dupacks = 0;
a2c3ecd0 305
bdf41b09 306 if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {
a2c3ecd0 307
6bf9da92 308 tpsbcheck(tpcb, 0);
bdf41b09 309 bytes_acked = tp_sbdrop(tpcb, seq);
6bf9da92 310 tpsbcheck(tpcb, 1);
bdf41b09
KS
311 /*
312 * If transmit timer is running and timed sequence
313 * number was acked, update smoothed round trip time.
314 * Since we now have an rtt measurement, cancel the
315 * timer backoff (cf., Phil Karn's retransmit alg.).
316 * Recompute the initial retransmit timer.
a2c3ecd0 317 */
bdf41b09
KS
318 if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
319 tp_rtt_rtv(tpcb);
320 /*
321 * If all outstanding data is acked, stop retransmit timer.
322 * If there is more data to be acked, restart retransmit
323 * timer, using current (possibly backed-off) value.
324 * OSI combines the keepalive and persistance functions.
325 * So, there is no persistance timer per se, to restart.
326 */
327 tpcb->tp_timer[TM_data_retrans] =
328 (seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
329 /*
330 * When new data is acked, open the congestion window.
331 * If the window gives us less than ssthresh packets
332 * in flight, open exponentially (maxseg per packet).
333 * Otherwise open linearly: maxseg per window
334 * (maxseg^2 / cwnd per packet), plus a constant
335 * fraction of a packet (maxseg/8) to help larger windows
336 * open quickly enough.
337 */
338 {
339 u_int cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;
38f1e20d 340
bdf41b09
KS
341 incr = min(incr, bytes_acked);
342 if (cw > tpcb->tp_ssthresh)
343 incr = incr * incr / cw + incr / 8;
344 tpcb->tp_cong_win =
345 min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
a2c3ecd0
KS
346 }
347 tpcb->tp_snduna = seq;
bdf41b09
KS
348 if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
349 tpcb->tp_sndnxt = seq;
350 tpcb->tp_sndnxt_m = 0;
351 }
a2c3ecd0
KS
352 bang++;
353 }
354
355 if( cdt != 0 && old_fcredit == 0 ) {
356 tpcb->tp_sendfcc = 1;
357 }
ef8ed661
KS
358 if (cdt == 0) {
359 if (old_fcredit != 0)
360 IncStat(ts_zfcdt);
361 /* The following might mean that the window shrunk */
362 if (tpcb->tp_timer[TM_data_retrans]) {
363 tpcb->tp_timer[TM_data_retrans] = 0;
364 tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
365 if (tpcb->tp_sndnxt != tpcb->tp_snduna) {
366 tpcb->tp_sndnxt = tpcb->tp_snduna;
367 tpcb->tp_sndnxt_m = 0;
368 }
369 }
a2c3ecd0
KS
370 }
371 tpcb->tp_fcredit = cdt;
bdf41b09 372 bang |= (old_fcredit < cdt);
a2c3ecd0 373
bdf41b09 374done:
a2c3ecd0 375 IFDEBUG(D_ACKRECV)
bdf41b09
KS
376 printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%x\n",
377 bang, cdt, old_fcredit, tpcb->tp_cong_win);
a2c3ecd0 378 ENDDEBUG
bdf41b09
KS
379 /* if (bang) XXXXX Very bad to remove this test, but somethings broken */
380 tp_send(tpcb);
381 return (bang);
a2c3ecd0
KS
382}
383
384/*
385 * CALLED FROM:
386 * tp_goodack()
387 * FUNCTION and ARGUMENTS:
6bf9da92 388 * drops everything up TO but not INCLUDING seq # (seq)
a2c3ecd0
KS
389 * from the retransmission queue.
390 */
a2c3ecd0 391tp_sbdrop(tpcb, seq)
6bf9da92 392 register struct tp_pcb *tpcb;
a2c3ecd0
KS
393 SeqNum seq;
394{
6bf9da92 395 struct sockbuf *sb = &tpcb->tp_sock->so_snd;
bdf41b09
KS
396 register int i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
397 int oldcc = sb->sb_cc, oldi = i;
a2c3ecd0 398
bdf41b09
KS
399 if (i >= tpcb->tp_seqhalf)
400 printf("tp_spdropping too much -- should panic");
6bf9da92
KS
401 while (i-- > 0)
402 sbdroprecord(sb);
bdf41b09
KS
403 IFDEBUG(D_ACKRECV)
404 printf("tp_sbdroping %d pkts %d bytes on %x at 0x%x\n",
405 oldi, oldcc - sb->sb_cc, tpcb, seq);
406 ENDDEBUG
407 if (sb->sb_flags & SB_NOTIFY)
408 sowwakeup(tpcb->tp_sock);
38f1e20d 409 return (oldcc - sb->sb_cc);
a2c3ecd0
KS
410}
411
412/*
413 * CALLED FROM:
414 * tp.trans on user send request, arrival of AK and arrival of XAK
415 * FUNCTION and ARGUMENTS:
bdf41b09 416 * Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
a2c3ecd0 417 * Emits until a) runs out of data, or b) runs into an XPD mark, or
bdf41b09 418 * c) it hits seq number (highseq) limited by cong or credit.
a2c3ecd0
KS
419 *
420 * If you want XPD to buffer > 1 du per socket buffer, you can
421 * modifiy this to issue XPD tpdus also, but then it'll have
422 * to take some argument(s) to distinguish between the type of DU to
6bf9da92 423 * hand tp_emit.
a2c3ecd0
KS
424 *
425 * When something is sent for the first time, its time-of-send
bdf41b09
KS
426 * is stashed (in system clock ticks rather than pf_slowtimo ticks).
427 * When the ack arrives, the smoothed round-trip time is figured
428 * using this value.
a2c3ecd0 429 */
bdf41b09 430void
a2c3ecd0
KS
431tp_send(tpcb)
432 register struct tp_pcb *tpcb;
433{
434 register int len;
bdf41b09
KS
435 register struct mbuf *m;
436 struct mbuf *mb = 0;
a2c3ecd0 437 struct sockbuf *sb = &tpcb->tp_sock->so_snd;
bdf41b09
KS
438 unsigned int eotsdu = 0;
439 SeqNum highseq, checkseq;
440 int idle, idleticks, off, cong_win;
a2c3ecd0 441#ifdef TP_PERF_MEAS
6ca227ac 442 int send_start_time = ticks;
bdf41b09 443 SeqNum oldnxt = tpcb->tp_sndnxt;
a2c3ecd0
KS
444#endif TP_PERF_MEAS
445
bdf41b09
KS
446 idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
447 if (idle) {
448 idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
449 if (idleticks > tpcb->tp_dt_ticks)
450 /*
451 * We have been idle for "a while" and no acks are
452 * expected to clock out any data we send --
453 * slow start to get ack "clock" running again.
454 */
455 tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
a2c3ecd0 456 }
a2c3ecd0 457
bdf41b09
KS
458 cong_win = tpcb->tp_cong_win;
459 highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
460 if (tpcb->tp_Xsnd.sb_mb)
461 highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);
462
a2c3ecd0 463 IFDEBUG(D_DATA)
bdf41b09
KS
464 printf("tp_send enter tpcb 0x%x nxt 0x%x win %d high 0x%x\n",
465 tpcb, tpcb->tp_sndnxt, cong_win, highseq);
a2c3ecd0
KS
466 ENDDEBUG
467 IFTRACE(D_DATA)
bdf41b09
KS
468 tptraceTPCB( TPPTmisc, "tp_send sndnew snduna",
469 tpcb->tp_sndnew, tpcb->tp_snduna, 0, 0);
470 tptraceTPCB( TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin",
471 tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
a2c3ecd0 472 ENDTRACE
a2c3ecd0 473 IFTRACE(D_DATA)
bdf41b09
KS
474 tptraceTPCB( TPPTmisc, "tp_send 2 nxt high fcredit congwin",
475 tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
a2c3ecd0
KS
476 ENDTRACE
477
bdf41b09
KS
478 if (tpcb->tp_sndnxt_m)
479 m = tpcb->tp_sndnxt_m;
480 else {
481 off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
482 for (m = sb->sb_mb; m && off > 0; m = m->m_next)
483 off--;
484 }
485send:
486 /*
487 * Avoid silly window syndrome here . . . figure out how!
488 */
489 checkseq = tpcb->tp_sndnum;
490 if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
ef8ed661 491 checkseq = highseq; /* i.e. DON'T retain highest assigned packet */
bdf41b09
KS
492
493 while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {
494
495 eotsdu = (m->m_flags & M_EOR) != 0;
496 len = m->m_pkthdr.len;
497 if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
498 len < (tpcb->tp_l_tpdusize / 2))
499 break; /* Nagle . . . . . */
500 cong_win -= len;
a2c3ecd0
KS
501 /* make a copy - mb goes into the retransmission list
502 * while m gets emitted. m_copy won't copy a zero-length mbuf.
503 */
bdf41b09 504 mb = m;
6bf9da92
KS
505 m = m_copy(mb, 0, M_COPYALL);
506 if (m == MNULL)
bdf41b09
KS
507 break;
508 IFTRACE(D_STASH)
a2c3ecd0 509 tptraceTPCB( TPPTmisc,
bdf41b09
KS
510 "tp_send mcopy nxt high eotsdu len",
511 tpcb->tp_sndnxt, highseq, eotsdu, len);
a2c3ecd0 512 ENDTRACE
bdf41b09
KS
513
514 IFDEBUG(D_DATA)
515 printf("tp_sending tpcb 0x%x nxt 0x%x\n",
516 tpcb, tpcb->tp_sndnxt);
517 ENDDEBUG
518 /* when headers are precomputed, may need to fill
6bf9da92 519 in checksum here */
6bf9da92 520 if (tpcb->tp_sock->so_error =
bdf41b09 521 tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m)) {
a2c3ecd0 522 /* error */
bdf41b09 523 break;
a2c3ecd0 524 }
bdf41b09
KS
525 m = mb->m_nextpkt;
526 tpcb->tp_sndnxt_m = m;
527 if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
528 SEQ_INC(tpcb, tpcb->tp_sndnew);
529 /*
530 * Time this transmission if not a retransmission and
531 * not currently timing anything.
532 */
533 if (tpcb->tp_rttemit == 0) {
6ca227ac 534 tpcb->tp_rttemit = ticks;
bdf41b09
KS
535 tpcb->tp_rttseq = tpcb->tp_sndnxt;
536 }
537 tpcb->tp_sndnxt = tpcb->tp_sndnew;
538 } else
539 SEQ_INC(tpcb, tpcb->tp_sndnxt);
540 /*
541 * Set retransmit timer if not currently set.
542 * Initial value for retransmit timer is smoothed
543 * round-trip time + 2 * round-trip time variance.
544 * Initialize shift counter which is used for backoff
545 * of retransmit time.
546 */
547 if (tpcb->tp_timer[TM_data_retrans] == 0) {
548 tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
549 tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
550 tpcb->tp_rxtshift = 0;
38f1e20d 551 }
a2c3ecd0 552 }
bdf41b09
KS
553 if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
554 tpcb->tp_oktonagle = 0;
67857e5a 555#ifdef TP_PERF_MEAS
a2c3ecd0
KS
556 IFPERF(tpcb)
557 {
558 register int npkts;
6ca227ac 559 int elapsed = ticks - send_start_time, *t;
38f1e20d 560 struct timeval now;
a2c3ecd0 561
bdf41b09 562 npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);
a2c3ecd0 563
bdf41b09 564 if (npkts > 0)
a2c3ecd0
KS
565 tpcb->tp_Nwindow++;
566
567 if (npkts > TP_PM_MAX)
568 npkts = TP_PM_MAX;
569
a2c3ecd0 570 t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
38f1e20d 571 *t += (t - elapsed) >> TP_RTT_ALPHA;
a2c3ecd0 572
bdf41b09 573 if (mb == 0) {
a2c3ecd0
KS
574 IncPStat(tpcb, tps_win_lim_by_data[npkts] );
575 } else {
576 IncPStat(tpcb, tps_win_lim_by_cdt[npkts] );
577 /* not true with congestion-window being used */
578 }
38f1e20d
KS
579 now.tv_sec = elapsed / hz;
580 now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
a2c3ecd0 581 tpmeas( tpcb->tp_lref,
bdf41b09 582 TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts);
a2c3ecd0
KS
583 }
584 ENDPERF
67857e5a 585#endif TP_PERF_MEAS
a2c3ecd0 586
a2c3ecd0 587
a2c3ecd0
KS
588 IFTRACE(D_DATA)
589 tptraceTPCB( TPPTmisc,
bdf41b09
KS
590 "tp_send at end: new nxt eotsdu error",
591 tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu, tpcb->tp_sock->so_error);
a2c3ecd0
KS
592
593 ENDTRACE
594}
595
6bf9da92
KS
596int TPNagleok;
597int TPNagled;
598
599tp_packetize(tpcb, m, eotsdu)
600register struct tp_pcb *tpcb;
601register struct mbuf *m;
602int eotsdu;
603{
604 register struct mbuf *n;
605 register struct sockbuf *sb = &tpcb->tp_sock->so_snd;
606 int maxsize = tpcb->tp_l_tpdusize
607 - tp_headersize(DT_TPDU_type, tpcb)
608 - (tpcb->tp_use_checksum?4:0) ;
609 int totlen = m->m_pkthdr.len;
610 struct mbuf *m_split();
611 /*
612 * Pre-packetize the data in the sockbuf
613 * according to negotiated mtu. Do it here
614 * where we can safely wait for mbufs.
615 *
616 * This presumes knowledge of sockbuf conventions.
617 * TODO: allocate space for header and fill it in (once!).
618 */
bdf41b09
KS
619 IFDEBUG(D_DATA)
620 printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
621 maxsize, totlen, eotsdu, tpcb->tp_sndnum);
6bf9da92
KS
622 ENDTRACE
623 if (tpcb->tp_oktonagle) {
624 if ((n = sb->sb_mb) == 0)
625 panic("tp_packetize");
626 while (n->m_act)
627 n = n->m_act;
628 if (n->m_flags & M_EOR)
629 panic("tp_packetize 2");
630 SEQ_INC(tpcb, tpcb->tp_sndnum);
631 if (totlen + n->m_pkthdr.len < maxsize) {
632 /* There is an unsent packet with space, combine data */
633 struct mbuf *old_n = n;
634 tpsbcheck(tpcb,3);
635 n->m_pkthdr.len += totlen;
636 while (n->m_next)
637 n = n->m_next;
638 sbcompress(sb, m, n);
639 tpsbcheck(tpcb,4);
640 n = old_n;
641 TPNagled++;
642 goto out;
643 }
644 }
645 while (m) {
646 n = m;
647 if (totlen > maxsize) {
648 if ((m = m_split(n, maxsize, M_WAIT)) == 0)
649 panic("tp_packetize");
650 } else
651 m = 0;
652 totlen -= maxsize;
653 tpsbcheck(tpcb, 5);
654 sbappendrecord(sb, n);
655 tpsbcheck(tpcb, 6);
656 SEQ_INC(tpcb, tpcb->tp_sndnum);
657 }
658out:
659 if (eotsdu) {
660 n->m_flags |= M_EOR; /* XXX belongs at end */
661 tpcb->tp_oktonagle = 0;
662 } else {
663 SEQ_DEC(tpcb, tpcb->tp_sndnum);
664 tpcb->tp_oktonagle = 1;
665 TPNagleok++;
666 }
bdf41b09
KS
667 IFDEBUG(D_DATA)
668 printf("SEND out: oktonagle %d sndnum 0x%x\n",
669 tpcb->tp_oktonagle, tpcb->tp_sndnum);
670 ENDTRACE
6bf9da92
KS
671 return 0;
672}
673
674
a2c3ecd0
KS
675/*
676 * NAME: tp_stash()
677 * CALLED FROM:
678 * tp.trans on arrival of a DT tpdu
679 * FUNCTION, ARGUMENTS, and RETURN VALUE:
680 * Returns 1 if
681 * a) something new arrived and it's got eotsdu_reached bit on,
682 * b) this arrival was caused other out-of-sequence things to be
683 * accepted, or
684 * c) this arrival is the highest seq # for which we last gave credit
685 * (sender just sent a whole window)
686 * In other words, returns 1 if tp should send an ack immediately, 0 if
687 * the ack can wait a while.
688 *
689 * Note: this implementation no longer renegs on credit, (except
690 * when debugging option D_RENEG is on, for the purpose of testing
691 * ack subsequencing), so we don't need to check for incoming tpdus
692 * being in a reneged portion of the window.
693 */
694
bdf41b09 695tp_stash(tpcb, e)
a2c3ecd0
KS
696 register struct tp_pcb *tpcb;
697 register struct tp_event *e;
698{
699 register int ack_reason= tpcb->tp_ack_strat & ACK_STRAT_EACH;
700 /* 0--> delay acks until full window */
701 /* 1--> ack each tpdu */
a2c3ecd0
KS
702#ifndef lint
703#define E e->ATTR(DT_TPDU)
704#else lint
705#define E e->ev_union.EV_DT_TPDU
706#endif lint
707
708 if ( E.e_eot ) {
709 register struct mbuf *n = E.e_data;
a50e2bc0 710 n->m_flags |= M_EOR;
44f52ea5 711 n->m_act = 0;
a50e2bc0 712 }
a2c3ecd0 713 IFDEBUG(D_STASH)
a2c3ecd0
KS
714 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
715 "stash: so_rcv before appending");
716 dump_mbuf(E.e_data,
717 "stash: e_data before appending");
718 ENDDEBUG
a2c3ecd0
KS
719
720 IFPERF(tpcb)
721 PStat(tpcb, Nb_from_ll) += E.e_datalen;
722 tpmeas(tpcb->tp_lref, TPtime_from_ll, &e->e_time,
a50e2bc0 723 E.e_seq, (u_int)PStat(tpcb, Nb_from_ll), (u_int)E.e_datalen);
a2c3ecd0
KS
724 ENDPERF
725
13cdf5ec 726 if (E.e_seq == tpcb->tp_rcvnxt) {
a2c3ecd0
KS
727
728 IFDEBUG(D_STASH)
729 printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
730 E.e_seq, E.e_datalen, E.e_eot);
731 ENDDEBUG
732
733 IFTRACE(D_STASH)
734 tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
735 E.e_seq, E.e_datalen, E.e_eot, 0);
736 ENDTRACE
737
13cdf5ec
KS
738 SET_DELACK(tpcb);
739
a50e2bc0
KS
740 sbappend(&tpcb->tp_sock->so_rcv, E.e_data);
741
a2c3ecd0
KS
742 SEQ_INC( tpcb, tpcb->tp_rcvnxt );
743 /*
6bf9da92 744 * move chains from the reassembly queue to the socket buffer
a2c3ecd0 745 */
6bf9da92
KS
746 if (tpcb->tp_rsycnt) {
747 register struct mbuf **mp;
748 struct mbuf **mplim;
749
750 mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt % tpcb->tp_maxlcredit);
751 mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
752
753 while (tpcb->tp_rsycnt && *mp) {
754 sbappend(&tpcb->tp_sock->so_rcv, *mp);
755 tpcb->tp_rsycnt--;
756 *mp = 0;
757 SEQ_INC(tpcb, tpcb->tp_rcvnxt);
a2c3ecd0 758 ack_reason |= ACK_REORDER;
6bf9da92
KS
759 if (++mp == mplim)
760 mp = tpcb->tp_rsyq;
a2c3ecd0
KS
761 }
762 }
763 IFDEBUG(D_STASH)
764 dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
765 "stash: so_rcv after appending");
766 ENDDEBUG
767
768 } else {
6bf9da92
KS
769 register struct mbuf **mp;
770 SeqNum uwe;
a2c3ecd0
KS
771
772 IFTRACE(D_STASH)
773 tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt",
774 E.e_seq, tpcb->tp_rcvnxt, tpcb->tp_lcredit, 0);
775 ENDTRACE
776
7893315b
KS
777 if (tpcb->tp_rsyq == 0)
778 tp_rsyset(tpcb);
6bf9da92
KS
779 uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
780 if (tpcb->tp_rsyq == 0 ||
781 !IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
a2c3ecd0 782 ack_reason = ACK_DONT;
6bf9da92
KS
783 m_freem(E.e_data);
784 } else if (*(mp = tpcb->tp_rsyq + (E.e_seq % tpcb->tp_maxlcredit))) {
a2c3ecd0
KS
785 IFDEBUG(D_STASH)
786 printf("tp_stash - drop & ack\n");
787 ENDDEBUG
788
789 /* retransmission - drop it and force an ack */
790 IncStat(ts_dt_dup);
791 IFPERF(tpcb)
792 IncPStat(tpcb, tps_n_ack_cuz_dup);
793 ENDPERF
794
6bf9da92 795 m_freem(E.e_data);
a2c3ecd0 796 ack_reason |= ACK_DUP;
6bf9da92
KS
797 } else {
798 *mp = E.e_data;
799 tpcb->tp_rsycnt++;
800 ack_reason = ACK_DONT;
a2c3ecd0
KS
801 }
802 }
13cdf5ec 803 /* there were some comments of historical interest here. */
a2c3ecd0
KS
804 {
805 LOCAL_CREDIT(tpcb);
806
807 if ( E.e_seq == tpcb->tp_sent_uwe )
808 ack_reason |= ACK_STRAT_FULLWIN;
809
810 IFTRACE(D_STASH)
811 tptraceTPCB(TPPTmisc,
812 "end of stash, eot, ack_reason, sent_uwe ",
813 E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0);
814 ENDTRACE
815
816 if ( ack_reason == ACK_DONT ) {
817 IncStat( ts_ackreason[ACK_DONT] );
818 return 0;
819 } else {
820 IFPERF(tpcb)
a2c3ecd0
KS
821 if(ack_reason & ACK_STRAT_EACH) {
822 IncPStat(tpcb, tps_n_ack_cuz_strat);
823 } else if(ack_reason & ACK_STRAT_FULLWIN) {
824 IncPStat(tpcb, tps_n_ack_cuz_fullwin);
825 } else if(ack_reason & ACK_REORDER) {
826 IncPStat(tpcb, tps_n_ack_cuz_reorder);
827 }
828 tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0,
829 SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
830 ENDPERF
831 {
832 register int i;
833
834 /* keep track of all reasons that apply */
835 for( i=1; i<_ACK_NUM_REASONS_ ;i++) {
836 if( ack_reason & (1<<i) )
837 IncStat( ts_ackreason[i] );
838 }
839 }
840 return 1;
841 }
842 }
843}
6bf9da92
KS
844
845/*
846 * tp_rsyflush - drop all the packets on the reassembly queue.
847 * Do this when closing the socket, or when somebody has changed
848 * the space avaible in the receive socket (XXX).
849 */
850tp_rsyflush(tpcb)
851register struct tp_pcb *tpcb;
852{
853 register struct mbuf *m, **mp;
854 if (tpcb->tp_rsycnt) {
855 for (mp == tpcb->tp_rsyq + tpcb->tp_maxlcredit;
856 --mp >= tpcb->tp_rsyq; )
857 if (*mp) {
858 tpcb->tp_rsycnt--;
859 m_freem(*mp);
860 }
861 if (tpcb->tp_rsycnt)
862 panic("tp_rsyflush");
863 }
864 free((caddr_t)tpcb->tp_rsyq, M_PCB);
865 tpcb->tp_rsyq = 0;
866}
867
868tp_rsyset(tpcb)
869register struct tp_pcb *tpcb;
870{
871 register struct socket *so = tpcb->tp_sock;
872 int maxcredit = tpcb->tp_xtd_format ? 0xffff : 0xf;
7893315b
KS
873 int old_credit = tpcb->tp_maxlcredit;
874 caddr_t rsyq;
6bf9da92 875
6bf9da92
KS
876 tpcb->tp_maxlcredit = maxcredit = min(maxcredit,
877 (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize)/ tpcb->tp_l_tpdusize);
878
7893315b
KS
879 if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0)
880 return;
6bf9da92
KS
881 maxcredit *= sizeof(struct mbuf *);
882 if (tpcb->tp_rsyq)
883 tp_rsyflush(tpcb);
884 if (rsyq = (caddr_t)malloc(maxcredit, M_PCB, M_NOWAIT))
885 bzero(rsyq, maxcredit);
886 tpcb->tp_rsyq = (struct mbuf **)rsyq;
887}
888
889tpsbcheck(tpcb, i)
890struct tp_pcb *tpcb;
891{
892 register struct mbuf *n, *m;
893 register int len = 0, mbcnt = 0, pktlen;
894 struct sockbuf *sb = &tpcb->tp_sock->so_snd;
895
896 for (n = sb->sb_mb; n; n = n->m_nextpkt) {
897 if ((n->m_flags & M_PKTHDR) == 0)
898 panic("tpsbcheck nohdr");
899 pktlen = len + n->m_pkthdr.len;
900 for (m = n; m; m = m->m_next) {
901 len += m->m_len;
902 mbcnt += MSIZE;
903 if (m->m_flags & M_EXT)
904 mbcnt += m->m_ext.ext_size;
905 }
906 if (len != pktlen) {
907 printf("test %d; len %d != pktlen %d on mbuf 0x%x\n",
908 i, len, pktlen, n);
909 panic("tpsbcheck short");
910 }
911 }
912 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
913 printf("test %d: cc %d != %d || mbcnt %d != %d\n", i, len, sb->sb_cc,
914 mbcnt, sb->sb_mbcnt);
915 panic("tpsbcheck");
916 }
917}