PERFORCE change 180382 for review

Andre Oppermann andre at FreeBSD.org
Thu Jul 1 10:37:54 UTC 2010


http://p4web.freebsd.org/@@180382?ac=10

Change 180382 by andre at andre_t61 on 2010/07/01 10:37:40

	Split tcp_retransmit into two functions: one for normal NewReno-based
	Fast Recovery and one for SACK-based loss recovery. The two are too
	different to share a single common function.
	Limited transmit stays in the common path because it happens before any retransmits are started.

Affected files ...

.. //depot/projects/tcp_new/netinet/tcp_output.c#18 edit

Differences ...

==== //depot/projects/tcp_new/netinet/tcp_output.c#18 (text+ko) ====

@@ -200,6 +200,8 @@
 	 *             <- duna -><-       swnd       ->
 	 *                       <-  cwnd  ->
 	 *                       <-dlen->
+	 *                       <-slen->
+	 *                       <-len ->
 	 * seq  .......|+++++++++xxxxxxxx---z---------|.......
 	 *             ^        ^
 	 *          snd_una  snd_nxt
@@ -249,17 +251,26 @@
 		break;
 	case TP_SENDING:
 		/*
-		 * Limited transmit: transmit new data upon the arrival of the
-		 * first two consecutive duplicate ACKs.
-		 *  RFC3042: section 2
+		 * Only straight transmitting is handled here.
+		 * Everything else goes through the retransmit code which
+		 * also controls sending of new segments during loss cases.
+		 *
+		 * Limited transmit: clamp the amount of data sent to at most one MSS.
 		 */
-		if (tp->snd_dupack > 0 && dlen > len && cwnd < tp->snd_mss)
-			len = min(slen, tp->snd_mss);	/* up to one mss above cwnd */
+		if (tp->snd_dupack > 0 && tp->snd_dupack < 3) {
+			len = min(tp->snd_mss, min(slen, len + 2 * tp->snd_mss));
+			tp->t_flags |= TF_ACKNOW;
+		}
 		break;
 	case TP_LOSSRECOV:
 	case TP_REXMT:
 		optlen = tcp_options(tp, so, &to, &opt[0], flags);
-		error = tcp_retransmit(tp, so, &to, &opt[0], &len, optlen, rwin, dlen, slen, flags);
+		if (tp->t_flags & TF_SACKOK)
+			error = tcp_retransmit_sack(tp, so, &to, &opt[0], optlen, &len, rwin, duna, dlen, slen, len, flags);
+		else
+			error = tcp_retransmit(tp, so, &to, &opt[0], optlen, &len, rwin, dlen, slen, flags);
+		if (error)
+			return (error);
 		break;
 	case TP_PERSIST:
 		/*
@@ -267,11 +278,14 @@
 		 * Send out probe byte if there is data available.
 		 *  RFC793: section 3.7, page 42-44
 		 *  RFC1122: section 4.2.2.17
+		 *
+		 * XXXAO: Make sure to send ACKs and our window updates anyways.
 		 */
 		if (swnd == 0 && dlen > 0 && (tp->t_flags & TF_FORCEDATA)) {
 			len = 1;
 			goto send;
-		}
+		} else
+			len = 0;
 		if (swnd == 0 && duna > tp->snd_wnd) {
 			/*
 			 * Window shrank after we sent into it.
@@ -768,31 +782,134 @@
 /*
  * Do a retransmit from snd_nxt or a later point.  This is separate
  * from the normal transmit case as the logic is quite a bit different.
+ *
+ * Normal NewReno FastRecovery with Limited Transmit.
+ *  1. Fast recovery: After we get three duplicate ACKs RFC5681
+ *  2. NewReno RFC3782
+ *  3. Limited transmit RFC3042
+ *
+ * The fast recovery algorithms start their work by the third
+ * duplicate ACK.
  */
 static int
 tcp_retransmit(struct tcpcb *tp, struct socket *so, struct tcpopt *to,
-    u_char *opt, int *len, int optlen, int rwin, int dlen, int slen, int flags)
+    u_char *opt, int optlen, int *lenp, int duna, int dlen, int slen, int len, int flags)
+{
+	int error = 0, off, rlen = 0, rxmit;
+	struct tcphdr ths, *th = &ths;
+
+	/*
+	 * duna = flight size
+	 * dlen = available unsent data in send buffer
+	 * len = data available to be sent (within cwnd and wnd)
+	 *
+	 * 1a) ssthresh = max (FlightSize / 2, 2*SMSS) [input]
+	 *     recover = snd_nxt [input]
+	 *
+	 * 2)  retransmit snd_una+mss [output]
+	 *     cwnd = ssthresh + 3*mss [input]
+	 *
+	 * 3)  on dupack > 3: cwnd += mss [input]
+	 *
+	 * 4)  transmit new segment if cwnd allows [output]
+	 */
+
+	/* Retransmit one mss or the unacknowledged amount of data. */
+	rlen = min(tp->snd_mss, duna);
+
+	/* Transmit one more new data. */
+	if (len > 0 && (len >= tp->snd_mss || dlen == len))
+		*lenp = len;
+	else
+		*lenp = 0;
+
+	/* Fill in headers. */
+	th->th_win = (u_short)rwin;
+	th->th_seq = tp->snd_nxt;
+	th->th_flags = flags;
+	th->th_ack = tp->rcv_nxt;
+
+	/* If resending a SYN or FIN, be sure NOT to use a new sequence number. */
+	if ((flags & TH_SYN) && (tp->t_flags & TF_SENTSYN))
+		th->th_seq--;
+	if ((flags & TH_FIN) && (tp->t_flags & TF_SENTFIN) &&
+	    th->th_seq == tp->snd_nxt)
+		th->th_seq--;
+	
+	SOCKBUF_LOCK(&so->so_snd);
+	off = 0;
+	error = tcp_send_segments(tp, &ths, opt, off, &rlen, optlen);
+	SOCKBUF_UNLOCK(&so->so_snd);
+
+	if (rlen > 0) {
+	}
+
+	if (error == 0) {
+	}
+
+	if (rlen > 0 || error == 0) {
+		/* Update last send timestamp. */
+		if ((tp->t_flags & TF_TIMESTAMP) && tp->snd_tslast != to->to_tsval)
+			tp->snd_tslast = to->to_tsval;
+
+		/* Last ACK we sent. */
+		if (tp->snd_lastack != th->th_ack)
+			tp->snd_lastack = th->th_ack;
+	}
+
+	if (rlen > 0 && error == 0) {
+	}
+
+	switch (error) {
+	case EPERM:
+	case EACCES:
+		break;
+	case ENOBUFS:
+	case ENOMEM:
+		break;
+	case EMSGSIZE:
+		break;
+	case ENETDOWN:
+	case ENETUNREACH:
+	case EHOSTDOWN:
+	case EHOSTUNREACH:
+		break;
+	case 0:	/* No error. */
+		break;
+	default:
+		KASSERT(0 == 1,
+		    ("%s: unkown error %i", __func__, error));
+	}
+
+	return (error);
+}
+
+/*
+ * Do a retransmit from snd_nxt or a later point when SACK is enabled.
+ * This is separate from the normal transmit case as the logic is quite
+ * a bit different.
+ *
+ * We have the following mechanisms:
+ *  1. SACK tells us where to send how much data RFC3517
+ *  2. TCP congestion window validation RFC2861
+ *  3. Update to RFC3517 draft-ietf-tcpm-sack-recovery-entry-01.txt
+ */
+static int
+tcp_retransmit_sack(struct tcpcb *tp, struct socket *so, struct tcpopt *to,
+    u_char *opt, int optlen, int *lenp, int rwin, int dlen, int slen, int flags)
 {
 	int error = 0, off, rlen = 0, rxmit;
 	struct tcphdr ths, *th = &ths;
 
 	/*
+	 * SACK based retransmission RFC3517
 	 * Retransmit over the SACK holes.
 	 * Do not retransmit too many times.
 	 * Limit the amount of data that is inflight.
 	 * Retransmit only the stuff that was not SACK'ed.
-	 *
-	 * The fast recovery algorithms start their work by the third
-	 * duplicate ACK.
-	 *
-	 * We have the following mechanisms:
-	 *  1. Fast recovery: After we get three duplicate ACKs RFC2581
-	 *  2. NewReno RFC3782
-	 *  3. Limited transmit RFC3042
-	 *  4. SACK tells us where to send how much data RFC3517
-	 *  5. TCP congestion window validation RFC2861
+	 * With SACK we may be able to retransmit more than one segment
+	 * from different blocks.
 	 */
-
 	do {
 		/* Calculate amount of data we may inject into the pipe (C). */
 		rxmit = imax(0, tp->snd_cwnd - tp->snd_pipe);
@@ -814,9 +931,9 @@
 			if (rlen == 0) {
 				if (slen > 0 && (rxmit >= tp->snd_mss ||
 				    (rxmit >= slen && dlen == slen)))
-					*len = min(slen, rxmit);
+					*lenp = min(slen, rxmit);
 				else
-					*len = 0;
+					*lenp = 0;
 				break;
 			}
 			/*
@@ -832,17 +949,13 @@
 		else
 			break;
 
-		/*
-		 * Fill in headers.
-		 */
+		/* Fill in headers. */
 		th->th_win = (u_short)rwin;
 		th->th_seq = tp->snd_rxmit;
 		th->th_flags = flags;
 		th->th_ack = tp->rcv_nxt;
 
-		/*
-		 * If resending a SYN or FIN, be sure NOT to use a new sequence number.
-		 */
+		/* If resending a SYN or FIN, be sure NOT to use a new sequence number. */
 		if ((flags & TH_SYN) && (tp->t_flags & TF_SENTSYN))
  			th->th_seq--;
 		if ((flags & TH_FIN) && (tp->t_flags & TF_SENTFIN) &&
@@ -868,15 +981,11 @@
 	}
 
 	if (rlen > 0 || error == 0) {
-		/*
-		 * Update last send timestamp.
-		 */
+		/* Update last send timestamp. */
 		if ((tp->t_flags & TF_TIMESTAMP) && tp->snd_tslast != to->to_tsval)
 			tp->snd_tslast = to->to_tsval;
 
-		/*
-		 * Last ACK we sent.
-		 */
+		/* Last ACK we sent. */
 		if (tp->snd_lastack != th->th_ack)
 			tp->snd_lastack = th->th_ack;
 	}
@@ -898,12 +1007,8 @@
 	case EHOSTDOWN:
 	case EHOSTUNREACH:
 		break;
-	case 0:
-		/*
-		 * No error.
-		 */
+	case 0:	/* No error. */
 		break;
-
 	default:
 		KASSERT(0 == 1,
 		    ("%s: unkown error %i", __func__, error));


More information about the p4-projects mailing list