PERFORCE change 166158 for review

Andre Oppermann andre at FreeBSD.org
Thu Jul 16 08:42:17 UTC 2009


http://perforce.freebsd.org/chv.cgi?CH=166158

Change 166158 by andre at andre_t61 on 2009/07/16 08:41:58

	Flush the SACK scoreboard when an RFC2018 reneging event is detected.
	Add RFC2883 D-SACK detection support.
	Change sysctl names to better reflect their usage.
	Move tcp_sack_flush() up next to tcp_sack_free().
	Add more comments.
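
	For background on the D-SACK detection added below: RFC2883 identifies
	a duplicate-SACK report by the first SACK block of a segment, which
	either lies entirely at or below the cumulative ACK or is a subset of
	the second SACK block.  A minimal standalone sketch of that check
	follows; the names (tcp_seq, SEQ_LEQ/SEQ_GEQ modeled on
	<netinet/tcp_seq.h>, struct sackblk, is_dsack()) are illustrative and
	are not the ones used in tcp_sack.c.

#include <stdint.h>

typedef uint32_t tcp_seq;

/* Modular sequence-space comparisons, modeled on <netinet/tcp_seq.h>. */
#define	SEQ_LEQ(a, b)	((int32_t)((a) - (b)) <= 0)
#define	SEQ_GEQ(a, b)	((int32_t)((a) - (b)) >= 0)

struct sackblk {
	tcp_seq	start;		/* left edge, inclusive */
	tcp_seq	end;		/* right edge, exclusive */
};

/*
 * RFC2883 check: does the first SACK block of a segment report
 * duplicate (already delivered) data?
 */
static int
is_dsack(tcp_seq th_ack, const struct sackblk *first,
    const struct sackblk *second, int nsacks)
{

	/* Case 1: the first block is fully covered by the cumulative ACK. */
	if (SEQ_LEQ(first->end, th_ack))
		return (1);
	/* Case 2: the first block is a subset of the second SACK block. */
	if (nsacks > 1 && SEQ_GEQ(first->start, second->start) &&
	    SEQ_LEQ(first->end, second->end))
		return (1);
	return (0);
}

	The change below takes a more conservative route: it only treats the
	first block as a D-SACK when it falls within a block already present
	in the scoreboard and is no larger than one MSS.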

Affected files ...

.. //depot/projects/tcp_new/netinet/tcp_sack.c#7 edit

Differences ...

==== //depot/projects/tcp_new/netinet/tcp_sack.c#7 (text+ko) ====

@@ -67,7 +67,14 @@
 #endif /* TCPDEBUG */
 
 /*
- * Store all SACK blocks of the scoreboard in a ranged red-black tree.
+ * Implementation of Selective Acknowledgements (SACK) as described in
+ * RFC2018.
+ *
+ * This file implements the data sender part of SACK.  It stores all
+ * received SACK blocks in a scoreboard built on a ranged red-black tree.
+ *
+ * The data receiver part (RFC2018: section 4) is part of the reassembly
+ * queue.
  */
 
 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK");
@@ -75,17 +82,20 @@
 int tcp_do_sack = 1;
 SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW,
     &tcp_do_sack, 0, "Enable/Disable TCP SACK support");
-TUNABLE_INT("net.inet.tcp.sack.enable", &tcp_do_sack);
+
+int tcp_do_dsack = 1;
+SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, dsack, CTLFLAG_RW,
+    &tcp_do_dsack, 0, "Enable TCP duplicate SACK (D-SACK) support");
 
-static int tcp_sack_maxholes = 128;
-SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, maxholes, CTLFLAG_RW,
-    &tcp_sack_maxholes, 0, 
-    "Maximum number of TCP SACK holes allowed per connection");
+static int tcp_sack_maxblocks = 32;
+SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, maxblocks, CTLFLAG_RW,
+    &tcp_sack_maxblocks, 0,
+    "Per-connection limit on the number of SACK blocks");
 
 static int tcp_sack_globalmaxholes = 65536;
-SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalmaxholes, CTLFLAG_RW,
-    &tcp_sack_globalmaxholes, 0, 
-    "Global maximum number of TCP SACK holes");
+SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalmaxblocks, CTLFLAG_RW,
+    &tcp_sack_globalmaxholes, 0,
+    "Global total limit on the number of SACK blocks");
 
 static uma_zone_t	tcp_sackblock_zone;
 
@@ -94,9 +104,14 @@
 {
 	tcp_sackblock_zone = uma_zcreate("tcpsackblk", sizeof(struct tcp_sack_block),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+	/* XXXAO: limit is not adjusted when changed at runtime. */
 	uma_zone_set_max(tcp_sackblock_zone, tcp_sack_globalmaxholes);
 }
 
+/*
+ * Compare function implementing the ranged lookup on the RB tree.
+ * NB: The tree must never have any overlapping elements.
+ */
 static __inline int
 tcp_sack_cmp(struct tcp_sack_block *a, struct tcp_sack_block *b)
 {
@@ -111,6 +126,26 @@
 RB_PROTOTYPE_STATIC(tcp_sackblocks, tcp_sack_block, tsb_rb, tcp_sack_cmp);
 RB_GENERATE_STATIC(tcp_sackblocks, tcp_sack_block, tsb_rb, tcp_sack_cmp);
 
+/*
+ * Verify the integrity of the ranged RB tree.
+ */
+#ifdef INVARIANTS
+static int
+tcp_sack_verify(struct tcpcb *tp)
+{
+	struct tcp_sack_block *tsb, *tsbn;
+
+	RB_FOREACH_SAFE(tsb, tcp_sackblocks, &tp->snd_sackblocks, tsbn) {
+		if (SEQ_GEQ(tsb->tsb_blk.start, tsb->tsb_blk.end) ||
+		    SEQ_LEQ(tsb->tsb_blk.start, tp->snd_una) ||
+		    SEQ_GT(tsb->tsb_blk.end, tp->snd_nxt) ||
+		    (tsbn != NULL && SEQ_GEQ(tsb->tsb_blk.end, tsbn->tsb_blk.start)))
+			return (0);
+	}
+	return (1);
+}
+#endif
+
 static void
 tcp_sack_free(struct tcpcb *tp, struct tcp_sack_block *tsb)
 {
@@ -121,23 +156,21 @@
 	uma_zfree(tcp_sackblock_zone, tsb);
 }
 
-#ifdef INVARIANTS
-static int
-tcp_sack_verify(struct tcpcb *tp)
+void
+tcp_sack_flush(struct tcpcb *tp)
 {
 	struct tcp_sack_block *tsb, *tsbn;
 
 	RB_FOREACH_SAFE(tsb, tcp_sackblocks, &tp->snd_sackblocks, tsbn) {
-		if (SEQ_GEQ(tsb->tsb_blk.start, tsb->tsb_blk.end) ||
-		    SEQ_LEQ(tsb->tsb_blk.start, tp->snd_una) ||
-		    SEQ_GT(tsb->tsb_blk.end, tp->snd_nxt) ||
-		    (tsbn != NULL && SEQ_GEQ(tsb->tsb_blk.end, tsbn->tsb_blk.start)))
-			return (0);
+		tcp_sack_free(tp, tsb);
 	}
-	return (1);
 }
-#endif
 
+/*
+ * Update the scoreboard to remember which sequence number ranges
+ * the receiver has reported as successfully received.
+ *  RFC2018: section 5
+ */
 int
 tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 {
@@ -154,11 +187,10 @@
 			tcp_sack_free(tp, tsb);
 	}
 
-	if ((to->to_flags & TOF_SACK) && to->to_nsacks == 0) {
-		/* remove all sack blocks, strange reneg */
-		tcp_sack_flush(tp);
+	/* SACK option present but without any blocks. */
+	if ((to->to_flags & TOF_SACK) && to->to_nsacks == 0)
 		return (0);
-	} else if (!(to->to_flags & TOF_SACK))
+	else if (!(to->to_flags & TOF_SACK))
 		return (0);
 
 	/* Integrate SACK blocks from segment. */
@@ -169,24 +201,45 @@
 		sack.tsb_blk.start = ntohl(sack.tsb_blk.start);
 		sack.tsb_blk.end = ntohl(sack.tsb_blk.end);
 
-		/* Validity checks on SACK blocks as received from sender. */
-		if (SEQ_GT(sack.tsb_blk.start, sack.tsb_blk.end) ||
+		/*
+		 * The receiver has reneged, that is, it has flushed the
+		 * previously reported data from its reassembly queue.
+		 * It is a bit difficult to reliably detect this condition.
+		 * We try our best here to avoid false positives.
+		 *  RFC2018: section 8
+		 */
+		if (to->to_nsacks == 1 && !RB_EMPTY(&tp->snd_sackblocks) &&
+		    to->to_len <= TCP_MAXOLEN - TCPOLEN_SACK &&
+		    SEQ_DELTA(sack.tsb_blk.start, sack.tsb_blk.end) <= tp->snd_mss) {
+			tcp_sack_flush(tp);
+		}
+
+		/* XXXAO: Experimental explicit reneg: zero-length first block. */
+		if (sack.tsb_blk.start == sack.tsb_blk.end &&
+		    i == 0 && to->to_nsacks > 1 &&
+		    SEQ_GT(sack.tsb_blk.start, tp->snd_una) &&
+		    SEQ_LEQ(sack.tsb_blk.start, tp->snd_nxt)) {
+			tcp_sack_flush(tp);
+		}
+
+		/* Sanity checks on SACK block. */
+		if (SEQ_GEQ(sack.tsb_blk.start, sack.tsb_blk.end) ||
 		    SEQ_LEQ(sack.tsb_blk.start, th_ack) ||
 		    SEQ_GT(sack.tsb_blk.end, tp->snd_nxt))
 			continue;
 
-		/* XXXAO: Implicit-explicit reneg. */
-		if (sack.tsb_blk.start == sack.tsb_blk.end) {
-			/* Remove all sackblocks. */
-			tcp_sack_flush(tp);
-			continue;
-		}
-
 		/* Return match that has at least partial overlap to either side. */
 		if ((tsb = RB_FIND(tcp_sackblocks, &tp->snd_sackblocks, &sack)) != NULL) {
-			/* within a block, was a duplicate retransmit, D-SACK. */
+			/* Within an already known block, common case. */
 			if (SEQ_GEQ(sack.tsb_blk.start, tsb->tsb_blk.start) &&
 			    SEQ_LEQ(sack.tsb_blk.end, tsb->tsb_blk.end)) {
+				/*
+				 * D-SACK, was a duplicate retransmit.
+				 *  RFC2883: section 5
+				 */
+				if (i == 0 && SEQ_DELTA(sack.tsb_blk.start, sack.tsb_blk.end) <= tp->snd_mss) {
+					/* XXXAO: TCPSTAT_INC() D-SACK counter. */
+				}
 				continue;
 			}
 			/* Extends the end, common case. */
@@ -229,16 +282,6 @@
 	return (sacked);
 }
 
-void
-tcp_sack_flush(struct tcpcb *tp)
-{
-	struct tcp_sack_block *tsb, *tsbn;
-
-	RB_FOREACH_SAFE(tsb, tcp_sackblocks, &tp->snd_sackblocks, tsbn) {
-		tcp_sack_free(tp, tsb);
-	}
-}
-
 #ifdef DDB
 static void
 db_print_sackblocks(struct tcpcb *tp)

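A note on the ranged red-black tree lookup used above: the body of
tcp_sack_cmp() lies outside the diff context, but the idea is that RB_FIND()
returns a node for which the compare function reports equality, so a compare
that treats overlapping (or touching) ranges as equal turns the tree into a
store of disjoint blocks with overlap lookup.  A minimal standalone sketch of
such a compare, using hypothetical names (struct blk, blk_cmp()) rather than
the actual ones from this change:

#include <sys/types.h>
#include <sys/tree.h>

typedef u_int32_t tcp_seq;

/* Modular sequence-space comparison, modeled on <netinet/tcp_seq.h>. */
#define	SEQ_LT(a, b)	((int)((a) - (b)) < 0)

struct blk {
	RB_ENTRY(blk)	blk_entry;
	tcp_seq		blk_start;	/* left edge, inclusive */
	tcp_seq		blk_end;	/* right edge, exclusive */
};

RB_HEAD(blktree, blk);

/*
 * Ranged compare: blocks that overlap or touch compare as equal, so
 * RB_FIND() returns a block that at least partially overlaps the key.
 * The insert path must therefore merge ranges instead of adding
 * overlapping nodes, keeping the tree free of overlapping elements.
 */
static __inline int
blk_cmp(struct blk *a, struct blk *b)
{

	if (SEQ_LT(a->blk_end, b->blk_start))
		return (-1);		/* a entirely below b */
	if (SEQ_LT(b->blk_end, a->blk_start))
		return (1);		/* a entirely above b */
	return (0);			/* overlap or adjacency */
}

RB_PROTOTYPE_STATIC(blktree, blk, blk_entry, blk_cmp);
RB_GENERATE_STATIC(blktree, blk, blk_entry, blk_cmp);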
