PERFORCE change 166158 for review
Andre Oppermann
andre at FreeBSD.org
Thu Jul 16 08:42:17 UTC 2009
http://perforce.freebsd.org/chv.cgi?CH=166158
Change 166158 by andre at andre_t61 on 2009/07/16 08:41:58
Flush the queue when an RFC2018 reneg is detected.
Add RFC2883 DSACK detection support.
Change sysctl names to better reflect their usage.
Move tcp_sack_flush up to tcp_sack_free.
Add more comments.
Affected files ...
.. //depot/projects/tcp_new/netinet/tcp_sack.c#7 edit
Differences ...
==== //depot/projects/tcp_new/netinet/tcp_sack.c#7 (text+ko) ====
@@ -67,7 +67,14 @@
#endif /* TCPDEBUG */
/*
- * Store all SACK blocks of the scoreboard in a ranged red-black tree.
+ * Implementation of Selective Acknowledgements (SACK) as described in
+ * RFC2018.
+ *
+ * This file implements the data sender part of SACK. It stores all
+ * received SACK blocks in a scoreboard built on a ranged red-black tree.
+ *
+ * The data receiver part (RFC2018: section 4) is part of the reassembly
+ * queue.
*/
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK");
@@ -75,17 +82,20 @@
int tcp_do_sack = 1;
SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW,
&tcp_do_sack, 0, "Enable/Disable TCP SACK support");
-TUNABLE_INT("net.inet.tcp.sack.enable", &tcp_do_sack);
+
+int tcp_do_dsack = 1;
+SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, dsack, CTLFLAG_RW,
+ &tcp_do_dsack, 0, "Enable TCP duplicate D-SACK support");
-static int tcp_sack_maxholes = 128;
-SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, maxholes, CTLFLAG_RW,
- &tcp_sack_maxholes, 0,
- "Maximum number of TCP SACK holes allowed per connection");
+static int tcp_sack_maxblocks = 32;
+SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, maxblocks, CTLFLAG_RW,
+ &tcp_sack_maxholes, 0,
+ "Per connection limit on the number of SACK blocks");
static int tcp_sack_globalmaxholes = 65536;
-SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalmaxholes, CTLFLAG_RW,
- &tcp_sack_globalmaxholes, 0,
- "Global maximum number of TCP SACK holes");
+SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalmaxblocks, CTLFLAG_RW,
+ &tcp_sack_globalmaxblocks, 0,
+ "Global total limit on the number of SACK blocks");
static uma_zone_t tcp_sackblock_zone;
@@ -94,9 +104,14 @@
{
tcp_sackblock_zone = uma_zcreate("tcpsackblk", sizeof(struct tcp_sack_block),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+ /* XXXAO: limit is not adjusted when changed at runtime. */
uma_zone_set_max(tcp_sackblock_zone, tcp_sack_globalmaxholes);
}
+/*
+ * Compare function implementing the ranged lookup on the RB tree.
+ * NB: The tree must never have any overlapping elements.
+ */
static __inline int
tcp_sack_cmp(struct tcp_sack_block *a, struct tcp_sack_block *b)
{
@@ -111,6 +126,26 @@
RB_PROTOTYPE_STATIC(tcp_sackblocks, tcp_sack_block, tsb_rb, tcp_sack_cmp);
RB_GENERATE_STATIC(tcp_sackblocks, tcp_sack_block, tsb_rb, tcp_sack_cmp);
+/*
+ * Verify the integrity of the ranged RB tree.
+ */
+#ifdef INVARIANTS
+static int
+tcp_sack_verify(struct tcpcb *tp)
+{
+ struct tcp_sack_block *tsb, *tsbn;
+
+ RB_FOREACH_SAFE(tsb, tcp_sackblocks, &tp->snd_sackblocks, tsbn) {
+ if (SEQ_GEQ(tsb->tsb_blk.start, tsb->tsb_blk.end) ||
+ SEQ_LEQ(tsb->tsb_blk.start, tp->snd_una) ||
+ SEQ_GT(tsb->tsb_blk.end, tp->snd_nxt) ||
+ (tsbn != NULL && SEQ_GEQ(tsb->tsb_blk.end, tsbn->tsb_blk.start)))
+ return (0);
+ }
+ return (1);
+}
+#endif
+
static void
tcp_sack_free(struct tcpcb *tp, struct tcp_sack_block *tsb)
{
@@ -121,23 +156,21 @@
uma_zfree(tcp_sackblock_zone, tsb);
}
-#ifdef INVARIANTS
-static int
-tcp_sack_verify(struct tcpcb *tp)
+void
+tcp_sack_flush(struct tcpcb *tp)
{
struct tcp_sack_block *tsb, *tsbn;
RB_FOREACH_SAFE(tsb, tcp_sackblocks, &tp->snd_sackblocks, tsbn) {
- if (SEQ_GEQ(tsb->tsb_blk.start, tsb->tsb_blk.end) ||
- SEQ_LEQ(tsb->tsb_blk.start, tp->snd_una) ||
- SEQ_GT(tsb->tsb_blk.end, tp->snd_nxt) ||
- (tsbn != NULL && SEQ_GEQ(tsb->tsb_blk.end, tsbn->tsb_blk.start)))
- return (0);
+ tcp_sack_free(tp, tsb);
}
- return (1);
}
-#endif
+/*
+ * Update the scoreboard to remember which sequence number ranges
+ * the receiver has reported as successfully received.
+ * RFC2018: section 5
+ */
int
tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
{
@@ -154,11 +187,10 @@
tcp_sack_free(tp, tsb);
}
- if ((to->to_flags & TOF_SACK) && to->to_nsacks == 0) {
- /* remove all sack blocks, strange reneg */
- tcp_sack_flush(tp);
+ /* SACK header but no blocks. */
+ if ((to->to_flags & TOF_SACK) && to->to_nsacks == 0)
return (0);
- } else if (!(to->to_flags & TOF_SACK))
+ else if (!(to->to_flags & TOF_SACK))
return (0);
/* Integrate SACK blocks from segment. */
@@ -169,24 +201,45 @@
sack.tsb_blk.start = ntohl(sack.tsb_blk.start);
sack.tsb_blk.end = ntohl(sack.tsb_blk.end);
- /* Validity checks on SACK blocks as received from sender. */
- if (SEQ_GT(sack.tsb_blk.start, sack.tsb_blk.end) ||
+ /*
+ * The receiver has reneged, that is, it has flushed the previously
+ * reported data from its reassembly queue.
+ * It is a bit difficult to reliably detect this condition.
+ * We try our best here to avoid false positives.
+ * RFC2018: section 8
+ */
+ if (to->to_nsacks == 1 && !RB_EMPTY(tp->snd_sackblocks) &&
+ to->to_len <= TCP_MAXOLEN - TCPOLEN_SACK &&
+ SEQ_DELTA(sack.tsb_blk.start, sack.tsb_blk.end) <= tp->snd_mss)
+ tcp_sack_flush(tp);
+ }
+
+ /* XXXAO: Experimental explicit reneg. */
+ if (sack.tsb_blk.start == sack.tsb_blk.end &&
+ i == 0 && to->to_nsacks > 1 &&
+ SEQ_GT(sack.tsb_blk.start, tp->snd_una) &&
+ SEQ_LEQ(sack.tsb_blk.start, tp->snd_nxt)) {
+ tcp_sack_flush(tp);
+ }
+
+ /* Sanity checks on SACK block. */
+ if (SEQ_GEQ(sack.tsb_blk.start, sack.tsb_blk.end) ||
SEQ_LEQ(sack.tsb_blk.start, th_ack) ||
SEQ_GT(sack.tsb_blk.end, tp->snd_nxt))
continue;
- /* XXXAO: Implicit-explicit reneg. */
- if (sack.tsb_blk.start == sack.tsb_blk.end) {
- /* Remove all sackblocks. */
- tcp_sack_flush(tp);
- continue;
- }
-
/* Return match that has at least partial overlap to either side. */
if ((tsb = RB_FIND(tcp_sackblocks, &tp->snd_sackblocks, &sack)) != NULL) {
- /* within a block, was a duplicate retransmit, D-SACK. */
+ /* Within an already known block, common case. */
if (SEQ_GEQ(sack.tsb_blk.start, tsb->tsb_blk.start) &&
SEQ_LEQ(sack.tsb_blk.end, tsb->tsb_blk.end)) {
+ /*
+ * D-SACK, was a duplicate retransmit.
+ * RFC2883: section 5
+ */
+ if (i == 0 && SEQ_DELTA(sack.tsb_blk.start, sack.tsb_blk.end) <= tp->snd_mss) {
+ //TCPSTAT_INC();
+ }
continue;
}
/* Extends the end, common case. */
@@ -229,16 +282,6 @@
return (sacked);
}
-void
-tcp_sack_flush(struct tcpcb *tp)
-{
- struct tcp_sack_block *tsb, *tsbn;
-
- RB_FOREACH_SAFE(tsb, tcp_sackblocks, &tp->snd_sackblocks, tsbn) {
- tcp_sack_free(tp, tsb);
- }
-}
-
#ifdef DDB
static void
db_print_sackblocks(struct tcpcb *tp)
More information about the p4-projects
mailing list