PERFORCE change 166292 for review

Andre Oppermann andre at FreeBSD.org
Sun Jul 19 22:15:52 UTC 2009


http://perforce.freebsd.org/chv.cgi?CH=166292

Change 166292 by andre at andre_t61 on 2009/07/19 22:15:42

	Rename tcp_reass_enabled to tcp_reass_enable.
	Change tcp_reass_maxblocks to a limit per connection.
	Add tcp_reass_globalmaxblocks as global zonelimit.
	Change tcp_reass_qtimo to tcp_reass_timeout as fixed timeout in milliseconds.
	Update sysctl descriptions.
	Decouple reassembly block zonelimit from nmbclusters.
	Add per connection block counter, tracking and limiting code.
	Combine all exit cases where the mbuf is freed into goto label 'done'.
	Differentiate between space and time efficiency through the use of m_collapse().
	Add RFC2883 D-SACK support for duplicate retransmits.
	Fix merging of next/previous block test.

Affected files ...

.. //depot/projects/tcp_reass/netinet/tcp_reass.c#36 edit
.. //depot/projects/tcp_reass/netinet/tcp_var.h#20 edit

Differences ...

==== //depot/projects/tcp_reass/netinet/tcp_reass.c#36 (text+ko) ====

@@ -107,25 +107,30 @@
 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
     "TCP Segment Reassembly Queue");
 
-static int tcp_reass_enabled = 1;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, enable, CTLFLAG_WR,
-    &tcp_reass_enabled, 0,
-    "Enable/disable use of TCP Reassembly Queue");
+static int tcp_reass_enable = 1;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, enable, CTLFLAG_RW,
+    &tcp_reass_enable, 0,
+    "Enable/disable use of TCP reassembly queue");
 
-static int tcp_reass_maxblocks = 65535;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxblocks, CTLFLAG_RDTUN,
+static int tcp_reass_maxblocks = 32;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxblocks, CTLFLAG_RW,
     &tcp_reass_maxblocks, 0,
-    "Global maximum number of TCP Segment Blocks in Reassembly Queue");
+    "Per connection limit of TCP segment blocks in reassembly queue");
+
+static int tcp_reass_globalmaxblocks = 65535;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, globalmaxblocks, CTLFLAG_RDTUN,
+    &tcp_reass_globalmaxblocks, 0,
+    "Global limit of TCP segment blocks in reassembly queue");
 
-static int tcp_reass_qtimo = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, queue_timeout, CTLFLAG_RW,
-    &tcp_reass_qtimo, 0,
-    "Reassembly Queue Timeout in multiples of the Retransmission Timeout");
+static int tcp_reass_timeout = 30 * HZ;
+SYSCTL_PROC(_net_inet_tcp_reass, OID_AUTO, timeout, CTLTYPE_INT|CTLFLAG_RW,
+    &tcp_reass_timeout, NULL, sysctl_msec_to_ticks, "I",
+    "Reassembly queue flush timeout in milliseconds");
 
 static int tcp_reass_spacetime = 0;
 SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, space_time, CTLFLAG_RW,
     &tcp_reass_spacetime, 0,
-    "Reassembly Queue strategy of space vs. time efficiency");
+    "Reassembly queue strategy of space vs. time efficiency");
 
 static struct tcp_reass_block *
     tcp_reass_merge(struct tcp_reass_block *, struct tcp_reass_block *);
@@ -158,30 +163,17 @@
 #endif
 
 /*
- * Adjust TCP reassembly zone limits when the nmbclusters zone changes.
- */
-static void
-tcp_reass_zone_change(void *tag)
-{
-
-	tcp_reass_maxblocks = nmbclusters / 16;
-	uma_zone_set_max(tcp_reass_zone, tcp_reass_maxblocks);
-}
-
-/*
  * Initialize TCP reassembly zone on startup.
  */
 void
 tcp_reass_init(void)
 {
 
-	TUNABLE_INT_FETCH("net.inet.tcp.reass.maxblocks",
-	    &tcp_reass_maxblocks);
+	TUNABLE_INT_FETCH("net.inet.tcp.reass.globalmaxblocks",
+	    &tcp_reass_globalmaxblocks);
 	tcp_reass_zone = uma_zcreate("tcpreass", sizeof(struct tcp_reass_block),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
-	uma_zone_set_max(tcp_reass_zone, tcp_reass_maxblocks);
-	EVENTHANDLER_REGISTER(nmbclusters_change,
-	    tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
+	uma_zone_set_max(tcp_reass_zone, tcp_reass_globalmaxblocks);
 }
 
 /*
@@ -252,6 +244,7 @@
 	if (trb->trb_m != NULL)
 		m_freem(trb->trb_m);
 	tp->rcv_reass_size -= SEQ_DELTA(trb->trb_seqs, trb->trb_seqe);
+	tp->rcv_reass_blocks--;
 	uma_zfree(tcp_reass_zone, trb);
 }
 
@@ -326,11 +319,8 @@
 	th = NULL;		/* Prevent further use. */
 
 	/* Check if it is really neccessary to do all the work. */
-	if (!tcp_reass_enabled && RB_EMPTY(&tp->rcv_reass)) {
-		*tlenp = 0;
-		m_freem(m);
-		return (0);
-	}
+	if (!tcp_reass_enable && RB_EMPTY(&tp->rcv_reass))
+		goto done;
 
 	KASSERT(SEQ_LT(tp->rcv_nxt, th_seq),
 	    ("%s: sequence number below rcv_nxt", __func__));
@@ -359,16 +349,13 @@
 	 * buffer vs. actual real data with 2k clusters and 1500 byte
 	 * packets by introducing a correction factor of 11/8th.
 	 */
-	/*
 	if (th_seq != tp->rcv_nxt &&
-	    tp->t_trqmcnt > (sbspace(&so->so_rcv) / 8 * 11)) {
+	    tp->rcv_reass_blocks > tcp_reass_maxblocks) {
+		//(sbspace(&so->so_rcv) / 8 * 11)
 		TCPSTAT_INC(tcps_reass_overflow);
 		TCPSTAT_INC(tcps_rcvmemdrop);
-		m_freem(m);
-		*tlenp = 0;
-		return (0);
+		goto done;
 	}
-	*/
 
 	/*
 	 * FIN handling is a bit tricky.
@@ -390,10 +377,9 @@
 			tcp_timer_activate(tp, TT_REASS, 0);
 			return (thflags);
 		}
-	} else if (*tlenp == 0) {
-		m_freem(m);
-		return (0);
-	} else
+	} else if (*tlenp == 0)
+		goto done;
+	else
 		thflags &= ~TH_FIN;
 
 	/* Get rid of packet header and mtags. */
@@ -401,7 +387,8 @@
 	/* Trim empty mbufs from head of chain. */
 	m = m_trimhead(m);
 	/* Compact mbuf chain. */
-	m = m_collapse(m, M_DONTWAIT, 1024);
+	if (tcp_reass_spacetime)
+		m = m_collapse(m, M_DONTWAIT, 1024);
 
 	KASSERT(m != NULL, ("%s: m is NULL after collapse", __func__));
 
@@ -420,9 +407,9 @@
 		if (SEQ_GEQ(trbs.trb_seqs, trb->trb_seqs) &&
 		    SEQ_LEQ(trbs.trb_seqe, trb->trb_seqe)) {
 			tcp_reass_sacktrack(tp, trb);
-			m_freem(m);
-			*tlenp = 0;
-			return (0);
+			tp->rcv_reass_dsack.start = trbs.trb_seqs;
+			tp->rcv_reass_dsack.end = trbs.trb_seqe;
+			goto done;
 		}
 		tp->rcv_reass_size += SEQ_DELTA(trb->trb_seqs, trb->trb_seqe);
 
@@ -433,7 +420,7 @@
 
 			/* Merge in next blocks if there is overlap. */
 			while ((trbn = RB_NEXT(tcp_ra, &tp->rcv_reass, trb)) != NULL &&
-			    SEQ_GEQ(trbn->trb_seqs, trb->trb_seqe)) {
+			    SEQ_GEQ(trb->trb_seqe, trbn->trb_seqs)) {
 				trbn = tcp_reass_merge(trb, trbn);
 				tcp_reass_free(tp, trbn);
 			}
@@ -446,7 +433,7 @@
 
 			/* Merge in previous blocks if there is overlap. */
 			while ((trbn = RB_PREV(tcp_ra, &tp->rcv_reass, trb)) != NULL &&
-			    SEQ_GEQ(trbn->trb_seqe, trb->trb_seqs)) {
+			    SEQ_LEQ(trb->trb_seqs, trbn->trb_seqe)) {
 				trbn = tcp_reass_merge(trb, trbn);
 				tcp_reass_free(tp, trbn);
 			}
@@ -460,6 +447,7 @@
 		KASSERT(trbn == NULL, ("%s: RB_INSERT failed", __func__));
 		tcp_reass_sacktrack(tp, trb);
 		tp->rcv_reass_size += SEQ_DELTA(trb->trb_seqs, trb->trb_seqe);
+		tp->rcv_reass_blocks++;
 	} else if (tp->rcv_nxt == th_seq) {
 		trbn = RB_INSERT(tcp_ra, &tp->rcv_reass, &trbs);
 		KASSERT(trbn == NULL, ("%s: RB_INSERT failed", __func__));
@@ -483,7 +471,7 @@
 	TCPSTAT_INC(tcps_reass_missingseg);
 
 	SOCKBUF_LOCK(&so->so_rcv);
-
+	/* We can only ever dequeue one block. */
 	trb = RB_MIN(tcp_ra, &tp->rcv_reass);
 	if (!(so->so_rcv.sb_state & SBS_CANTRCVMORE)) {
 		sbappendstream_locked(&so->so_rcv, trb->trb_m);
@@ -506,14 +494,19 @@
 	 * the sequence space and if queue is not empty.  Otherwise
 	 * deactivate it.
 	 */
-	if (tcp_reass_qtimo && !RB_EMPTY(&tp->rcv_reass))
+	if (tcp_reass_timeout && !RB_EMPTY(&tp->rcv_reass))
 		tcp_timer_activate(tp, TT_REASS,
-		    tp->t_rxtcur * tcp_reass_qtimo);
+		    tp->t_rxtcur * tcp_reass_timeout);
 	else
 		tcp_timer_activate(tp, TT_REASS, 0);
 
 	ND6_HINT(tp);
 	return (thflags);
+
+done:
+	m_freem(m);
+	*tlenp = 0;
+	return (0);
 }
 
 /*
@@ -538,7 +531,11 @@
 		}
 		trb->trb_seqe = trbn->trb_seqe;
 		trb->trb_mt->m_next = trbn->trb_m;
-		trb->trb_mt = trbn->trb_mt;
+		if (tcp_reass_spacetime) {
+			trb->trb_mt = m_collapse(trb->trb_mt, M_DONTWAIT, 1024);
+			trb->trb_mt = m_last(trb->trb_mt, NULL);
+		} else
+			trb->trb_mt = trbn->trb_mt;
 	} else if (SEQ_LEQ(trb->trb_seqs, trbn->trb_seqe)) {
 		if (SEQ_LEQ(trb->trb_seqs, trbn->trb_seqs))
 			return (trbn);
@@ -547,8 +544,12 @@
 			trb->trb_m = m_trimhead(trb->trb_m);
 		}
 		trb->trb_seqs = trbn->trb_seqs;
+		trb->trb_m = trbn->trb_m;
 		trbn->trb_mt->m_next = trb->trb_m;
-		trb->trb_m = trbn->trb_m;
+		if (tcp_reass_spacetime) {
+			trbn->trb_mt = m_collapse(trbn->trb_mt, M_DONTWAIT, 1024);
+			trb->trb_mt = m_last(trbn->trb_mt, NULL);
+		}
 	} else
 		return (NULL);
 
@@ -562,13 +563,15 @@
 /*
  * Put the sequence number of the reassembly queue blocks into
  * the SACK options of an outgoing segment.
+ *  RFC2018: section ...
+ *  RFC2883: section ...
  */
 int
 tcp_reass_sack(struct tcpcb *tp, u_char *optp, int numsacks)
 {
+	int nsacks = 0;
+	tcp_seq sack_seq;
 	struct tcp_reass_block *trb;
-	tcp_seq sack_seq;
-	int nsacks = 0;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	KASSERT(numsacks > 0,
@@ -576,9 +579,24 @@
 	KASSERT(!LIST_EMPTY(&tp->rcv_reass_sack),
 	    ("%s: sack list empty", __func__));
 
+	/* DSACK */
+	if (tp->rcv_reass_dsack.start == tp->rcv_reass_dsack.end) {
+		sack_seq = htonl(tp->rcv_reass_dsack.start);
+		bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
+		optp += sizeof(sack_seq);
+		sack_seq = htonl(tp->rcv_reass_dsack.end);
+		bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
+		optp += sizeof(sack_seq);
+		tp->rcv_reass_dsack.start = 0;
+		tp->rcv_reass_dsack.end = 0;
+		numsacks--;
+		nsacks++;
+	}
+
 	/*
-	 * The most recent block must appear first.  RFC2018, Section 4.
-	 * Add the other blocks in most recent created or updated order.
+	 * The most recent block must appear first.  Add the other
+	 * blocks in most recent created or updated order.
+	 *  RFC2018: section 4
 	 */
 	LIST_FOREACH(trb, &tp->rcv_reass_sack, trb_sack) {
 		if (numsacks < 1)

==== //depot/projects/tcp_reass/netinet/tcp_var.h#20 (text+ko) ====

@@ -106,8 +106,10 @@
  */
 struct tcpcb {
 	RB_HEAD(tcp_ra, tcp_reass_block) rcv_reass;	/* segment reassembly queue */
+	int	rcv_reass_size;		/* segment reassembly memory usage */
+	int	rcv_reass_blocks;	/* blocks in reassembly queue */
 	LIST_HEAD(tcp_ras, tcp_reass_block) rcv_reass_sack;	/* last additions to reass queue */
-	int	rcv_reass_size;		/* segment reassembly memory usage */
+	struct sackblk	rcv_reass_dsack;	/* DSACK block */
 
 	int	t_dupacks;		/* consecutive dup acks recd */
 


More information about the p4-projects mailing list