PERFORCE change 180312 for review

Andre Oppermann andre at FreeBSD.org
Tue Jun 29 08:23:49 UTC 2010


http://p4web.freebsd.org/@@180312?ac=10

Change 180312 by andre at andre_t61 on 2010/06/29 08:22:48

	Update and improve comments and add more RFC references.
	Change logic for reassembly timer activation on first block.

Affected files ...

.. //depot/projects/tcp_reass/netinet/tcp_reass.c#56 edit

Differences ...

==== //depot/projects/tcp_reass/netinet/tcp_reass.c#56 (text+ko) ====

@@ -64,7 +64,8 @@
  * queue has O(n) worst case behavior whereas the red-black tree is
  * O(log n).  This prevents complexity attacks where a long chain of
  * blocks would have to be traversed to find the right place for the new
- * segment.
+ * segment.  Especially with high bandwidth*delay product links and large
+ * socket buffers this is a valid concern.
  *
  * For the segment merging into a block queue structure the operator can
  * chose between time and space efficiency.  For time efficiency only the
@@ -80,26 +81,26 @@
  * reasons and because can't easily know at DMA time how large the packet
  * effectively actually is.
  *
- * Limits, timeout. XXX
+ * To prevent resource exhaustion attacks a local and global limit governs
+ * the number of reassembly blocks.  The local limit prevents single connections
+ * from monopolizing the global limit.  When used in space efficient mode
+ * the total memory consumption of the reassembly queue can't be more than
+ * the receive socket buffer size.  To prevent lost connections from holding
+ * on for too long, a timeout causes flushing of all queued data.
  *
- * The reassembly queue block structure is also used to track SACK
- * information as the data receiver.  A double-linked list is added
- * that tracks the blocks LIFO order of their arrival or updating.
+ * The reassembly queue block structure is also used to track SACK information
+ * as the data receiver.  A double-linked list is added that tracks the blocks
+ * in LIFO order of their arrival or updating.
  *
  * Implemented / relevant RFC's:
  *  RFC793: Transmission Control Protocol
- *  RFC1123: 
- *  RFC2018: This makes us fully compliant to RFC2018 Section 4 including all optional parts marked as "SHOULD".
- *  RFC2883:
+ *  RFC1122: section 4.2.2.20 and section 4.2.2.21
+ *  RFC2018: SACK, section 4 including all optional parts marked as "SHOULD"
+ *  RFC2883: D-SACK, section 4
  *
  * TODO:
- * - Improve comments and annotate RFC references.
- * - Style improvements.
- * - Activate timeout on first insert.
- * - Partial D-SACK support.
  * - D-SACK when only one SACK slot available?
- * - Direct pointer to last block in RB-tree.
- * - Return flags should be same minus FIN.
+ * - Direct pointer to highest seqnum block in RB-tree?
  * - Remove T/TCP gonk.
  * - Lots of testing.
  */
@@ -298,7 +299,7 @@
 
 /*
  * Move block to front of SACK list to report SACK blocks in LIFO order.
- *  RFC2018: section x
+ *  RFC2018: section 4
  */
 static __inline void
 tcp_reass_sacktrack(struct tcpcb *tp, struct tcp_reass_block *trb)
@@ -311,8 +312,9 @@
 }
 
 /*
- * Insert segment into the reassembly queue and 
- * XXX append to socket buffer.
+ * Integrate the new segment into the reassembly queue.  When the segment
+ * matches RCV.NXT append it to the socket buffer including all eligible
+ * data from the reassembly queue.
  *
  * NB: We must always consume the mbuf.  Either by appeding it to
  * the queue or by freeing it.
@@ -454,7 +456,7 @@
 	 * Find a block that has at least partial overlap to either side.
 	 * If no block is found either insert a new one or use the stack
 	 * if the segment directly fits rcv_nxt.
-	 *  RFC793: xxx
+	 *  RFC793: section 3.9, page 69-76
 	 *  RFC2018: section 3
 	 */
 	if ((trb = RB_FIND(tcp_ra, &tp->rcv_reass, &trbs)) != NULL) {
@@ -488,9 +490,8 @@
 		tcp_reass_sacktrack(tp, trb);
 
 		/*
-		 * Update XXX
+		 * Update the D-SACK information.
 		 *  RFC2883: section 4.2,  Reporting Partial Duplicate Segments
-		 * XXXAO: Add D-SACK block.
 		 */
 		if ((len = SEQ_DELTA(trbs.trb_seqs, trbs.trb_seqe)) > 0) {
 			tp->rcv_reass_size -= len;
@@ -516,8 +517,15 @@
 			TCPSTAT_INC(tcps_reass_merge);
 		}
 	} else if (tp->rcv_nxt == th_seq) {
+		/*
+		 * For segments attaching to RCV.NXT do not allocate
+		 * a new block structure to prevent failure under tight
+		 * memory conditions.  Instead use temporary stack based
+		 * storage.
+		 */
 		trb = &trbs;
 	} else if ((trb = (struct tcp_reass_block *)uma_zalloc(tcp_reass_zone, (M_NOWAIT|M_ZERO))) != NULL) {
+		/* Insert new block as no eligible existing block for merging was found. */
 		trb->trb_seqs = trbs.trb_seqs;
 		trb->trb_seqe = trbs.trb_seqe;
 		trb->trb_m = trbs.trb_m;
@@ -527,9 +535,9 @@
 		LIST_INSERT_HEAD(&tp->rcv_reass_sack, trb, trb_sack);
 		tp->rcv_reass_size += SEQ_DELTA(trbs.trb_seqs, trbs.trb_seqe);
 		tp->rcv_reass_blocks++;
-		if (RB_EMPTY(&tp->rcv_reass)) {
+		if (tp->rcv_reass_blocks == 1)) {
 			KASSERT(tcp_timer_active(tp, TT_REASS) == 0,
-			    ("%s: ", __func__));
+			    ("%s: reassembly timer already active", __func__));
 			tcp_timer_activate(tp, TT_REASS, tcp_reass_timeout);
 		}
 		TCPSTAT_INC(tcps_reass_blocks);
@@ -541,9 +549,7 @@
 	KASSERT(tcp_reass_verify(tp, 1),
 	    ("%s: reassembly queue went inconsistent", __func__));
 
-	/*
-	 * Deliver data if we've got the missing segment.
-	 */
+	/* Deliver data if we've got the missing segment. */
 	if (trb->trb_seqs == tp->rcv_nxt)
 		goto present;
 
@@ -574,7 +580,7 @@
 	}
 
 	if (trb == &trbs)
-		m_freem(trb->trb_m);
+		m_freem(trb->trb_m);	/* NB: trb_m can be != NULL */
 	else
 		tcp_reass_free(tp, trb);
 


More information about the p4-projects mailing list