PERFORCE change 180312 for review
Andre Oppermann
andre at FreeBSD.org
Tue Jun 29 08:23:49 UTC 2010
http://p4web.freebsd.org/@@180312?ac=10
Change 180312 by andre at andre_t61 on 2010/06/29 08:22:48
Update and improve comments and add more RFC references.
Change logic for reassembly timer activation on first block.
Affected files ...
.. //depot/projects/tcp_reass/netinet/tcp_reass.c#56 edit
Differences ...
==== //depot/projects/tcp_reass/netinet/tcp_reass.c#56 (text+ko) ====
@@ -64,7 +64,8 @@
* queue has O(n) worst case behavior whereas the red-black tree is
* O(log n). This prevents complexity attacks where a long chain of
* blocks would have to be traversed to find the right place for the new
- * segment.
+ * segment. Especially with high bandwidth*delay product links and large
+ * socket buffers this is a valid concern.
*
* For the segment merging into a block queue structure the operator can
* choose between time and space efficiency. For time efficiency only the
@@ -80,26 +81,26 @@
* reasons and because can't easily know at DMA time how large the packet
* effectively actually is.
*
- * Limits, timeout. XXX
+ * To prevent resource exhaustion attacks a local and global limit governs
+ * the number of reassembly blocks. The local limit prevents single connections
+ * from monopolizing the global limit. When used in space efficient mode
+ * the total memory consumption of the reassembly queue can't be more than
+ * the receive socket buffer size. To prevent lost connections from holding
+ * on for too long a timeout causes flushing of all queued data.
*
- * The reassembly queue block structure is also used to track SACK
- * information as the data receiver. A double-linked list is added
- * that tracks the blocks LIFO order of their arrival or updating.
+ * The reassembly queue block structure is also used to track SACK information
+ * as the data receiver. A double-linked list is added that tracks the blocks
+ * in LIFO order of their arrival or updating.
*
* Implemented / relevant RFC's:
* RFC793: Transmission Control Protocol
- * RFC1123:
- * RFC2018: This makes us fully compliant to RFC2018 Section 4 including all optional parts marked as "SHOULD".
- * RFC2883:
+ * RFC1122: section 4.2.2.20 and section 4.2.2.21
+ * RFC2018: SACK, section 4 including all optional parts marked as "SHOULD"
+ * RFC2883: D-SACK, section 4
*
* TODO:
- * - Improve comments and annotate RFC references.
- * - Style improvements.
- * - Activate timeout on first insert.
- * - Partial D-SACK support.
* - D-SACK when only one SACK slot available?
- * - Direct pointer to last block in RB-tree.
- * - Return flags should be same minus FIN.
+ * - Direct pointer to highest seqnum block in RB-tree?
* - Remove T/TCP gonk.
* - Lots of testing.
*/
@@ -298,7 +299,7 @@
/*
* Move block to front of SACK list to report SACK blocks in LIFO order.
- * RFC2018: section x
+ * RFC2018: section 4
*/
static __inline void
tcp_reass_sacktrack(struct tcpcb *tp, struct tcp_reass_block *trb)
@@ -311,8 +312,9 @@
}
/*
- * Insert segment into the reassembly queue and
- * XXX append to socket buffer.
+ * Integrate the new segment into the reassembly queue. When the segment
+ * matches RCV.NXT append it to the socket buffer including all eligible
+ * data from the reassembly queue.
*
* NB: We must always consume the mbuf. Either by appending it to
* the queue or by freeing it.
@@ -454,7 +456,7 @@
* Find a block that has at least partial overlap to either side.
* If no block is found either insert a new one or use the stack
* if the segment directly fits rcv_nxt.
- * RFC793: xxx
+ * RFC793: section 3.9, page 69-76
* RFC2018: section 3
*/
if ((trb = RB_FIND(tcp_ra, &tp->rcv_reass, &trbs)) != NULL) {
@@ -488,9 +490,8 @@
tcp_reass_sacktrack(tp, trb);
/*
- * Update XXX
+ * Update the D-SACK information.
* RFC2883: section 4.2, Reporting Partial Duplicate Segments
- * XXXAO: Add D-SACK block.
*/
if ((len = SEQ_DELTA(trbs.trb_seqs, trbs.trb_seqe)) > 0) {
tp->rcv_reass_size -= len;
@@ -516,8 +517,15 @@
TCPSTAT_INC(tcps_reass_merge);
}
} else if (tp->rcv_nxt == th_seq) {
+ /*
+ * For segments attaching to RCV.NXT do not allocate
+ * a new block structure to prevent failure under tight
+ * memory conditions. Instead use temporary stack based
+ * storage.
+ */
trb = &trbs;
} else if ((trb = (struct tcp_reass_block *)uma_zalloc(tcp_reass_zone, (M_NOWAIT|M_ZERO))) != NULL) {
+ /* Insert new block as no eligible existing block for merging was found. */
trb->trb_seqs = trbs.trb_seqs;
trb->trb_seqe = trbs.trb_seqe;
trb->trb_m = trbs.trb_m;
@@ -527,9 +535,9 @@
LIST_INSERT_HEAD(&tp->rcv_reass_sack, trb, trb_sack);
tp->rcv_reass_size += SEQ_DELTA(trbs.trb_seqs, trbs.trb_seqe);
tp->rcv_reass_blocks++;
- if (RB_EMPTY(&tp->rcv_reass)) {
+ if (tp->rcv_reass_blocks == 1)) {
KASSERT(tcp_timer_active(tp, TT_REASS) == 0,
- ("%s: ", __func__));
+ ("%s: reassembly timer already active", __func__));
tcp_timer_activate(tp, TT_REASS, tcp_reass_timeout);
}
TCPSTAT_INC(tcps_reass_blocks);
@@ -541,9 +549,7 @@
KASSERT(tcp_reass_verify(tp, 1),
("%s: reassembly queue went inconsistent", __func__));
- /*
- * Deliver data if we've got the missing segment.
- */
+ /* Deliver data if we've got the missing segment. */
if (trb->trb_seqs == tp->rcv_nxt)
goto present;
@@ -574,7 +580,7 @@
}
if (trb == &trbs)
- m_freem(trb->trb_m);
+ m_freem(trb->trb_m); /* NB: trb_m can be =! NULL */
else
tcp_reass_free(tp, trb);
More information about the p4-projects
mailing list