git: 11bf4f2fbb3c - stable/12 - pf: Split the fragment reassembly queue into smaller parts

Kristof Provost kp at FreeBSD.org
Sun Feb 28 16:04:01 UTC 2021


The branch stable/12 has been updated by kp:

URL: https://cgit.FreeBSD.org/src/commit/?id=11bf4f2fbb3c10ceeda799893b61cb2d3d41521d

commit 11bf4f2fbb3c10ceeda799893b61cb2d3d41521d
Author:     Kristof Provost <kp at FreeBSD.org>
AuthorDate: 2018-11-02 15:26:51 +0000
Commit:     Kristof Provost <kp at FreeBSD.org>
CommitDate: 2021-02-28 15:36:18 +0000

    pf: Split the fragment reassembly queue into smaller parts
    
    Remember 16 entry points based on the fragment offset.  Instead of
    a worst case of 8192 list traversals we now check a maximum of 512
    list entries or 16 array elements.
    
    Obtained from:  OpenBSD
    Differential Revision:  https://reviews.freebsd.org/D17733
    
    (cherry picked from commit fd2ea405e601bd5e240153c5de0f7c264946ce6f)
---
 sys/net/pfvar.h          |   6 ++
 sys/netpfil/pf/pf_norm.c | 181 ++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 168 insertions(+), 19 deletions(-)

diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index dcc5bf51fdf6..d6c2bf9120e9 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -1005,6 +1005,12 @@ struct pf_divert {
 #define PFFRAG_FRENT_HIWAT	5000	/* Number of fragment entries */
 #define PFR_KENTRY_HIWAT	200000	/* Number of table entries */
 
+/*
+ * Limit the length of the fragment queue traversal.  Remember
+ * search entry points based on the fragment offset.
+ */
+#define PF_FRAG_ENTRY_POINTS		16
+
 /*
  * ioctl parameter structures
  */
diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c
index cd94d1de7cf7..0e2fdca4c2ce 100644
--- a/sys/netpfil/pf/pf_norm.c
+++ b/sys/netpfil/pf/pf_norm.c
@@ -87,6 +87,7 @@ struct pf_fragment {
 #define fr_af	fr_key.frc_af
 #define fr_proto	fr_key.frc_proto
 
+	struct pf_frent	*fr_firstoff[PF_FRAG_ENTRY_POINTS];
 	RB_ENTRY(pf_fragment) fr_entry;
 	TAILQ_ENTRY(pf_fragment) frag_next;
 	uint32_t	fr_timeout;
@@ -136,6 +137,13 @@ static struct pf_frent *pf_create_fragment(u_short *);
 static int	pf_frent_holes(struct pf_frent *frent);
 static struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *key,
 		    struct pf_frag_tree *tree);
+static inline int	pf_frent_index(struct pf_frent *);
+static void	pf_frent_insert(struct pf_fragment *,
+			    struct pf_frent *, struct pf_frent *);
+void			pf_frent_remove(struct pf_fragment *,
+			    struct pf_frent *);
+struct pf_frent		*pf_frent_previous(struct pf_fragment *,
+			    struct pf_frent *);
 static struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *,
 		    struct pf_frent *, u_short *);
 static struct mbuf *pf_join_fragment(struct pf_fragment *);
@@ -308,6 +316,7 @@ pf_remove_fragment(struct pf_fragment *frag)
 {
 
 	PF_FRAG_ASSERT();
+	KASSERT(frag, ("frag != NULL"));
 
 	RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag);
 	TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
@@ -367,9 +376,150 @@ pf_frent_holes(struct pf_frent *frent)
 	return holes;
 }
 
+static inline int
+pf_frent_index(struct pf_frent *frent)
+{
+	/*
+	 * We have an array of 16 entry points to the queue.  A full size
+	 * 65535 octet IP packet can have 8192 fragments.  So the queue
+	 * traversal length is at most 512 and at most 16 entry points are
+	 * checked.  We need 128 additional bytes on a 64 bit architecture.
+	 */
+	CTASSERT(((u_int16_t)0xffff &~ 7) / (0x10000 / PF_FRAG_ENTRY_POINTS) ==
+	    16 - 1);
+	CTASSERT(((u_int16_t)0xffff >> 3) / PF_FRAG_ENTRY_POINTS == 512 - 1);
+
+	return frent->fe_off / (0x10000 / PF_FRAG_ENTRY_POINTS);
+}
+
+static void
+pf_frent_insert(struct pf_fragment *frag, struct pf_frent *frent,
+    struct pf_frent *prev)
+{
+	int index;
+
+	if (prev == NULL) {
+		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
+	} else {
+		KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off,
+		    ("overlapping fragment"));
+		TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
+	}
+
+	index = pf_frent_index(frent);
+	if (frag->fr_firstoff[index] == NULL) {
+		KASSERT(prev == NULL || pf_frent_index(prev) < index,
+		    ("prev == NULL || pf_frent_index(prev) < index"));
+		frag->fr_firstoff[index] = frent;
+	} else {
+		if (frent->fe_off < frag->fr_firstoff[index]->fe_off) {
+			KASSERT(prev == NULL || pf_frent_index(prev) < index,
+			    ("prev == NULL || pf_frent_index(prev) < index"));
+			frag->fr_firstoff[index] = frent;
+		} else {
+			KASSERT(prev != NULL, ("prev != NULL"));
+			KASSERT(pf_frent_index(prev) == index,
+			    ("pf_frent_index(prev) == index"));
+		}
+	}
+
+	frag->fr_holes += pf_frent_holes(frent);
+}
+
+void
+pf_frent_remove(struct pf_fragment *frag, struct pf_frent *frent)
+{
+	struct pf_frent *prev = TAILQ_PREV(frent, pf_fragq, fr_next);
+	struct pf_frent *next = TAILQ_NEXT(frent, fr_next);
+	int index;
+
+	frag->fr_holes -= pf_frent_holes(frent);
+
+	index = pf_frent_index(frent);
+	KASSERT(frag->fr_firstoff[index] != NULL, ("frent not found"));
+	if (frag->fr_firstoff[index]->fe_off == frent->fe_off) {
+		if (next == NULL) {
+			frag->fr_firstoff[index] = NULL;
+		} else {
+			KASSERT(frent->fe_off + frent->fe_len <= next->fe_off,
+			    ("overlapping fragment"));
+			if (pf_frent_index(next) == index) {
+				frag->fr_firstoff[index] = next;
+			} else {
+				frag->fr_firstoff[index] = NULL;
+			}
+		}
+	} else {
+		KASSERT(frag->fr_firstoff[index]->fe_off < frent->fe_off,
+		    ("frag->fr_firstoff[index]->fe_off < frent->fe_off"));
+		KASSERT(prev != NULL, ("prev != NULL"));
+		KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off,
+		    ("overlapping fragment"));
+		KASSERT(pf_frent_index(prev) == index,
+		    ("pf_frent_index(prev) == index"));
+	}
+
+	TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
+}
+
+struct pf_frent *
+pf_frent_previous(struct pf_fragment *frag, struct pf_frent *frent)
+{
+	struct pf_frent *prev, *next;
+	int index;
+
+	/*
+	 * If there are no fragments after frag, take the final one.  Assume
+	 * that the global queue is not empty.
+	 */
+	prev = TAILQ_LAST(&frag->fr_queue, pf_fragq);
+	KASSERT(prev != NULL, ("prev != NULL"));
+	if (prev->fe_off <= frent->fe_off)
+		return prev;
+	/*
+	 * We want to find a fragment entry that is before frag, but still
+	 * close to it.  Find the first fragment entry that is in the same
+	 * entry point or in the first entry point after that.  As we have
+	 * already checked that there are entries behind frag, this will
+	 * succeed.
+	 */
+	for (index = pf_frent_index(frent); index < PF_FRAG_ENTRY_POINTS;
+	    index++) {
+		prev = frag->fr_firstoff[index];
+		if (prev != NULL)
+			break;
+	}
+	KASSERT(prev != NULL, ("prev != NULL"));
+	/*
+	 * In prev we may have a fragment from the same entry point that is
+	 * before frent, or one that is just one position behind frent.
+	 * In the latter case, we go back one step and have the predecessor.
+	 * There may be none if the new fragment will be the first one.
+	 */
+	if (prev->fe_off > frent->fe_off) {
+		prev = TAILQ_PREV(prev, pf_fragq, fr_next);
+		if (prev == NULL)
+			return NULL;
+		KASSERT(prev->fe_off <= frent->fe_off,
+		    ("prev->fe_off <= frent->fe_off"));
+		return prev;
+	}
+	/*
+	 * In prev is the first fragment of the entry point.  The offset
+	 * of frag is behind it.  Find the closest previous fragment.
+	 */
+	for (next = TAILQ_NEXT(prev, fr_next); next != NULL;
+	    next = TAILQ_NEXT(next, fr_next)) {
+		if (next->fe_off > frent->fe_off)
+			break;
+		prev = next;
+	}
+	return prev;
+}
+
 static struct pf_fragment *
 pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
-		u_short *reason)
+    u_short *reason)
 {
 	struct pf_frent		*after, *next, *prev;
 	struct pf_fragment	*frag;
@@ -416,6 +566,7 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
 		}
 
 		*(struct pf_fragment_cmp *)frag = *key;
+		memset(frag->fr_firstoff, 0, sizeof(frag->fr_firstoff));
 		frag->fr_timeout = time_uptime;
 		frag->fr_maxlen = frent->fe_len;
 		frag->fr_holes = 1;
@@ -425,8 +576,7 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
 		TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);
 
 		/* We do not have a previous fragment. */
-		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
-		frag->fr_holes += pf_frent_holes(frent);
+		pf_frent_insert(frag, frent, NULL);
 
 		return (frag);
 	}
@@ -455,17 +605,15 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
 			goto bad_fragment;
 	}
 
-	/* Find a fragment after the current one. */
-	prev = NULL;
-	TAILQ_FOREACH(after, &frag->fr_queue, fr_next) {
-		if (after->fe_off > frent->fe_off)
-			break;
-		prev = after;
+	/* Find neighbors for newly inserted fragment */
+	prev = pf_frent_previous(frag, frent);
+	if (prev == NULL) {
+		after = TAILQ_FIRST(&frag->fr_queue);
+		KASSERT(after != NULL, ("after != NULL"));
+	} else {
+		after = TAILQ_NEXT(prev, fr_next);
 	}
 
-	KASSERT(prev != NULL || after != NULL,
-	    ("prev != NULL || after != NULL"));
-
 	if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) {
 		uint16_t precut;
 
@@ -493,17 +641,12 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
 
 		/* This fragment is completely overlapped, lose it. */
 		next = TAILQ_NEXT(after, fr_next);
-		frag->fr_holes -= pf_frent_holes(after);
+		pf_frent_remove(frag, after);
 		m_freem(after->fe_m);
-		TAILQ_REMOVE(&frag->fr_queue, after, fr_next);
 		uma_zfree(V_pf_frent_z, after);
 	}
 
-	if (prev == NULL)
-		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
-	else
-		TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
-	frag->fr_holes += pf_frent_holes(frent);
+	pf_frent_insert(frag, frent, prev);
 
 	return (frag);
 
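To make the arithmetic in pf_frent_index() easier to follow, here is a small standalone user-space sketch (illustrative only, not part of the commit; frag_index() is a hypothetical stand-in for the kernel function) that reproduces the bucket computation and the worst-case figures cited in the commit message:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PF_FRAG_ENTRY_POINTS	16	/* same value as in pfvar.h */

/* Hypothetical stand-in for pf_frent_index(): bucket by byte offset. */
static int
frag_index(uint16_t fe_off)
{
	return (fe_off / (0x10000 / PF_FRAG_ENTRY_POINTS));
}

int
main(void)
{
	/* The highest possible fragment offset lands in the last bucket. */
	assert(frag_index(0xffff & ~7) == PF_FRAG_ENTRY_POINTS - 1);

	/*
	 * A 65535-octet packet splits into at most 8192 eight-byte
	 * fragments, so each of the 16 buckets covers at most 512
	 * queue entries: 16 array probes plus 512 list steps worst case.
	 */
	assert((0xffff >> 3) / PF_FRAG_ENTRY_POINTS == 512 - 1);

	/* A fragment at byte offset 0x2000 falls into bucket 2. */
	printf("bucket(0x2000) = %d\n", frag_index(0x2000));
	return (0);
}

The two assert() calls mirror the CTASSERTs the commit adds to pf_frent_index().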


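The heart of pf_frent_previous() is a two-stage lookup: probe at most 16 fr_firstoff[] entry points to find a starting fragment, then walk only the short, offset-sorted stretch that belongs to one bucket. The sketch below shows the same idea on a plain sorted array of offsets instead of the kernel TAILQ; find_previous() and its parameters are hypothetical names meant only to mirror the logic, not the committed implementation.

#include <stdint.h>

#define NBUCKETS	16	/* mirrors PF_FRAG_ENTRY_POINTS */

static int
bucket(uint16_t off)
{
	return (off / (0x10000 / NBUCKETS));
}

/*
 * offs[0..n-1] holds fragment byte offsets in ascending order and
 * firstoff[b] is the index of the first fragment in bucket b, or -1
 * if that bucket is empty.  Return the index of the closest fragment
 * with offs[i] <= off, or -1 if a fragment at 'off' would become the
 * first one.
 */
static int
find_previous(const uint16_t *offs, int n, const int firstoff[NBUCKETS],
    uint16_t off)
{
	int b, i;

	/* Stage 1: probe at most 16 entry points for a starting index. */
	for (b = bucket(off); b < NBUCKETS; b++)
		if (firstoff[b] != -1)
			break;
	if (b == NBUCKETS)
		return (n - 1);		/* every fragment is before 'off' */

	i = firstoff[b];
	if (offs[i] > off)
		return (i - 1);		/* bucket head is already past 'off' */

	/* Stage 2: bounded walk, at most one bucket's worth of entries. */
	while (i + 1 < n && offs[i + 1] <= off)
		i++;
	return (i);
}

The kernel version reaches the same predecessor with TAILQ_PREV() where the sketch steps back to index i - 1, and it can return early via TAILQ_LAST() when the new fragment sorts at or after everything already queued.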