git: ce02bfa524b2 - stable/13 - pf: depessimize table handling

Mateusz Guzik mjg at FreeBSD.org
Wed Jul 14 14:54:40 UTC 2021


The branch stable/13 has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=ce02bfa524b279ae0d5436b38b15353e523f5a92

commit ce02bfa524b279ae0d5436b38b15353e523f5a92
Author:     Mateusz Guzik <mjg at FreeBSD.org>
AuthorDate: 2021-07-02 11:19:56 +0000
Commit:     Mateusz Guzik <mjg at FreeBSD.org>
CommitDate: 2021-07-14 14:50:12 +0000

    pf: depessimize table handling
    
    Creating tables and zeroing their counters induces excessive IPIs (14
    per table), which in turn kills single- and multi-threaded performance.
    
    Work around the problem by extending per-CPU counters with a general
    counter populated on "zeroing" requests -- it stores the sum of the
    per-CPU counters at the time of the request. Requests to report the
    current value then return the sum of the per-CPU counters minus the
    saved value.
    
    Sample timings when loading a config with 100k tables on a 104-way box:
    
    stock:
    
    pfctl -f tables100000.conf  0.39s user 69.37s system 99% cpu 1:09.76 total
    pfctl -f tables100000.conf  0.40s user 68.14s system 99% cpu 1:08.54 total
    
    patched:
    
    pfctl -f tables100000.conf  0.35s user 6.41s system 99% cpu 6.771 total
    pfctl -f tables100000.conf  0.48s user 6.47s system 99% cpu 6.949 total
    
    Reviewed by:    kp (previous version)
    Sponsored by:   Rubicon Communications, LLC ("Netgate")
    
    (cherry picked from commit f92c21a28cd856834249a008771b2f002e477a39)
---
 sys/net/pfvar.h           | 62 ++++++++++++++++++++++++++++++++++++++++++++---
 sys/netpfil/pf/pf_table.c | 58 +++++++++++++++++++++-----------------------
 2 files changed, 85 insertions(+), 35 deletions(-)

diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 5d4860ec981a..3485592ffec7 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -814,16 +814,70 @@ struct pfr_tstats {
 	int		 pfrts_refcnt[PFR_REFCNT_MAX];
 };
 
+#ifdef _KERNEL
+
+struct pfr_kstate_counter {
+	counter_u64_t	pkc_pcpu;
+	u_int64_t	pkc_zero;
+};
+
+static inline int
+pfr_kstate_counter_init(struct pfr_kstate_counter *pfrc, int flags)
+{
+
+	pfrc->pkc_zero = 0;
+	pfrc->pkc_pcpu = counter_u64_alloc(flags);
+	if (pfrc->pkc_pcpu == NULL)
+		return (ENOMEM);
+	return (0);
+}
+
+static inline void
+pfr_kstate_counter_deinit(struct pfr_kstate_counter *pfrc)
+{
+
+	counter_u64_free(pfrc->pkc_pcpu);
+}
+
+static inline u_int64_t
+pfr_kstate_counter_fetch(struct pfr_kstate_counter *pfrc)
+{
+	u_int64_t c;
+
+	c = counter_u64_fetch(pfrc->pkc_pcpu);
+	c -= pfrc->pkc_zero;
+	return (c);
+}
+
+static inline void
+pfr_kstate_counter_zero(struct pfr_kstate_counter *pfrc)
+{
+	u_int64_t c;
+
+	c = counter_u64_fetch(pfrc->pkc_pcpu);
+	pfrc->pkc_zero = c;
+}
+
+static inline void
+pfr_kstate_counter_add(struct pfr_kstate_counter *pfrc, int64_t n)
+{
+
+	counter_u64_add(pfrc->pkc_pcpu, n);
+}
+
 struct pfr_ktstats {
 	struct pfr_table pfrts_t;
-	counter_u64_t	 pfrkts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
-	counter_u64_t	 pfrkts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
-	counter_u64_t	 pfrkts_match;
-	counter_u64_t	 pfrkts_nomatch;
+	struct pfr_kstate_counter	 pfrkts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
+	struct pfr_kstate_counter	 pfrkts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX];
+	struct pfr_kstate_counter	 pfrkts_match;
+	struct pfr_kstate_counter	 pfrkts_nomatch;
 	long		 pfrkts_tzero;
 	int		 pfrkts_cnt;
 	int		 pfrkts_refcnt[PFR_REFCNT_MAX];
 };
+
+#endif /* _KERNEL */
+
 #define	pfrts_name	pfrts_t.pfrt_name
 #define pfrts_flags	pfrts_t.pfrt_flags
 
diff --git a/sys/netpfil/pf/pf_table.c b/sys/netpfil/pf/pf_table.c
index f643790ff620..cd7d96eacd13 100644
--- a/sys/netpfil/pf/pf_table.c
+++ b/sys/netpfil/pf/pf_table.c
@@ -1326,15 +1326,15 @@ pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size,
 		for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
 			for (pfr_op = 0; pfr_op < PFR_OP_TABLE_MAX; pfr_op ++) {
 				tbl->pfrts_packets[pfr_dir][pfr_op] =
-				    counter_u64_fetch(
-					p->pfrkt_packets[pfr_dir][pfr_op]);
+				    pfr_kstate_counter_fetch(
+					&p->pfrkt_packets[pfr_dir][pfr_op]);
 				tbl->pfrts_bytes[pfr_dir][pfr_op] =
-				    counter_u64_fetch(
-					p->pfrkt_bytes[pfr_dir][pfr_op]);
+				    pfr_kstate_counter_fetch(
+					&p->pfrkt_bytes[pfr_dir][pfr_op]);
 			}
 		}
-		tbl->pfrts_match = counter_u64_fetch(p->pfrkt_match);
-		tbl->pfrts_nomatch = counter_u64_fetch(p->pfrkt_nomatch);
+		tbl->pfrts_match = pfr_kstate_counter_fetch(&p->pfrkt_match);
+		tbl->pfrts_nomatch = pfr_kstate_counter_fetch(&p->pfrkt_nomatch);
 		tbl->pfrts_tzero = p->pfrkt_tzero;
 		tbl->pfrts_cnt = p->pfrkt_cnt;
 		for (pfr_op = 0; pfr_op < PFR_REFCNT_MAX; pfr_op++)
@@ -1870,12 +1870,12 @@ pfr_clstats_ktable(struct pfr_ktable *kt, long tzero, int recurse)
 	}
 	for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
 		for (pfr_op = 0; pfr_op < PFR_OP_TABLE_MAX; pfr_op ++) {
-			counter_u64_zero(kt->pfrkt_packets[pfr_dir][pfr_op]);
-			counter_u64_zero(kt->pfrkt_bytes[pfr_dir][pfr_op]);
+			pfr_kstate_counter_zero(&kt->pfrkt_packets[pfr_dir][pfr_op]);
+			pfr_kstate_counter_zero(&kt->pfrkt_bytes[pfr_dir][pfr_op]);
 		}
 	}
-	counter_u64_zero(kt->pfrkt_match);
-	counter_u64_zero(kt->pfrkt_nomatch);
+	pfr_kstate_counter_zero(&kt->pfrkt_match);
+	pfr_kstate_counter_zero(&kt->pfrkt_nomatch);
 	kt->pfrkt_tzero = tzero;
 }
 
@@ -1905,28 +1905,24 @@ pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset)
 
 	for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
 		for (pfr_op = 0; pfr_op < PFR_OP_TABLE_MAX; pfr_op ++) {
-			kt->pfrkt_packets[pfr_dir][pfr_op] =
-			    counter_u64_alloc(M_NOWAIT);
-			if (! kt->pfrkt_packets[pfr_dir][pfr_op]) {
+			if (pfr_kstate_counter_init(
+			    &kt->pfrkt_packets[pfr_dir][pfr_op], M_NOWAIT) != 0) {
 				pfr_destroy_ktable(kt, 0);
 				return (NULL);
 			}
-			kt->pfrkt_bytes[pfr_dir][pfr_op] =
-			    counter_u64_alloc(M_NOWAIT);
-			if (! kt->pfrkt_bytes[pfr_dir][pfr_op]) {
+			if (pfr_kstate_counter_init(
+			    &kt->pfrkt_bytes[pfr_dir][pfr_op], M_NOWAIT) != 0) {
 				pfr_destroy_ktable(kt, 0);
 				return (NULL);
 			}
 		}
 	}
-	kt->pfrkt_match = counter_u64_alloc(M_NOWAIT);
-	if (! kt->pfrkt_match) {
+	if (pfr_kstate_counter_init(&kt->pfrkt_match, M_NOWAIT) != 0) {
 		pfr_destroy_ktable(kt, 0);
 		return (NULL);
 	}
 
-	kt->pfrkt_nomatch = counter_u64_alloc(M_NOWAIT);
-	if (! kt->pfrkt_nomatch) {
+	if (pfr_kstate_counter_init(&kt->pfrkt_nomatch, M_NOWAIT) != 0) {
 		pfr_destroy_ktable(kt, 0);
 		return (NULL);
 	}
@@ -1977,12 +1973,12 @@ pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr)
 	}
 	for (pfr_dir = 0; pfr_dir < PFR_DIR_MAX; pfr_dir ++) {
 		for (pfr_op = 0; pfr_op < PFR_OP_TABLE_MAX; pfr_op ++) {
-			counter_u64_free(kt->pfrkt_packets[pfr_dir][pfr_op]);
-			counter_u64_free(kt->pfrkt_bytes[pfr_dir][pfr_op]);
+			pfr_kstate_counter_deinit(&kt->pfrkt_packets[pfr_dir][pfr_op]);
+			pfr_kstate_counter_deinit(&kt->pfrkt_bytes[pfr_dir][pfr_op]);
 		}
 	}
-	counter_u64_free(kt->pfrkt_match);
-	counter_u64_free(kt->pfrkt_nomatch);
+	pfr_kstate_counter_deinit(&kt->pfrkt_match);
+	pfr_kstate_counter_deinit(&kt->pfrkt_nomatch);
 
 	free(kt, M_PFTABLE);
 }
@@ -2052,9 +2048,9 @@ pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af)
 	}
 	match = (ke && !ke->pfrke_not);
 	if (match)
-		counter_u64_add(kt->pfrkt_match, 1);
+		pfr_kstate_counter_add(&kt->pfrkt_match, 1);
 	else
-		counter_u64_add(kt->pfrkt_nomatch, 1);
+		pfr_kstate_counter_add(&kt->pfrkt_nomatch, 1);
 	return (match);
 }
 
@@ -2109,8 +2105,8 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af,
 			    ("pfr_update_stats: assertion failed.\n"));
 		op_pass = PFR_OP_XPASS;
 	}
-	counter_u64_add(kt->pfrkt_packets[dir_out][op_pass], 1);
-	counter_u64_add(kt->pfrkt_bytes[dir_out][op_pass], len);
+	pfr_kstate_counter_add(&kt->pfrkt_packets[dir_out][op_pass], 1);
+	pfr_kstate_counter_add(&kt->pfrkt_bytes[dir_out][op_pass], len);
 	if (ke != NULL && op_pass != PFR_OP_XPASS &&
 	    (kt->pfrkt_flags & PFR_TFLAG_COUNTERS)) {
 		counter_u64_add(pfr_kentry_counter(&ke->pfrke_counters,
@@ -2206,7 +2202,7 @@ pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter,
 _next_block:
 	ke = pfr_kentry_byidx(kt, idx, af);
 	if (ke == NULL) {
-		counter_u64_add(kt->pfrkt_nomatch, 1);
+		pfr_kstate_counter_add(&kt->pfrkt_nomatch, 1);
 		return (1);
 	}
 	pfr_prepare_network(&umask, af, ke->pfrke_net);
@@ -2231,7 +2227,7 @@ _next_block:
 		/* this is a single IP address - no possible nested block */
 		PF_ACPY(counter, addr, af);
 		*pidx = idx;
-		counter_u64_add(kt->pfrkt_match, 1);
+		pfr_kstate_counter_add(&kt->pfrkt_match, 1);
 		return (0);
 	}
 	for (;;) {
@@ -2251,7 +2247,7 @@ _next_block:
 			/* lookup return the same block - perfect */
 			PF_ACPY(counter, addr, af);
 			*pidx = idx;
-			counter_u64_add(kt->pfrkt_match, 1);
+			pfr_kstate_counter_add(&kt->pfrkt_match, 1);
 			return (0);
 		}
 


More information about the dev-commits-src-all mailing list