git: 20c4899a8eea - main - pf: Do not hold PF_RULES_RLOCK while processing Ethernet rules

From: Kristof Provost <kp_at_FreeBSD.org>
Date: Wed, 02 Mar 2022 16:00:43 UTC
The branch main has been updated by kp:

URL: https://cgit.FreeBSD.org/src/commit/?id=20c4899a8eea4417b1717c06a47bfc0494d64085

commit 20c4899a8eea4417b1717c06a47bfc0494d64085
Author:     Kristof Provost <kp@FreeBSD.org>
AuthorDate: 2021-02-10 12:28:14 +0000
Commit:     Kristof Provost <kp@FreeBSD.org>
CommitDate: 2022-03-02 16:00:03 +0000

    pf: Do not hold PF_RULES_RLOCK while processing Ethernet rules
    
    Avoid the overhead of acquiring a (read) RULES lock when processing the
    Ethernet rules.
    We can get away with that because when rules are modified they're staged
    in V_pf_keth_inactive. We take care to ensure the swap to V_pf_keth is
    atomic, so that pf_test_eth_rule() always sees either the old rules, or
    the new ruleset.
    
    We need to take care not to delete the old ruleset until we're sure no
    pf_test_eth_rule() is still running with those. We accomplish that by
    using NET_EPOCH_CALL() to actually free the old rules.
    
    Sponsored by:   Rubicon Communications, LLC ("Netgate")
    Differential Revision:  https://reviews.freebsd.org/D31739
---
 sys/net/pfvar.h             |  3 +++
 sys/netpfil/pf/pf.c         | 20 ++++++--------------
 sys/netpfil/pf/pf_ioctl.c   | 36 +++++++++++++++++++++++++++++++++---
 sys/netpfil/pf/pf_ruleset.c |  1 +
 4 files changed, 43 insertions(+), 17 deletions(-)

diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 51b7623a5c61..a0b23759857a 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -39,6 +39,7 @@
 #include <sys/queue.h>
 #include <sys/counter.h>
 #include <sys/cpuset.h>
+#include <sys/epoch.h>
 #include <sys/malloc.h>
 #include <sys/nv.h>
 #include <sys/refcount.h>
@@ -624,6 +625,8 @@ struct pf_keth_settings {
 	struct pf_keth_rules	rules;
 	uint32_t		ticket;
 	int			open;
+	struct vnet		*vnet;
+	struct epoch_context	epoch_ctx;
 };
 
 union pf_krule_ptr {
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 8993e5a8698d..c45880a6974b 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -3712,18 +3712,18 @@ pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf *m)
 	struct ether_header *e;
 	struct pf_keth_rule *r, *rm;
 	struct pf_mtag *mtag;
+	struct pf_keth_settings *settings;
 	uint8_t action;
 
-	PF_RULES_RLOCK_TRACKER;
+	NET_EPOCH_ASSERT();
 
 	MPASS(kif->pfik_ifp->if_vnet == curvnet);
 	NET_EPOCH_ASSERT();
 
 	e = mtod(m, struct ether_header *);
 
-	PF_RULES_RLOCK();
-
-	r = TAILQ_FIRST(&V_pf_keth->rules);
+	settings = ck_pr_load_ptr(&V_pf_keth);
+	r = TAILQ_FIRST(&settings->rules);
 	rm = NULL;
 
 	while (r != NULL) {
@@ -3753,26 +3753,21 @@ pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf *m)
 	r = rm;
 
 	/* Default to pass. */
-	if (r == NULL) {
-		PF_RULES_RUNLOCK();
+	if (r == NULL)
 		return (PF_PASS);
-	}
 
 	/* Execute action. */
 	counter_u64_add(r->packets[dir == PF_OUT], 1);
 	counter_u64_add(r->bytes[dir == PF_OUT], m_length(m, NULL));
 
 	/* Shortcut. Don't tag if we're just going to drop anyway. */
-	if (r->action == PF_DROP) {
-		PF_RULES_RUNLOCK();
+	if (r->action == PF_DROP)
 		return (PF_DROP);
-	}
 
 	if (r->tag > 0) {
 		mtag = pf_get_mtag(m);
 		if (mtag == NULL) {
 			counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
-			PF_RULES_RUNLOCK();
 			return (PF_DROP);
 		}
 		mtag->tag = r->tag;
@@ -3782,7 +3777,6 @@ pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf *m)
 		mtag = pf_get_mtag(m);
 		if (mtag == NULL) {
 			counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
-			PF_RULES_RUNLOCK();
 			return (PF_DROP);
 		}
 		mtag->qid = r->qid;
@@ -3790,8 +3784,6 @@ pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf *m)
 
 	action = r->action;
 
-	PF_RULES_RUNLOCK();
-
 	return (action);
 }
 
diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
index d16a52c79b26..b2537720bb7e 100644
--- a/sys/netpfil/pf/pf_ioctl.c
+++ b/sys/netpfil/pf/pf_ioctl.c
@@ -107,6 +107,7 @@ static void		 pf_empty_kpool(struct pf_kpalist *);
 static int		 pfioctl(struct cdev *, u_long, caddr_t, int,
 			    struct thread *);
 static int		 pf_begin_eth(uint32_t *);
+static void		 pf_rollback_eth_cb(struct epoch_context *);
 static int		 pf_rollback_eth(uint32_t);
 static int		 pf_commit_eth(uint32_t);
 static void		 pf_free_eth_rule(struct pf_keth_rule *);
@@ -701,6 +702,12 @@ pf_begin_eth(uint32_t *ticket)
 
 	PF_RULES_WASSERT();
 
+	if (V_pf_keth_inactive->open) {
+		/* We may be waiting for NET_EPOCH_CALL(pf_rollback_eth_cb) to
+		 * finish. */
+		return (EBUSY);
+	}
+
 	/* Purge old inactive rules. */
 	TAILQ_FOREACH_SAFE(rule, &V_pf_keth_inactive->rules, entries, tmp) {
 		TAILQ_REMOVE(&V_pf_keth_inactive->rules, rule, entries);
@@ -713,6 +720,24 @@ pf_begin_eth(uint32_t *ticket)
 	return (0);
 }
 
+static void
+pf_rollback_eth_cb(struct epoch_context *ctx)
+{
+	struct pf_keth_settings *settings;
+
+	settings = __containerof(ctx, struct pf_keth_settings, epoch_ctx);
+
+	CURVNET_SET(settings->vnet);
+
+	MPASS(settings == V_pf_keth_inactive);
+
+	PF_RULES_WLOCK();
+	pf_rollback_eth(V_pf_keth_inactive->ticket);
+	PF_RULES_WUNLOCK();
+
+	CURVNET_RESTORE();
+}
+
 static int
 pf_rollback_eth(uint32_t ticket)
 {
@@ -780,15 +805,20 @@ pf_commit_eth(uint32_t ticket)
 	    ticket != V_pf_keth_inactive->ticket)
 		return (EBUSY);
 
+	PF_RULES_WASSERT();
+
 	pf_eth_calc_skip_steps(&V_pf_keth_inactive->rules);
 
 	settings = V_pf_keth;
-	V_pf_keth = V_pf_keth_inactive;
+	ck_pr_store_ptr(&V_pf_keth, V_pf_keth_inactive);
 	V_pf_keth_inactive = settings;
 	V_pf_keth_inactive->ticket = V_pf_keth->ticket;
 
-	/* Clean up inactive rules. */
-	return (pf_rollback_eth(ticket));
+	/* Clean up inactive rules (i.e. previously active rules), only when
+	 * we're sure they're no longer used. */
+	NET_EPOCH_CALL(pf_rollback_eth_cb, &V_pf_keth_inactive->epoch_ctx);
+
+	return (0);
 }
 
 #ifdef ALTQ
diff --git a/sys/netpfil/pf/pf_ruleset.c b/sys/netpfil/pf/pf_ruleset.c
index 66335b5b104e..038d82bf2d81 100644
--- a/sys/netpfil/pf/pf_ruleset.c
+++ b/sys/netpfil/pf/pf_ruleset.c
@@ -154,6 +154,7 @@ pf_init_keth(struct pf_keth_settings *settings)
 	TAILQ_INIT(&settings->rules);
 	settings->ticket = 0;
 	settings->open = 0;
+	settings->vnet = curvnet;
 }
 
 struct pf_kruleset *