git: 650607380c58 - stable/13 - pf: protect the rpool from races

From: Kristof Provost <kp@FreeBSD.org>
Date: Fri, 04 Feb 2022 14:23:22 UTC
The branch stable/13 has been updated by kp:

URL: https://cgit.FreeBSD.org/src/commit/?id=650607380c58115ad1bb757a965087f9f35b2f88

commit 650607380c58115ad1bb757a965087f9f35b2f88
Author:     Kristof Provost <kp@FreeBSD.org>
AuthorDate: 2022-01-10 16:49:26 +0000
Commit:     Kristof Provost <kp@FreeBSD.org>
CommitDate: 2022-02-04 10:37:14 +0000

    pf: protect the rpool from races
    
    The roundrobin pool stores its state in the rule, so concurrent
    threads modify shared data when they evaluate it, which can lead to
    invalid addresses being returned.
    
    For example, thread A executes PF_AINC(&rpool->counter) immediately
    before thread B executes PF_ACPY(naddr, &rpool->counter), i.e. after
    thread B has already passed its pf_match_addr() check of
    rpool->counter. Thread B then copies an address it never validated.
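    
    A hypothetical userspace analogue of the race (simplified names and
    types, not pf's actual code): one thread keeps advancing a shared
    128-bit counter word by word, much as PF_AINC() does for an IPv6
    address, while another copies it, as PF_ACPY() does after the
    pf_match_addr() check. The copy can land mid-update and be
    internally inconsistent:
    
    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    
    /* Hypothetical stand-in for a 128-bit pf_addr / rpool->counter. */
    struct addr128 { uint32_t w[4]; };
    
    static struct addr128 counter;        /* shared round-robin state */
    
    /* Writer: stand-in for PF_AINC() advancing the counter. The four
     * words are written one by one, so the update is not atomic. */
    static void *advance_pool(void *arg)
    {
        (void)arg;
        for (uint32_t i = 1; i < 10000000; i++)
            for (int j = 0; j < 4; j++)
                counter.w[j] = i;
        return (NULL);
    }
    
    /* Reader: stand-in for PF_ACPY() copying the counter into naddr
     * after the address was already checked. */
    static void *copy_pool_addr(void *arg)
    {
        struct addr128 naddr;
        long torn = 0;
    
        (void)arg;
        for (int i = 0; i < 10000000; i++) {
            memcpy(&naddr, &counter, sizeof(naddr));
            if (naddr.w[0] != naddr.w[3])  /* copy is internally torn */
                torn++;
        }
        printf("torn copies observed: %ld\n", torn);
        return (NULL);
    }
    
    int main(void)
    {
        pthread_t a, b;
    
        pthread_create(&a, NULL, advance_pool, NULL);
        pthread_create(&b, NULL, copy_pool_addr, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        return (0);
    }
    
    Built with cc -pthread, the reader may observe torn copies on SMP
    hardware; without the new mutex, rpool->counter is exposed to the
    same unsynchronized write/copy pattern.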
    
    Protect the rpool with its own mutex to prevent these races. The
    performance impact is expected to be low: each rule has its own lock,
    and the lock is only taken while a state is being created, i.e. for
    the initial packets of a connection rather than for all traffic.
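    
    Condensed, the lifecycle of the new lock as applied in the diff
    below (fields abbreviated; mutex(9) is the KPI used) is:
    
    /* Rule creation, pf_ioctl_addrule(): set up the per-rule pool mutex. */
    mtx_init(&rule->rpool.mtx, "pf_krule_pool", NULL, MTX_DEF);
    
    /* pf_map_addr(): serialize reads/updates of the shared pool state. */
    mtx_lock(&rpool->mtx);
    /* ... choose rpool->cur / rpool->counter / rpool->tblidx, copy to naddr ... */
    mtx_unlock(&rpool->mtx);
    
    /* Rule teardown, pf_krule_free(): release the mutex's resources. */
    mtx_destroy(&rule->rpool.mtx);
    
    Every early return inside pf_map_addr() drops the mutex first, as
    the added mtx_unlock() calls in the diff show.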
    
    See also:       https://redmine.pfsense.org/issues/12660
    Reviewed by:    glebius
    MFC after:      3 weeks
    Sponsored by:   Rubicon Communications, LLC ("Netgate")
    Differential Revision:  https://reviews.freebsd.org/D33874
    
    (cherry picked from commit 5f5e32f1b3945087a687c5962071d3f46e34e1ff)
---
 sys/net/pfvar.h           |  1 +
 sys/netpfil/pf/pf_ioctl.c |  4 ++++
 sys/netpfil/pf/pf_lb.c    | 46 +++++++++++++++++++---------------------------
 3 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index f9071546cbce..8bf01eb11f4f 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -547,6 +547,7 @@ struct pf_kpooladdr {
 TAILQ_HEAD(pf_kpalist, pf_kpooladdr);
 
 struct pf_kpool {
+	struct mtx		 mtx;
 	struct pf_kpalist	 list;
 	struct pf_kpooladdr	*cur;
 	struct pf_poolhashkey	 key;
diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
index 6fdce31ad308..0122389c0b08 100644
--- a/sys/netpfil/pf/pf_ioctl.c
+++ b/sys/netpfil/pf/pf_ioctl.c
@@ -1551,6 +1551,8 @@ pf_krule_free(struct pf_krule *rule)
 	counter_u64_free(rule->states_cur);
 	counter_u64_free(rule->states_tot);
 	counter_u64_free(rule->src_nodes);
+
+	mtx_destroy(&rule->rpool.mtx);
 	free(rule, M_PFRULE);
 }
 
@@ -2146,6 +2148,8 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket,
 	TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr,
 	    rule, entries);
 	ruleset->rules[rs_num].inactive.rcount++;
+
+	mtx_init(&rule->rpool.mtx, "pf_krule_pool", NULL, MTX_DEF);
 	PF_RULES_WUNLOCK();
 
 	return (0);
diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c
index 3da27c7df26d..3190e5311ff5 100644
--- a/sys/netpfil/pf/pf_lb.c
+++ b/sys/netpfil/pf/pf_lb.c
@@ -374,36 +374,45 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
 		return (0);
 	}
 
+	mtx_lock(&rpool->mtx);
 	/* Find the route using chosen algorithm. Store the found route
 	   in src_node if it was given or found. */
-	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
+	if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
+		mtx_unlock(&rpool->mtx);
 		return (1);
+	}
 	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
 		switch (af) {
 #ifdef INET
 		case AF_INET:
 			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
 			    (rpool->opts & PF_POOL_TYPEMASK) !=
-			    PF_POOL_ROUNDROBIN)
+			    PF_POOL_ROUNDROBIN) {
+				mtx_unlock(&rpool->mtx);
 				return (1);
-			 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
-			 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
+			}
+			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
+			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
 			    (rpool->opts & PF_POOL_TYPEMASK) !=
-			    PF_POOL_ROUNDROBIN)
+			    PF_POOL_ROUNDROBIN) {
+				mtx_unlock(&rpool->mtx);
 				return (1);
+			}
 			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
 			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
 			break;
 #endif /* INET6 */
 		}
 	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
-		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
+		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
+			mtx_unlock(&rpool->mtx);
 			return (1); /* unsupported */
+		}
 	} else {
 		raddr = &rpool->cur->addr.v.a.addr;
 		rmask = &rpool->cur->addr.v.a.mask;
@@ -467,27 +476,6 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
 	    {
 		struct pf_kpooladdr *acur = rpool->cur;
 
-		/*
-		 * XXXGL: in the round-robin case we need to store
-		 * the round-robin machine state in the rule, thus
-		 * forwarding thread needs to modify rule.
-		 *
-		 * This is done w/o locking, because performance is assumed
-		 * more important than round-robin precision.
-		 *
-		 * In the simpliest case we just update the "rpool->cur"
-		 * pointer. However, if pool contains tables or dynamic
-		 * addresses, then "tblidx" is also used to store machine
-		 * state. Since "tblidx" is int, concurrent access to it can't
-		 * lead to inconsistence, only to lost of precision.
-		 *
-		 * Things get worse, if table contains not hosts, but
-		 * prefixes. In this case counter also stores machine state,
-		 * and for IPv6 address, counter can't be updated atomically.
-		 * Probably, using round-robin on a table containing IPv6
-		 * prefixes (or even IPv4) would cause a panic.
-		 */
-
 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
 			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
 			    &rpool->tblidx, &rpool->counter, af))
@@ -511,6 +499,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
 				/* table contains no address of type 'af' */
 				if (rpool->cur != acur)
 					goto try_next;
+				mtx_unlock(&rpool->mtx);
 				return (1);
 			}
 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
@@ -520,6 +509,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
 				/* table contains no address of type 'af' */
 				if (rpool->cur != acur)
 					goto try_next;
+				mtx_unlock(&rpool->mtx);
 				return (1);
 			}
 		} else {
@@ -539,6 +529,8 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
 	if (*sn != NULL)
 		PF_ACPY(&(*sn)->raddr, naddr, af);
 
+	mtx_unlock(&rpool->mtx);
+
 	if (V_pf_status.debug >= PF_DEBUG_MISC &&
 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
 		printf("pf_map_addr: selected address ");