git: 04dcbb44340f - main - pf: Prevent infinite looping over tables in round-robin pools

From: Kajetan Staszkiewicz <ks_at_FreeBSD.org>
Date: Sun, 13 Jul 2025 13:11:40 UTC
The branch main has been updated by ks:

URL: https://cgit.FreeBSD.org/src/commit/?id=04dcbb44340f3e42b130282112e9f741fccffc9f

commit 04dcbb44340f3e42b130282112e9f741fccffc9f
Author:     Kajetan Staszkiewicz <ks@FreeBSD.org>
AuthorDate: 2025-06-10 14:22:43 +0000
Commit:     Kajetan Staszkiewicz <ks@FreeBSD.org>
CommitDate: 2025-07-13 13:11:21 +0000

    pf: Prevent infinite looping over tables in round-robin pools
    
    In FreeBSD each redirection pool (struct pf_kpool) consists of multiple
    hosts (struct pf_addr_wrap). In OpenBSD that is not the case, so a
    round-robin pool whose host is a table loops indefinitely over only
    that single table.
    
    In FreeBSD, once all addresses from a table have been returned, the pool
    must iterate to the next host. Add a custom flag that makes
    pfr_pool_get() break out of its loop once it reaches the last index, and
    use this flag in round-robin pools. When changing the pool's host, reset
    the table index so that iteration over each table always starts from the
    beginning, as sketched below.
    
    Reviewed by:  kp
    Approved by:  kp
    Sponsored by: InnoGames GmbH
    Differential Revision:    https://reviews.freebsd.org/D50779
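
The behaviour described above can be illustrated with a minimal, self-contained
user-space sketch. This is not the kernel code: pool_get(), struct table and the
addresses are made up for illustration and only stand in for pfr_pool_get(),
struct pf_kpool and real table entries. The point is that the per-table lookup
gives up once it runs past the last index instead of wrapping back to 0, and the
caller reacts by advancing to the next host and resetting the index, which is the
role of the new loop_once argument and the rpool->tblidx = -1 reset in the diff
below.

/*
 * Minimal user-space sketch of the loop_once idea; names are made up
 * and merely stand in for pfr_pool_get() / pf_map_addr().
 */
#include <stdbool.h>
#include <stdio.h>

struct table {
        const char      **addrs;        /* addresses held by this "table" */
        int               count;
};

/*
 * Return 0 and the address at *pidx, or 1 when the table is exhausted.
 * With loop_once == false the old behaviour is kept: wrap back to 0.
 */
static int
pool_get(const struct table *t, int *pidx, const char **out, bool loop_once)
{
        int idx = (*pidx < 0) ? 0 : *pidx;

        if (t->count == 0)
                return (1);             /* empty table, skip it */
        if (idx >= t->count) {
                if (loop_once)
                        return (1);     /* let the caller move on */
                idx = 0;                /* wrap around as before */
        }
        *out = t->addrs[idx];
        *pidx = idx + 1;
        return (0);
}

int
main(void)
{
        const char *t1[] = { "192.0.2.1", "192.0.2.2" };
        const char *t2[] = { "198.51.100.1" };
        struct table hosts[] = { { t1, 2 }, { t2, 1 } };        /* the pool */
        int cur = 0, tblidx = -1;
        const char *addr;

        /* Round-robin over the pool; assumes at least one non-empty table. */
        for (int i = 0; i < 6; i++) {
                while (pool_get(&hosts[cur], &tblidx, &addr, true)) {
                        cur = (cur + 1) % 2;    /* next host in the pool */
                        tblidx = -1;            /* restart its table at 0 */
                }
                printf("connection %d -> %s\n", i, addr);
        }
        return (0);
}

In the diff, the random and source-hash branches keep the old wrap-around
behaviour (loop_once = false), while the round-robin code passes true so that
exhausting one table makes pf_map_addr() move on to the next host.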
---
 sys/net/pfvar.h                  |  2 +-
 sys/netpfil/pf/pf_lb.c           | 16 ++++----
 sys/netpfil/pf/pf_table.c        |  4 +-
 tests/sys/netpfil/pf/route_to.sh | 82 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 94 insertions(+), 10 deletions(-)

diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 8ee4d00daaff..1f2011634695 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -2500,7 +2500,7 @@ int	pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t);
 void	pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t,
 	    u_int64_t, int, int, int);
 int	pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, sa_family_t,
-	    pf_addr_filter_func_t);
+	    pf_addr_filter_func_t, bool);
 void	pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *);
 struct pfr_ktable *
 	pfr_attach_table(struct pf_kruleset *, char *);
diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c
index 9db1d88ce52e..26f7ab41eef4 100644
--- a/sys/netpfil/pf/pf_lb.c
+++ b/sys/netpfil/pf/pf_lb.c
@@ -617,7 +617,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
 				rpool->tblidx = (int)arc4random_uniform(cnt);
 			memset(&rpool->counter, 0, sizeof(rpool->counter));
 			if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
-			    af, pf_islinklocal)) {
+			    af, pf_islinklocal, false)) {
 				reason = PFRES_MAPFAILED;
 				goto done_pool_mtx; /* unsupported */
 			}
@@ -684,7 +684,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
 				rpool->tblidx = (int)(hashidx % cnt);
 			memset(&rpool->counter, 0, sizeof(rpool->counter));
 			if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
-			    af, pf_islinklocal)) {
+			    af, pf_islinklocal, false)) {
 				reason = PFRES_MAPFAILED;
 				goto done_pool_mtx; /* unsupported */
 			}
@@ -701,11 +701,12 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
 
 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
 			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
-			    &rpool->tblidx, &rpool->counter, af, NULL))
+			    &rpool->tblidx, &rpool->counter, af, NULL, true))
 				goto get_addr;
 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
 			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
-			    &rpool->tblidx, &rpool->counter, af, pf_islinklocal))
+			    &rpool->tblidx, &rpool->counter, af, pf_islinklocal,
+			    true))
 				goto get_addr;
 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
 			goto get_addr;
@@ -715,9 +716,10 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
 			rpool->cur = TAILQ_FIRST(&rpool->list);
 		else
 			rpool->cur = TAILQ_NEXT(rpool->cur, entries);
+		rpool->tblidx = -1;
 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
 			if (pfr_pool_get(rpool->cur->addr.p.tbl,
-			    &rpool->tblidx, &rpool->counter, af, NULL)) {
+			    &rpool->tblidx, &rpool->counter, af, NULL, true)) {
 				/* table contains no address of type 'af' */
 				if (rpool->cur != acur)
 					goto try_next;
@@ -725,9 +727,9 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
 				goto done_pool_mtx;
 			}
 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
-			rpool->tblidx = -1;
 			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
-			    &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) {
+			    &rpool->tblidx, &rpool->counter, af, pf_islinklocal,
+			    true)) {
 				/* table contains no address of type 'af' */
 				if (rpool->cur != acur)
 					goto try_next;
diff --git a/sys/netpfil/pf/pf_table.c b/sys/netpfil/pf/pf_table.c
index 2034f4422ef1..e3f3ab9025f7 100644
--- a/sys/netpfil/pf/pf_table.c
+++ b/sys/netpfil/pf/pf_table.c
@@ -2294,7 +2294,7 @@ pfr_detach_table(struct pfr_ktable *kt)
 
 int
 pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter,
-    sa_family_t af, pf_addr_filter_func_t filter)
+    sa_family_t af, pf_addr_filter_func_t filter, bool loop_once)
 {
 	struct pf_addr		*addr, cur, mask, umask_addr;
 	union sockaddr_union	 uaddr, umask;
@@ -2339,7 +2339,7 @@ _next_block:
 	ke = pfr_kentry_byidx(kt, idx, af);
 	if (ke == NULL) {
 		/* we don't have this idx, try looping */
-		if (loop || (ke = pfr_kentry_byidx(kt, 0, af)) == NULL) {
+		if ((loop || loop_once) || (ke = pfr_kentry_byidx(kt, 0, af)) == NULL) {
 			pfr_kstate_counter_add(&kt->pfrkt_nomatch, 1);
 			return (1);
 		}
diff --git a/tests/sys/netpfil/pf/route_to.sh b/tests/sys/netpfil/pf/route_to.sh
index 91c12c8ce2ae..fd1653cce311 100644
--- a/tests/sys/netpfil/pf/route_to.sh
+++ b/tests/sys/netpfil/pf/route_to.sh
@@ -893,6 +893,87 @@ empty_pool_cleanup()
 }
 
 
+atf_test_case "table_loop" "cleanup"
+
+table_loop_head()
+{
+	atf_set descr 'Check that iterating over tables properly loops'
+	atf_set require.user root
+}
+
+table_loop_body()
+{
+	setup_router_server_nat64
+
+	# Clients will connect from another network behind the router.
+	# This allows for using multiple source addresses.
+	jexec router route add -6 ${net_clients_6}::/${net_clients_6_mask} ${net_tester_6_host_tester}
+	jexec router route add    ${net_clients_4}.0/${net_clients_4_mask} ${net_tester_4_host_tester}
+
+	# The servers are reachable over additional IP addresses for
+	# testing of tables and subnets. The addresses are non-contiguous
+	# for pf_map_addr() counter tests.
+	for i in 0 1 4 5; do
+		a1=$((24 + i))
+		jexec server1 ifconfig ${epair_server1}b inet  ${net_server1_4}.${a1}/32 alias
+		jexec server1 ifconfig ${epair_server1}b inet6 ${net_server1_6}::42:${i}/128 alias
+		a2=$((40 + i))
+		jexec server2 ifconfig ${epair_server2}b inet  ${net_server2_4}.${a2}/32 alias
+		jexec server2 ifconfig ${epair_server2}b inet6 ${net_server2_6}::42:${i}/128 alias
+	done
+
+	jexec router pfctl -e
+	pft_set_rules router \
+		"set debug loud" \
+		"set reassemble yes" \
+		"set state-policy if-bound" \
+		"table <rt_targets_1> { ${net_server1_6}::42:4/127 ${net_server1_6}::42:0/127 }" \
+		"table <rt_targets_2> { ${net_server2_6}::42:4/127 }" \
+		"pass in on ${epair_tester}b \
+			route-to { \
+			(${epair_server1}a <rt_targets_1>) \
+			(${epair_server2}a <rt_targets_2_empty>) \
+			(${epair_server2}a <rt_targets_2>) \
+			} \
+		inet6 proto tcp \
+		keep state"
+
+	# All hosts of the pool are tables. Each table is iterated over once,
+	# then the pool moves on to the next host, which is iterated as well,
+	# and then the pool loops back to the 1st host. An empty table is
+	# skipped, unless it is the only host in the pool; that case is
+	# covered by the "empty_pool" test.
+	for port in $(seq 1 7); do
+		port=$((4200 + port))
+		atf_check -s exit:0 ${common_dir}/pft_ping.py \
+			--sendif ${epair_tester}a --replyif ${epair_tester}a \
+			--fromaddr ${net_clients_6}::1 --to ${host_server_6} \
+			--ping-type=tcp3way --send-sport=${port}
+	done
+
+	states=$(mktemp) || exit 1
+	jexec router pfctl -qvvss | normalize_pfctl_s > $states
+	cat $states
+
+	for state_regexp in \
+		"${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4201\] .* route-to: ${net_server1_6}::42:0@${epair_server1}a" \
+		"${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4202\] .* route-to: ${net_server1_6}::42:1@${epair_server1}a" \
+		"${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4203\] .* route-to: ${net_server1_6}::42:4@${epair_server1}a" \
+		"${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4204\] .* route-to: ${net_server1_6}::42:5@${epair_server1}a" \
+		"${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4205\] .* route-to: ${net_server2_6}::42:4@${epair_server2}a" \
+		"${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4206\] .* route-to: ${net_server2_6}::42:5@${epair_server2}a" \
+		"${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4207\] .* route-to: ${net_server1_6}::42:0@${epair_server1}a" \
+	; do
+		grep -qE "${state_regexp}" $states || atf_fail "State not found for '${state_regexp}'"
+	done
+}
+
+table_loop_cleanup()
+{
+	pft_cleanup
+}
+
+
 atf_init_test_cases()
 {
 	atf_add_test_case "v4"
@@ -912,4 +993,5 @@ atf_init_test_cases()
 	atf_add_test_case "sticky"
 	atf_add_test_case "ttl"
 	atf_add_test_case "empty_pool"
+	atf_add_test_case "table_loop"
 }