git: c2346c3d3ab7 - main - pf: support source-hash and random with tables and dynifs, not just pools
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 20 Feb 2025 09:35:28 UTC
The branch main has been updated by kp:
URL: https://cgit.FreeBSD.org/src/commit/?id=c2346c3d3ab754299d1e75705997db6fb9a49420
commit c2346c3d3ab754299d1e75705997db6fb9a49420
Author: Kristof Provost <kp@FreeBSD.org>
AuthorDate: 2025-02-13 13:14:56 +0000
Commit: Kristof Provost <kp@FreeBSD.org>
CommitDate: 2025-02-20 08:25:51 +0000
pf: support source-hash and random with tables and dynifs, not just pools
This finally allows to use source-hash for dynamic loadbalancing, eg.
"rdr-to <hosts> source-hash", instead of just round-robin and least-states.
An older pre-siphash version of this diff was tested by many people.
OK tedu@ benno@
Obtained from: OpenBSD, reyk <reyk@openbsd.org>, 252a05523f
Sponsored by: Rubicon Communications, LLC ("Netgate")
---
sbin/pfctl/parse.y | 43 ++++++++++--------------
share/man/man5/pf.conf.5 | 10 +++---
sys/netpfil/pf/pf.h | 5 +++
sys/netpfil/pf/pf_lb.c | 74 ++++++++++++++++++++++++++++++++++-------
tests/sys/netpfil/pf/nat64.sh | 76 +++++++++++++++++++++++++++++++++----------
5 files changed, 148 insertions(+), 60 deletions(-)
diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y
index f3334c961909..8f732bdf268a 100644
--- a/sbin/pfctl/parse.y
+++ b/sbin/pfctl/parse.y
@@ -2747,16 +2747,16 @@ pfrule : action dir logquick interface route af proto fromto
$5.host->addr.type == PF_ADDR_TABLE ||
DYNIF_MULTIADDR($5.host->addr)))
r.route.opts |= PF_POOL_ROUNDROBIN;
- if ((r.route.opts & PF_POOL_TYPEMASK) !=
- PF_POOL_ROUNDROBIN &&
- disallow_table($5.host, "tables are only "
- "supported in round-robin routing pools"))
- YYERROR;
- if ((r.route.opts & PF_POOL_TYPEMASK) !=
- PF_POOL_ROUNDROBIN &&
- disallow_alias($5.host, "interface (%s) "
- "is only supported in round-robin "
- "routing pools"))
+ if ($5.host->next != NULL &&
+ !PF_POOL_DYNTYPE(r.route.opts)) {
+ yyerror("address pool option "
+ "not supported by type");
+ }
+ if (!PF_POOL_DYNTYPE(r.route.opts) &&
+ (disallow_table($5.host,
+ "tables are not supported by pool type") ||
+ disallow_alias($5.host,
+ "interface (%s) is not supported by pool type")))
YYERROR;
if ($5.host->next != NULL) {
if ((r.route.opts & PF_POOL_TYPEMASK) !=
@@ -2829,10 +2829,9 @@ pfrule : action dir logquick interface route af proto fromto
r.nat.opts = $9.nat.pool_opts.type;
r.nat.opts |= $9.nat.pool_opts.opts;
- if ((r.nat.opts & PF_POOL_TYPEMASK) !=
- PF_POOL_ROUNDROBIN &&
- disallow_table($9.nat.rdr->host, "tables are only "
- "supported in round-robin pools"))
+ if (!PF_POOL_DYNTYPE(r.nat.opts) &&
+ disallow_table($9.nat.rdr->host, "tables are not "
+ "supported by pool type"))
YYERROR;
}
@@ -4916,17 +4915,11 @@ natrule : nataction interface af proto fromto tag tagged rtable
$9->host->addr.type == PF_ADDR_TABLE ||
DYNIF_MULTIADDR($9->host->addr)))
r.rdr.opts = PF_POOL_ROUNDROBIN;
- if ((r.rdr.opts & PF_POOL_TYPEMASK) !=
- PF_POOL_ROUNDROBIN &&
- disallow_table($9->host, "tables are only "
- "supported in round-robin redirection "
- "pools"))
- YYERROR;
- if ((r.rdr.opts & PF_POOL_TYPEMASK) !=
- PF_POOL_ROUNDROBIN &&
- disallow_alias($9->host, "interface (%s) "
- "is only supported in round-robin "
- "redirection pools"))
+ if (!PF_POOL_DYNTYPE(r.rdr.opts) &&
+ (disallow_table($9->host,
+ "tables are not supported by pool type") ||
+ disallow_alias($9->host,
+ "interface (%s) is not supported by pool type")))
YYERROR;
if ($9->host->next != NULL) {
if ((r.rdr.opts & PF_POOL_TYPEMASK) !=
diff --git a/share/man/man5/pf.conf.5 b/share/man/man5/pf.conf.5
index 75cb0b39272f..1192cd3d02e8 100644
--- a/share/man/man5/pf.conf.5
+++ b/share/man/man5/pf.conf.5
@@ -27,7 +27,7 @@
.\" ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
-.Dd February 11, 2025
+.Dd February 13, 2025
.Dt PF.CONF 5
.Os
.Sh NAME
@@ -132,8 +132,8 @@ Tables can also be used for the redirect address of
.Ar nat
and
.Ar rdr
-rules and in the routing options of filter rules, but only for
-.Ar round-robin
+and in the routing options of filter rules, but not for
+.Ar bitmask
pools.
.Pp
Tables can be defined with any of the following
@@ -2335,8 +2335,8 @@ The
option loops through the redirection address(es).
.Pp
When more than one redirection address is specified,
-.Ar round-robin
-is the only permitted pool type.
+.Ar bitmask
+is not permitted as a pool type.
.It Ar static-port
With
.Ar nat
diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h
index dfa86e7f1d6d..b5c0eeaa8f01 100644
--- a/sys/netpfil/pf/pf.h
+++ b/sys/netpfil/pf/pf.h
@@ -133,6 +133,11 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL,
#define PF_WSCALE_FLAG 0x80
#define PF_WSCALE_MASK 0x0f
+#define PF_POOL_DYNTYPE(_o) \
+ ((((_o) & PF_POOL_TYPEMASK) == PF_POOL_ROUNDROBIN) || \
+ (((_o) & PF_POOL_TYPEMASK) == PF_POOL_RANDOM) || \
+ (((_o) & PF_POOL_TYPEMASK) == PF_POOL_SRCHASH))
+
#define PF_LOG 0x01
#define PF_LOG_ALL 0x02
#define PF_LOG_SOCKET_LOOKUP 0x04
diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c
index 83dd63fd2290..d1ba2495dc30 100644
--- a/sys/netpfil/pf/pf_lb.c
+++ b/sys/netpfil/pf/pf_lb.c
@@ -73,7 +73,7 @@ VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16;
#define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x
-static void pf_hash(struct pf_addr *, struct pf_addr *,
+static uint64_t pf_hash(struct pf_addr *, struct pf_addr *,
struct pf_poolhashkey *, sa_family_t);
static struct pf_krule *pf_match_translation(struct pf_pdesc *,
int, struct pf_kanchor_stackframe *);
@@ -84,7 +84,7 @@ static int pf_get_sport(struct pf_pdesc *, struct pf_krule *,
pf_sn_types_t);
static bool pf_islinklocal(const sa_family_t, const struct pf_addr *);
-static void
+static uint64_t
pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
struct pf_poolhashkey *key, sa_family_t af)
{
@@ -95,20 +95,23 @@ pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
uint32_t hash32[2];
} h;
#endif
+ uint64_t res = 0;
_Static_assert(sizeof(*key) >= SIPHASH_KEY_LENGTH, "");
switch (af) {
#ifdef INET
case AF_INET:
- hash->addr32[0] = SipHash24(&ctx, (const uint8_t *)key,
+ res = SipHash24(&ctx, (const uint8_t *)key,
&inaddr->addr32[0], sizeof(inaddr->addr32[0]));
+ hash->addr32[0] = res;
break;
#endif /* INET */
#ifdef INET6
case AF_INET6:
- h.hash64 = SipHash24(&ctx, (const uint8_t *)key,
+ res = SipHash24(&ctx, (const uint8_t *)key,
&inaddr->addr32[0], 4 * sizeof(inaddr->addr32[0]));
+ h.hash64 = res;
hash->addr32[0] = h.hash32[0];
hash->addr32[1] = h.hash32[1];
/*
@@ -120,6 +123,7 @@ pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
break;
#endif /* INET6 */
}
+ return (res);
}
static struct pf_krule *
@@ -459,6 +463,8 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
{
u_short reason = PFRES_MATCH;
struct pf_addr *raddr = NULL, *rmask = NULL;
+ uint64_t hashidx;
+ int cnt;
mtx_lock(&rpool->mtx);
/* Find the route using chosen algorithm. Store the found route
@@ -472,8 +478,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
#ifdef INET
case AF_INET:
if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
- (rpool->opts & PF_POOL_TYPEMASK) !=
- PF_POOL_ROUNDROBIN) {
+ !PF_POOL_DYNTYPE(rpool->opts)) {
reason = PFRES_MAPFAILED;
goto done_pool_mtx;
}
@@ -484,8 +489,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
#ifdef INET6
case AF_INET6:
if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
- (rpool->opts & PF_POOL_TYPEMASK) !=
- PF_POOL_ROUNDROBIN) {
+ !PF_POOL_DYNTYPE(rpool->opts)) {
reason = PFRES_MAPFAILED;
goto done_pool_mtx;
}
@@ -495,7 +499,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
#endif /* INET6 */
}
} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
- if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
+ if (!PF_POOL_DYNTYPE(rpool->opts)) {
reason = PFRES_MAPFAILED;
goto done_pool_mtx; /* unsupported */
}
@@ -512,7 +516,28 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
PF_POOLMASK(naddr, raddr, rmask, saddr, af);
break;
case PF_POOL_RANDOM:
- if (init_addr != NULL && PF_AZERO(init_addr, af)) {
+ if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+ cnt = rpool->cur->addr.p.tbl->pfrkt_cnt;
+ rpool->tblidx = (int)arc4random_uniform(cnt);
+ memset(&rpool->counter, 0, sizeof(rpool->counter));
+ if (pfr_pool_get(rpool->cur->addr.p.tbl,
+ &rpool->tblidx, &rpool->counter, af, NULL)) {
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx; /* unsupported */
+ }
+ PF_ACPY(naddr, &rpool->counter, af);
+ } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+ cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt;
+ rpool->tblidx = (int)arc4random_uniform(cnt);
+ memset(&rpool->counter, 0, sizeof(rpool->counter));
+ if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+ &rpool->tblidx, &rpool->counter, af,
+ pf_islinklocal)) {
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx; /* unsupported */
+ }
+ PF_ACPY(naddr, &rpool->counter, af);
+ } else if (init_addr != NULL && PF_AZERO(init_addr, af)) {
switch (af) {
#ifdef INET
case AF_INET:
@@ -554,8 +579,33 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
{
unsigned char hash[16];
- pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
- PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
+ hashidx =
+ pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
+ if (rpool->cur->addr.type == PF_ADDR_TABLE) {
+ cnt = rpool->cur->addr.p.tbl->pfrkt_cnt;
+ rpool->tblidx = (int)(hashidx % cnt);
+ memset(&rpool->counter, 0, sizeof(rpool->counter));
+ if (pfr_pool_get(rpool->cur->addr.p.tbl,
+ &rpool->tblidx, &rpool->counter, af, NULL)) {
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx; /* unsupported */
+ }
+ PF_ACPY(naddr, &rpool->counter, af);
+ } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
+ cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt;
+ rpool->tblidx = (int)(hashidx % cnt);
+ memset(&rpool->counter, 0, sizeof(rpool->counter));
+ if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+ &rpool->tblidx, &rpool->counter, af,
+ pf_islinklocal)) {
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx; /* unsupported */
+ }
+ PF_ACPY(naddr, &rpool->counter, af);
+ } else {
+ PF_POOLMASK(naddr, raddr, rmask,
+ (struct pf_addr *)&hash, af);
+ }
break;
}
case PF_POOL_ROUNDROBIN:
diff --git a/tests/sys/netpfil/pf/nat64.sh b/tests/sys/netpfil/pf/nat64.sh
index 9cc6aececc42..68d9191cacb9 100644
--- a/tests/sys/netpfil/pf/nat64.sh
+++ b/tests/sys/netpfil/pf/nat64.sh
@@ -409,7 +409,7 @@ pool_cleanup()
atf_test_case "table"
table_head()
{
- atf_set descr 'Tables require round-robin'
+ atf_set descr 'Check table restrictions'
atf_set require.user root
}
@@ -417,9 +417,18 @@ table_body()
{
pft_init
- echo "pass in on epair inet6 from any to 64:ff9b::/96 af-to inet from <wanaddr>" | \
+ # Round-robin and random are allowed
+ echo "pass in on epair inet6 from any to 64:ff9b::/96 af-to inet from <wanaddr> round-robin" | \
+ atf_check -s exit:0 \
+ pfctl -f -
+ echo "pass in on epair inet6 from any to 64:ff9b::/96 af-to inet from <wanaddr> random" | \
+ atf_check -s exit:0 \
+ pfctl -f -
+
+ # bitmask is not
+ echo "pass in on epair inet6 from any to 64:ff9b::/96 af-to inet from <wanaddr> bitmask" | \
atf_check -s exit:1 \
- -e match:"tables are only supported in round-robin pools" \
+ -e match:"tables are not supported by pool type" \
pfctl -f -
}
@@ -489,15 +498,11 @@ table_range_cleanup()
pft_cleanup
}
-atf_test_case "table_round_robin" "cleanup"
-table_round_robin_head()
+table_common_body()
{
- atf_set descr 'Use a table of IPv4 addresses in round-robin mode'
- atf_set require.user root
-}
+ pool_type=$1
+echo pool_type=${pool_type}
-table_round_robin_body()
-{
pft_init
epair_link=$(vnet_mkepair)
@@ -527,7 +532,7 @@ table_round_robin_body()
"set reassemble yes" \
"set state-policy if-bound" \
"table <wanaddrs> { 192.0.2.1, 192.0.2.3, 192.0.2.4 }" \
- "pass in on ${epair}b inet6 from any to 64:ff9b::/96 af-to inet from <wanaddrs> round-robin"
+ "pass in on ${epair}b inet6 from any to 64:ff9b::/96 af-to inet from <wanaddrs> ${pool_type}"
# Use pf to count sources
jexec dst pfctl -e
@@ -541,13 +546,30 @@ table_round_robin_body()
atf_check -s exit:0 -o ignore \
ping6 -c 1 64:ff9b::192.0.2.2
- # Verify on dst that we saw different source addresses
- atf_check -s exit:0 -o match:".*192.0.2.1.*" \
- jexec dst pfctl -ss
- atf_check -s exit:0 -o match:".*192.0.2.3.*" \
- jexec dst pfctl -ss
- atf_check -s exit:0 -o match:".*192.0.2.4.*" \
- jexec dst pfctl -ss
+ # XXX We can't reasonably check pool type random because it's random. It may end
+ # up choosing the same source IP for all three connections.
+ if [ "${pool_type}" == "round-robin" ];
+ then
+ # Verify on dst that we saw different source addresses
+ atf_check -s exit:0 -o match:".*192.0.2.1.*" \
+ jexec dst pfctl -ss
+ atf_check -s exit:0 -o match:".*192.0.2.3.*" \
+ jexec dst pfctl -ss
+ atf_check -s exit:0 -o match:".*192.0.2.4.*" \
+ jexec dst pfctl -ss
+ fi
+}
+
+atf_test_case "table_round_robin" "cleanup"
+table_round_robin_head()
+{
+ atf_set descr 'Use a table of IPv4 addresses in round-robin mode'
+ atf_set require.user root
+}
+
+table_round_robin_body()
+{
+ table_common_body round-robin
}
table_round_robin_cleanup()
@@ -555,6 +577,23 @@ table_round_robin_cleanup()
pft_cleanup
}
+atf_test_case "table_random" "cleanup"
+table_random_head()
+{
+ atf_set descr 'Use a table of IPv4 addresses in random mode'
+ atf_set require.user root
+}
+
+table_random_body()
+{
+ table_common_body random
+}
+
+table_random_cleanup()
+{
+ pft_cleanup
+}
+
atf_test_case "dummynet" "cleanup"
dummynet_head()
{
@@ -775,6 +814,7 @@ atf_init_test_cases()
atf_add_test_case "table"
atf_add_test_case "table_range"
atf_add_test_case "table_round_robin"
+ atf_add_test_case "table_random"
atf_add_test_case "dummynet"
atf_add_test_case "gateway6"
atf_add_test_case "route_to"