git: 65c318630123 - main - pf: Add prefer-ipv6-nexthop option for route-to pools
Date: Fri, 29 Aug 2025 09:23:16 UTC
The branch main has been updated by ks:
URL: https://cgit.FreeBSD.org/src/commit/?id=65c318630123fcf2b6f491bf4d02a5cad3031d20
commit 65c318630123fcf2b6f491bf4d02a5cad3031d20
Author: Kajetan Staszkiewicz <ks@FreeBSD.org>
AuthorDate: 2025-08-01 19:01:37 +0000
Commit: Kajetan Staszkiewicz <ks@FreeBSD.org>
CommitDate: 2025-08-29 07:58:40 +0000
pf: Add prefer-ipv6-nexthop option for route-to pools
Now that pf is aware of the address family of each pool address and
source tracking uses distinct address families for source and redirection
addresses, it is possible to add a new pool option, prefer-ipv6-nexthop,
which enables routing of IPv4 packets over IPv6 next hops for rules
with the route-to option.
Add a pool option flag PF_POOL_IPV6NH and apply it to pools with the
prefer-ipv6-nexthop keyword.
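For example, the new pfctl test case pf1073.in (included in the diff
below) exercises the keyword by routing IPv4 traffic to an IPv6 next hop:

    pass in on vtnet0 route-to ( vtnet1 2001:db8::1 ) prefer-ipv6-nexthop inet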
Modify pf_map_addr() to handle pools with addresses of different
families. Use *naf as a hint about the address family of the forwarded
packet, then pick from the pool an address of a family that can be used
as a next hop for the forwarded packet, controlled by the PF_POOL_IPV6NH
flag. For NAT pools this flag is never set, so pf_map_addr() will
return an IP address of the same family as the forwarded packet.
For route-to pools, when the flag is enabled, IPv6 addresses can be
returned for IPv4 packets.
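As a rough illustration of that selection rule, here is a minimal
standalone sketch (not the pf_lb.c code itself; the function name
nexthop_af() and the reduction to the single-address case are
illustrative assumptions):

    /*
     * Sketch only: given the packet AF, the pool address AF and whether
     * PF_POOL_IPV6NH is set, return the AF the next hop may have,
     * or 0 to signal a mapping failure.
     */
    #include <stdio.h>
    #include <sys/socket.h>    /* AF_INET, AF_INET6 */

    static int
    nexthop_af(int packet_af, int pool_af, int prefer_ipv6_nexthop)
    {
            if (!prefer_ipv6_nexthop) {
                    /* NAT-style pools: the next hop must match the packet AF. */
                    if (pool_af != 0 && pool_af != packet_af)
                            return (0);
                    return (packet_af);
            }
            /*
             * route-to with prefer-ipv6-nexthop: IPv6 next hops are allowed
             * for IPv4 packets, but IPv6 packets still need IPv6 next hops.
             */
            if (packet_af == AF_INET6 && pool_af == AF_INET)
                    return (0);
            return (pool_af);
    }

    int
    main(void)
    {
            printf("%d\n", nexthop_af(AF_INET, AF_INET6, 1)); /* AF_INET6 */
            printf("%d\n", nexthop_af(AF_INET, AF_INET6, 0)); /* 0: mapping fails */
            return (0);
    }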
In pf_route() check rt_af: it is no longer guaranteed to be AF_INET
because pf_map_addr() may have changed it (via *naf).
Add tests for the behaviour of pf_map_addr() both with and without
PF_POOL_IPV6NH, for single IP addresses, prefixes and subnets.
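The new and extended test programs can be run with kyua, assuming the pf
tests are installed under the usual /usr/tests hierarchy (root is
required):

    cd /usr/tests/sys/netpfil/pf && kyua test route_to src_track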
Reviewed by: kp
Sponsored by: InnoGames GmbH
Differential Revision: https://reviews.freebsd.org/D50781
---
sbin/pfctl/parse.y | 50 ++-
sbin/pfctl/pfctl_parser.c | 6 +-
sbin/pfctl/tests/files/pf1073.in | 1 +
sbin/pfctl/tests/files/pf1073.ok | 1 +
sbin/pfctl/tests/files/pf1074.fail | 1 +
sbin/pfctl/tests/files/pf1074.in | 1 +
sbin/pfctl/tests/pfctl_test_list.inc | 2 +
share/man/man5/pf.conf.5 | 18 +-
sys/net/pfvar.h | 3 +-
sys/netpfil/pf/if_pfsync.c | 8 +-
sys/netpfil/pf/pf.c | 63 ++-
sys/netpfil/pf/pf.h | 1 +
sys/netpfil/pf/pf_ioctl.c | 1 +
sys/netpfil/pf/pf_lb.c | 151 ++++++--
tests/sys/netpfil/pf/route_to.sh | 716 ++++++++++++++++++++++++++++++++++-
tests/sys/netpfil/pf/src_track.sh | 36 +-
tests/sys/netpfil/pf/utils.subr | 4 +-
17 files changed, 953 insertions(+), 110 deletions(-)
diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y
index 00c36b218055..59c27d1f5d7c 100644
--- a/sbin/pfctl/parse.y
+++ b/sbin/pfctl/parse.y
@@ -238,6 +238,7 @@ static struct pool_opts {
#define POM_TYPE 0x01
#define POM_STICKYADDRESS 0x02
#define POM_ENDPI 0x04
+#define POM_IPV6NH 0x08
u_int8_t opts;
int type;
int staticport;
@@ -543,7 +544,7 @@ int parseport(char *, struct range *r, int);
%token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY PFLOW ALLOW_RELATED
%token TAGGED TAG IFBOUND FLOATING STATEPOLICY STATEDEFAULTS ROUTE SETTOS
%token DIVERTTO DIVERTREPLY BRIDGE_TO RECEIVEDON NE LE GE AFTO NATTO RDRTO
-%token BINATTO MAXPKTRATE MAXPKTSIZE
+%token BINATTO MAXPKTRATE MAXPKTSIZE IPV6NH
%token <v.string> STRING
%token <v.number> NUMBER
%token <v.i> PORTBINARY
@@ -2648,13 +2649,16 @@ pfrule : action dir logquick interface route af proto fromto
YYERROR;
}
r.rt = $5.rt;
- decide_address_family($5.redirspec->host, &r.af);
- if (!(r.rule_flag & PFRULE_AFTO))
- remove_invalid_hosts(&($5.redirspec->host), &r.af);
- if ($5.redirspec->host == NULL) {
- yyerror("no routing address with "
- "matching address family found.");
- YYERROR;
+
+ if (!($5.redirspec->pool_opts.opts & PF_POOL_IPV6NH)) {
+ decide_address_family($5.redirspec->host, &r.af);
+ if (!(r.rule_flag & PFRULE_AFTO))
+ remove_invalid_hosts(&($5.redirspec->host), &r.af);
+ if ($5.redirspec->host == NULL) {
+ yyerror("no routing address with "
+ "matching address family found.");
+ YYERROR;
+ }
}
}
#ifdef __FreeBSD__
@@ -2978,7 +2982,8 @@ filter_opt : USER uids {
filter_opts.nat = $4;
filter_opts.nat->af = $2;
- if ($4->af && $4->af != $2) {
+ remove_invalid_hosts(&($4->host), &(filter_opts.nat->af));
+ if ($4->host == NULL) {
yyerror("af-to addresses must be in the "
"target address family");
YYERROR;
@@ -2998,8 +3003,9 @@ filter_opt : USER uids {
filter_opts.nat->af = $2;
filter_opts.rdr = $6;
filter_opts.rdr->af = $2;
- if (($4->af && $4->host->af != $2) ||
- ($6->af && $6->host->af != $2)) {
+ remove_invalid_hosts(&($4->host), &(filter_opts.nat->af));
+ remove_invalid_hosts(&($6->host), &(filter_opts.rdr->af));
+ if ($4->host == NULL || $6->host == NULL) {
yyerror("af-to addresses must be in the "
"target address family");
YYERROR;
@@ -4674,6 +4680,14 @@ pool_opt : BITMASK {
pool_opts.marker |= POM_ENDPI;
pool_opts.opts |= PF_POOL_ENDPI;
}
+ | IPV6NH {
+ if (pool_opts.marker & POM_IPV6NH) {
+ yyerror("prefer-ipv6-nexthop cannot be redefined");
+ YYERROR;
+ }
+ pool_opts.marker |= POM_IPV6NH;
+ pool_opts.opts |= PF_POOL_IPV6NH;
+ }
| MAPEPORTSET number '/' number '/' number {
if (pool_opts.mape.offset) {
yyerror("map-e-portset cannot be redefined");
@@ -4813,6 +4827,12 @@ natrule : nataction interface af proto fromto tag tagged rtable
"address'");
YYERROR;
}
+ if ($9->pool_opts.opts & PF_POOL_IPV6NH) {
+ yyerror("The prefer-ipv6-nexthop option "
+ "can't be used for nat/rdr/binat pools"
+ );
+ YYERROR;
+ }
if (!r.af && ! $9->host->ifindex)
r.af = $9->host->af;
@@ -5074,13 +5094,6 @@ route_host : STRING {
route_host_list : route_host optnl { $$ = $1; }
| route_host_list comma route_host optnl {
- if ($1->af == 0)
- $1->af = $3->af;
- if ($1->af != $3->af) {
- yyerror("all pool addresses must be in the "
- "same address family");
- YYERROR;
- }
$1->tail->next = $3;
$1->tail = $3->tail;
$$ = $1;
@@ -6678,6 +6691,7 @@ lookup(char *s)
{ "pass", PASS},
{ "pflow", PFLOW},
{ "port", PORT},
+ { "prefer-ipv6-nexthop", IPV6NH},
{ "prio", PRIO},
{ "priority", PRIORITY},
{ "priq", PRIQ},
diff --git a/sbin/pfctl/pfctl_parser.c b/sbin/pfctl/pfctl_parser.c
index 18b78a150c28..3c4f9f6b4334 100644
--- a/sbin/pfctl/pfctl_parser.c
+++ b/sbin/pfctl/pfctl_parser.c
@@ -508,6 +508,8 @@ print_pool(struct pfctl_pool *pool, u_int16_t p1, u_int16_t p2, int id)
if (pool->mape.offset > 0)
printf(" map-e-portset %u/%u/%u",
pool->mape.offset, pool->mape.psidlen, pool->mape.psid);
+ if (pool->opts & PF_POOL_IPV6NH)
+ printf(" prefer-ipv6-nexthop");
}
void
@@ -1438,7 +1440,7 @@ ifa_add_groups_to_map(char *ifa_name)
ENTRY item;
ENTRY *ret_item;
int *answer;
-
+
item.key = ifg->ifgrq_group;
if (hsearch_r(item, FIND, &ret_item, &isgroup_map) == 0) {
struct ifgroupreq ifgr2;
@@ -1580,7 +1582,7 @@ is_a_group(char *name)
{
ENTRY item;
ENTRY *ret_item;
-
+
item.key = name;
if (hsearch_r(item, FIND, &ret_item, &isgroup_map) == 0)
return (0);
diff --git a/sbin/pfctl/tests/files/pf1073.in b/sbin/pfctl/tests/files/pf1073.in
new file mode 100644
index 000000000000..477995893ac3
--- /dev/null
+++ b/sbin/pfctl/tests/files/pf1073.in
@@ -0,0 +1 @@
+pass in on vtnet0 route-to ( vtnet1 2001:db8::1 ) prefer-ipv6-nexthop inet
diff --git a/sbin/pfctl/tests/files/pf1073.ok b/sbin/pfctl/tests/files/pf1073.ok
new file mode 100644
index 000000000000..f34867508c75
--- /dev/null
+++ b/sbin/pfctl/tests/files/pf1073.ok
@@ -0,0 +1 @@
+pass in on vtnet0 route-to (vtnet1 2001:db8::1) prefer-ipv6-nexthop inet all flags S/SA keep state
diff --git a/sbin/pfctl/tests/files/pf1074.fail b/sbin/pfctl/tests/files/pf1074.fail
new file mode 100644
index 000000000000..afe8ee3c458f
--- /dev/null
+++ b/sbin/pfctl/tests/files/pf1074.fail
@@ -0,0 +1 @@
+no routing address with matching address family found.
diff --git a/sbin/pfctl/tests/files/pf1074.in b/sbin/pfctl/tests/files/pf1074.in
new file mode 100644
index 000000000000..5d285bc5d6e8
--- /dev/null
+++ b/sbin/pfctl/tests/files/pf1074.in
@@ -0,0 +1 @@
+pass in on vtnet0 route-to ( vtnet1 2001:db8::1 ) inet
diff --git a/sbin/pfctl/tests/pfctl_test_list.inc b/sbin/pfctl/tests/pfctl_test_list.inc
index 3a68cc06ec74..8bfccddf50e5 100644
--- a/sbin/pfctl/tests/pfctl_test_list.inc
+++ b/sbin/pfctl/tests/pfctl_test_list.inc
@@ -181,3 +181,5 @@ PFCTL_TEST(1069, "max-pkt-size")
PFCTL_TEST_FAIL(1070, "include line number")
PFCTL_TEST(1071, "mask length on (lo0)")
PFCTL_TEST_FAIL(1072, "Invalid port range")
+PFCTL_TEST(1073, "Filter AF different than route-to AF, with prefer-ipv6-nexthop")
+PFCTL_TEST_FAIL(1074, "Filter AF different than route-to AF, without prefer-ipv6-nexthop")
diff --git a/share/man/man5/pf.conf.5 b/share/man/man5/pf.conf.5
index a9ae823257a4..bdd8a843d72a 100644
--- a/share/man/man5/pf.conf.5
+++ b/share/man/man5/pf.conf.5
@@ -2470,7 +2470,13 @@ NAT address and port.
This feature implements "full-cone" NAT behavior.
.El
.Pp
-Additionally, the
+Additionally, options
+.Ar sticky-address
+and
+.Ar prefer-ipv6-nexthop
+can be specified to influence how IP addresses are selected from pools.
+.Pp
+The
.Ar sticky-address
option can be specified to help ensure that multiple connections from the
same source are mapped to the same redirection address.
@@ -2486,6 +2492,14 @@ beyond the lifetime of the states, increase the global options with
See
.Sx STATEFUL TRACKING OPTIONS
for more ways to control the source tracking.
+.Pp
+The
+.Ar prefer-ipv6-nexthop
+option allows for IPv6 addresses to be used as the nexthop
+for IPv4 packets routed with the
+.Ar route-to
+rule option. If a table contains both IPv4 and IPv6 addresses, the IPv6 addresses
+are used first in round-robin fashion, then the IPv4 addresses.
.Sh STATE MODULATION
Much of the security derived from TCP is attributable to how well the
initial sequence numbers (ISNs) are chosen.
@@ -3580,7 +3594,7 @@ limit-item = ( "states" | "frags" | "src-nodes" ) number
pooltype = ( "bitmask" | "random" |
"source-hash" [ ( hex-key | string-key ) ] |
- "round-robin" ) [ sticky-address ]
+ "round-robin" ) [ sticky-address | prefer-ipv6-nexthop ]
subqueue = string | "{" queue-list "}"
queue-list = string [ [ "," ] string ]
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index d6c13470f2eb..cf6d2508cf65 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -645,6 +645,7 @@ struct pf_kpool {
int tblidx;
u_int16_t proxy_port[2];
u_int8_t opts;
+ sa_family_t ipv6_nexthop_af;
};
struct pf_rule_actions {
@@ -2680,7 +2681,7 @@ u_short pf_map_addr(sa_family_t, struct pf_krule *,
struct pf_addr *, struct pf_kpool *);
u_short pf_map_addr_sn(u_int8_t, struct pf_krule *,
struct pf_addr *, struct pf_addr *,
- sa_family_t *, struct pfi_kkif **nkif,
+ sa_family_t *, struct pfi_kkif **,
struct pf_addr *, struct pf_kpool *,
pf_sn_types_t);
int pf_get_transaddr_af(struct pf_krule *,
diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c
index 585c196391c0..cfc300d99396 100644
--- a/sys/netpfil/pf/if_pfsync.c
+++ b/sys/netpfil/pf/if_pfsync.c
@@ -605,7 +605,8 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version)
rt_kif = rpool_first->kif;
/*
* Guess the AF of the route address, FreeBSD 13 does
- * not support af-to so it should be safe.
+ * not support af-to nor prefer-ipv6-nexthop
+ * so it should be safe.
*/
rt_af = r->af;
} else if (!PF_AZERO(&sp->pfs_1301.rt_addr, sp->pfs_1301.af)) {
@@ -634,8 +635,9 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version)
}
rt = sp->pfs_1400.rt;
/*
- * Guess the AF of the route address, FreeBSD 13 does
- * not support af-to so it should be safe.
+ * Guess the AF of the route address, FreeBSD 14 does
+ * not support af-to nor prefer-ipv6-nexthop
+ * so it should be safe.
*/
rt_af = sp->pfs_1400.af;
}
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 8cd4fff95b15..4325835c7671 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -5960,7 +5960,9 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
if (r->rt) {
/*
* Set act.rt here instead of in pf_rule_to_actions() because
- * it is applied only from the last pass rule.
+ * it is applied only from the last pass rule. For rules
+ * with the prefer-ipv6-nexthop option act.rt_af is a hint
+ * about AF of the forwarded packet and might be changed.
*/
pd->act.rt = r->rt;
if (r->rt == PF_REPLYTO)
@@ -8974,9 +8976,10 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
struct pf_kstate *s, struct pf_pdesc *pd, struct inpcb *inp)
{
struct mbuf *m0, *m1, *md;
- struct route ro;
- const struct sockaddr *gw = &ro.ro_dst;
- struct sockaddr_in *dst;
+ struct route_in6 ro;
+ union sockaddr_union rt_gw;
+ const union sockaddr_union *gw = (const union sockaddr_union *)&ro.ro_dst;
+ union sockaddr_union *dst;
struct ip *ip;
struct ifnet *ifp = NULL;
int error = 0;
@@ -9071,10 +9074,35 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
ip = mtod(m0, struct ip *);
bzero(&ro, sizeof(ro));
- dst = (struct sockaddr_in *)&ro.ro_dst;
- dst->sin_family = AF_INET;
- dst->sin_len = sizeof(struct sockaddr_in);
- dst->sin_addr.s_addr = pd->act.rt_addr.v4.s_addr;
+ dst = (union sockaddr_union *)&ro.ro_dst;
+ dst->sin.sin_family = AF_INET;
+ dst->sin.sin_len = sizeof(struct sockaddr_in);
+ dst->sin.sin_addr = ip->ip_dst;
+ if (ifp) { /* Only needed in forward direction and route-to */
+ bzero(&rt_gw, sizeof(rt_gw));
+ ro.ro_flags |= RT_HAS_GW;
+ gw = &rt_gw;
+ switch (pd->act.rt_af) {
+#ifdef INET
+ case AF_INET:
+ rt_gw.sin.sin_family = AF_INET;
+ rt_gw.sin.sin_len = sizeof(struct sockaddr_in);
+ rt_gw.sin.sin_addr.s_addr = pd->act.rt_addr.v4.s_addr;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ rt_gw.sin6.sin6_family = AF_INET6;
+ rt_gw.sin6.sin6_len = sizeof(struct sockaddr_in6);
+ pf_addrcpy((struct pf_addr *)&rt_gw.sin6.sin6_addr,
+ &pd->act.rt_addr, AF_INET6);
+ break;
+#endif /* INET6 */
+ default:
+ /* Normal af-to without route-to */
+ break;
+ }
+ }
if (pd->dir == PF_IN) {
if (ip->ip_ttl <= IPTTLDEC) {
@@ -9098,10 +9126,10 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
/* Use the gateway if needed. */
if (nh->nh_flags & NHF_GATEWAY) {
- gw = &nh->gw_sa;
+ gw = (const union sockaddr_union *)&nh->gw_sa;
ro.ro_flags |= RT_HAS_GW;
} else {
- dst->sin_addr = ip->ip_dst;
+ dst->sin.sin_addr = ip->ip_dst;
}
/*
@@ -9126,6 +9154,9 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
PF_STATE_UNLOCK(s);
}
+ /* It must have been either set from rt_af or from fib4_lookup */
+ KASSERT(gw->sin.sin_family != 0, ("%s: gw address family undetermined", __func__));
+
if (ifp == NULL) {
m0 = pd->m;
pd->m = NULL;
@@ -9210,9 +9241,11 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
m_clrprotoflags(m0); /* Avoid confusing lower layers. */
md = m0;
- error = pf_dummynet_route(pd, s, r, ifp, gw, &md);
+ error = pf_dummynet_route(pd, s, r, ifp,
+ (const struct sockaddr *)gw, &md);
if (md != NULL) {
- error = (*ifp->if_output)(ifp, md, gw, &ro);
+ error = (*ifp->if_output)(ifp, md,
+ (const struct sockaddr *)gw, (struct route *)&ro);
SDT_PROBE2(pf, ip, route_to, output, ifp, error);
}
goto done;
@@ -9253,9 +9286,11 @@ pf_route(struct pf_krule *r, struct ifnet *oifp,
md = m0;
pd->pf_mtag = pf_find_mtag(md);
error = pf_dummynet_route(pd, s, r, ifp,
- gw, &md);
+ (const struct sockaddr *)gw, &md);
if (md != NULL) {
- error = (*ifp->if_output)(ifp, md, gw, &ro);
+ error = (*ifp->if_output)(ifp, md,
+ (const struct sockaddr *)gw,
+ (struct route *)&ro);
SDT_PROBE2(pf, ip, route_to, output, ifp, error);
}
} else
diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h
index 51b3fd6390e1..8edd5a5110a1 100644
--- a/sys/netpfil/pf/pf.h
+++ b/sys/netpfil/pf/pf.h
@@ -131,6 +131,7 @@ enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL,
#define PF_POOL_TYPEMASK 0x0f
#define PF_POOL_STICKYADDR 0x20
#define PF_POOL_ENDPI 0x40
+#define PF_POOL_IPV6NH 0x80
#define PF_WSCALE_FLAG 0x80
#define PF_WSCALE_MASK 0x0f
diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
index e5da05a958f6..d395730d6a54 100644
--- a/sys/netpfil/pf/pf_ioctl.c
+++ b/sys/netpfil/pf/pf_ioctl.c
@@ -2276,6 +2276,7 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket,
rule->nat.cur = TAILQ_FIRST(&rule->nat.list);
rule->rdr.cur = TAILQ_FIRST(&rule->rdr.list);
rule->route.cur = TAILQ_FIRST(&rule->route.list);
+ rule->route.ipv6_nexthop_af = AF_INET6;
TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr,
rule, entries);
ruleset->rules[rs_num].inactive.rcount++;
diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c
index bc9e1dc72902..b8b5157c9b15 100644
--- a/sys/netpfil/pf/pf_lb.c
+++ b/sys/netpfil/pf/pf_lb.c
@@ -545,11 +545,18 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
uint64_t hashidx;
int cnt;
sa_family_t wanted_af;
+ u_int8_t pool_type;
+ bool prefer_ipv6_nexthop = rpool->opts & PF_POOL_IPV6NH;
KASSERT(saf != 0, ("%s: saf == 0", __func__));
KASSERT(naf != NULL, ("%s: naf = NULL", __func__));
KASSERT((*naf) != 0, ("%s: *naf = 0", __func__));
+ /*
+ * The given (*naf) is a hint about the AF of the forwarded packet.
+ * It might be changed if prefer_ipv6_nexthop is enabled and
+ * the combination of nexthop AF and packet AF allows for it.
+ */
wanted_af = (*naf);
mtx_lock(&rpool->mtx);
@@ -594,19 +601,38 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
} else {
raddr = &rpool->cur->addr.v.a.addr;
rmask = &rpool->cur->addr.v.a.mask;
- /*
- * For single addresses check their address family. Unless they
- * have none, which happens when addresses are added with
- * the old ioctl mechanism. In such case trust that the address
- * has the proper AF.
- */
- if (rpool->cur->af && rpool->cur->af != wanted_af) {
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx;
+ }
+
+ /*
+ * For pools with a single host with the prefer-ipv6-nexthop option
+ * we can return a pool address of any AF, unless the forwarded packet
+ * is IPv6, in which case only an IPv6 pool address can be returned.
+ * For non-prefer-ipv6-nexthop pools we can return a pool address only
+ * of the wanted AF, unless the pool address's AF is unknown, which
+ * happens when old ioctls have been used to set up the pool.
+ *
+ * Round-robin pools have their own logic for retrying next addresses.
+ */
+ pool_type = rpool->opts & PF_POOL_TYPEMASK;
+ if (pool_type == PF_POOL_NONE || pool_type == PF_POOL_BITMASK ||
+ ((pool_type == PF_POOL_RANDOM || pool_type == PF_POOL_SRCHASH) &&
+ rpool->cur->addr.type != PF_ADDR_TABLE &&
+ rpool->cur->addr.type != PF_ADDR_DYNIFTL)) {
+ if (prefer_ipv6_nexthop) {
+ if (rpool->cur->af == AF_INET && (*naf) == AF_INET6) {
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
+ wanted_af = rpool->cur->af;
+ } else {
+ if (rpool->cur->af != 0 && rpool->cur->af != (*naf)) {
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
}
}
- switch (rpool->opts & PF_POOL_TYPEMASK) {
+ switch (pool_type) {
case PF_POOL_NONE:
pf_addrcpy(naddr, raddr, wanted_af);
break;
@@ -631,10 +657,22 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
else
rpool->tblidx = (int)arc4random_uniform(cnt);
memset(&rpool->counter, 0, sizeof(rpool->counter));
+ if (prefer_ipv6_nexthop)
+ wanted_af = AF_INET6;
+ retry_other_af_random:
if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
wanted_af, pf_islinklocal, false)) {
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx; /* unsupported */
+ /* Retry with IPv4 nexthop for IPv4 traffic */
+ if (prefer_ipv6_nexthop &&
+ wanted_af == AF_INET6 &&
+ (*naf) == AF_INET) {
+ wanted_af = AF_INET;
+ goto retry_other_af_random;
+ } else {
+ /* no hosts in wanted AF */
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
}
pf_addrcpy(naddr, &rpool->counter, wanted_af);
} else if (init_addr != NULL && PF_AZERO(init_addr,
@@ -702,10 +740,22 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
else
rpool->tblidx = (int)(hashidx % cnt);
memset(&rpool->counter, 0, sizeof(rpool->counter));
+ if (prefer_ipv6_nexthop)
+ wanted_af = AF_INET6;
+ retry_other_af_srchash:
if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter,
wanted_af, pf_islinklocal, false)) {
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx; /* unsupported */
+ /* Retry with IPv4 nexthop for IPv4 traffic */
+ if (prefer_ipv6_nexthop &&
+ wanted_af == AF_INET6 &&
+ (*naf) == AF_INET) {
+ wanted_af = AF_INET;
+ goto retry_other_af_srchash;
+ } else {
+ /* no hosts in wanted AF */
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
}
pf_addrcpy(naddr, &rpool->counter, wanted_af);
} else {
@@ -718,6 +768,9 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
{
struct pf_kpooladdr *acur = rpool->cur;
+ retry_other_af_rr:
+ if (prefer_ipv6_nexthop)
+ wanted_af = rpool->ipv6_nexthop_af;
if (rpool->cur->addr.type == PF_ADDR_TABLE) {
if (!pfr_pool_get(rpool->cur->addr.p.tbl,
&rpool->tblidx, &rpool->counter, wanted_af,
@@ -728,46 +781,55 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
&rpool->tblidx, &rpool->counter, wanted_af,
pf_islinklocal, true))
goto get_addr;
- } else if (pf_match_addr(0, raddr, rmask, &rpool->counter,
- wanted_af))
+ } else if (rpool->cur->af == wanted_af &&
+ pf_match_addr(0, raddr, rmask, &rpool->counter, wanted_af))
goto get_addr;
-
+ if (prefer_ipv6_nexthop &&
+ (*naf) == AF_INET && wanted_af == AF_INET6) {
+ /* Reset table index when changing wanted AF. */
+ rpool->tblidx = -1;
+ rpool->ipv6_nexthop_af = AF_INET;
+ goto retry_other_af_rr;
+ }
try_next:
+ /* Reset prefer-ipv6-nexthop search to IPv6 when iterating pools. */
+ rpool->ipv6_nexthop_af = AF_INET6;
if (TAILQ_NEXT(rpool->cur, entries) == NULL)
rpool->cur = TAILQ_FIRST(&rpool->list);
else
rpool->cur = TAILQ_NEXT(rpool->cur, entries);
+ try_next_ipv6_nexthop_rr:
+ /* Reset table index when iterating pools or changing wanted AF. */
rpool->tblidx = -1;
+ if (prefer_ipv6_nexthop)
+ wanted_af = rpool->ipv6_nexthop_af;
if (rpool->cur->addr.type == PF_ADDR_TABLE) {
- if (pfr_pool_get(rpool->cur->addr.p.tbl,
+ if (!pfr_pool_get(rpool->cur->addr.p.tbl,
&rpool->tblidx, &rpool->counter, wanted_af, NULL,
- true)) {
- /* table contains no address of type 'wanted_af' */
- if (rpool->cur != acur)
- goto try_next;
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx;
- }
+ true))
+ goto get_addr;
} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
- if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
- &rpool->tblidx, &rpool->counter, wanted_af,
- pf_islinklocal, true)) {
- /* interface has no address of type 'wanted_af' */
- if (rpool->cur != acur)
- goto try_next;
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx;
- }
+ if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
+ &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal,
+ true))
+ goto get_addr;
} else {
- raddr = &rpool->cur->addr.v.a.addr;
- rmask = &rpool->cur->addr.v.a.mask;
- if (rpool->cur->af && rpool->cur->af != wanted_af) {
- reason = PFRES_MAPFAILED;
- goto done_pool_mtx;
+ if (rpool->cur->af == wanted_af) {
+ raddr = &rpool->cur->addr.v.a.addr;
+ rmask = &rpool->cur->addr.v.a.mask;
+ pf_addrcpy(&rpool->counter, raddr, wanted_af);
+ goto get_addr;
}
- pf_addrcpy(&rpool->counter, raddr, wanted_af);
}
-
+ if (prefer_ipv6_nexthop &&
+ (*naf) == AF_INET && wanted_af == AF_INET6) {
+ rpool->ipv6_nexthop_af = AF_INET;
+ goto try_next_ipv6_nexthop_rr;
+ }
+ if (rpool->cur != acur)
+ goto try_next;
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
get_addr:
pf_addrcpy(naddr, &rpool->counter, wanted_af);
if (init_addr != NULL && PF_AZERO(init_addr, wanted_af))
@@ -777,9 +839,16 @@ pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr,
}
}
+ if (wanted_af == 0) {
+ reason = PFRES_MAPFAILED;
+ goto done_pool_mtx;
+ }
+
if (nkif)
*nkif = rpool->cur->kif;
+ (*naf) = wanted_af;
+
done_pool_mtx:
mtx_unlock(&rpool->mtx);
diff --git a/tests/sys/netpfil/pf/route_to.sh b/tests/sys/netpfil/pf/route_to.sh
index 765403dcb79c..872de0dcbb91 100644
--- a/tests/sys/netpfil/pf/route_to.sh
+++ b/tests/sys/netpfil/pf/route_to.sh
@@ -28,6 +28,75 @@
common_dir=$(atf_get_srcdir)/../common
+# We need to somehow test whether the random algorithm of pf_map_addr() works.
+# The table or prefix contains multiple IP next-hop addresses; for each one try
+# to establish up to 10 connections. Fail the test if with this many attempts
+# the "good" target has not been chosen. However, since the choice is random,
+# the check might still occasionally fail.
+check_random() {
+ if [ "$1" = "IPv4" ]; then
+ ping_from="${net_clients_4}.1"
+ ping_to="${host_server_4}"
+ else
+ ping_from="${net_clients_6}::1"
+ ping_to="${host_server_6}"
+ fi
+ good_targets="$2"
+ bad_targets="$3"
+
+ port=42000
+ states=$(mktemp) || exit 1
+ for good_target in $good_targets; do
+ found="no"
+ for attempt in $(seq 1 10); do
+ port=$(( port + 1 ))
+ jexec router pfctl -Fs
+ atf_check -s exit:0 ${common_dir}/pft_ping.py \
+ --sendif ${epair_tester}a --replyif ${epair_tester}a \
+ --fromaddr ${ping_from} --to ${ping_to} \
+ --ping-type=tcp3way --send-sport=${port}
+ jexec router pfctl -qvvss | normalize_pfctl_s > $states
+ cat $states
+ if [ -n "${bad_targets}" ]; then
+ for bad_target in $bad_targets; do
+ if grep -qE "route-to: ${bad_target}@" $states; then
+ atf_fail "Bad target ${bad_target} selected!"
+ fi
+ done
+ fi;
+ if grep -qE "route-to: ${good_target}@" $states; then
+ found=yes
+ break
+ fi
+ done
+ if [ "${found}" = "no" ]; then
+ atf_fail "Target ${good_target} not selected after ${attempt} attempts!"
+ fi
+ done
+}
+
+pf_map_addr_common()
+{
+ setup_router_server_nat64
+
+ # Clients will connect from another network behind the router.
+ # This allows for using multiple source addresses.
+ jexec router route add -6 ${net_clients_6}::/${net_clients_6_mask} ${net_tester_6_host_tester}
+ jexec router route add ${net_clients_4}.0/${net_clients_4_mask} ${net_tester_4_host_tester}
+
+ # The servers are reachable over additional IP addresses for
+ # testing of tables and subnets. The addresses are noncontinougnus
+ # for pf_map_addr() counter tests.
+ for i in 0 1 4 5; do
+ a1=$((24 + i))
+ jexec server1 ifconfig ${epair_server1}b inet ${net_server1_4}.${a1}/32 alias
+ jexec server1 ifconfig ${epair_server1}b inet6 ${net_server1_6}::42:${i}/128 alias
+ a2=$((40 + i))
+ jexec server2 ifconfig ${epair_server2}b inet ${net_server2_4}.${a2}/32 alias
+ jexec server2 ifconfig ${epair_server2}b inet6 ${net_server2_6}::42:${i}/128 alias
+ done
+}
+
atf_test_case "v4" "cleanup"
v4_head()
{
@@ -893,36 +962,17 @@ empty_pool_cleanup()
pft_cleanup
}
-
atf_test_case "table_loop" "cleanup"
table_loop_head()
{
atf_set descr 'Check that iterating over tables properly loops'
atf_set require.user root
- atf_set require.progs python3 scapy
}
table_loop_body()
{
- setup_router_server_nat64
-
- # Clients will connect from another network behind the router.
- # This allows for using multiple source addresses.
- jexec router route add -6 ${net_clients_6}::/${net_clients_6_mask} ${net_tester_6_host_tester}
- jexec router route add ${net_clients_4}.0/${net_clients_4_mask} ${net_tester_4_host_tester}
-
- # The servers are reachable over additional IP addresses for
- # testing of tables and subnets. The addresses are noncontinougnus
- # for pf_map_addr() counter tests.
- for i in 0 1 4 5; do
- a1=$((24 + i))
- jexec server1 ifconfig ${epair_server1}b inet ${net_server1_4}.${a1}/32 alias
- jexec server1 ifconfig ${epair_server1}b inet6 ${net_server1_6}::42:${i}/128 alias
- a2=$((40 + i))
- jexec server2 ifconfig ${epair_server2}b inet ${net_server2_4}.${a2}/32 alias
- jexec server2 ifconfig ${epair_server2}b inet6 ${net_server2_6}::42:${i}/128 alias
- done
+ pf_map_addr_common
jexec router pfctl -e
pft_set_rules router \
@@ -976,6 +1026,612 @@ table_loop_cleanup()
}
+atf_test_case "roundrobin" "cleanup"
+
+roundrobin_head()
+{
+ atf_set descr 'multiple gateways of mixed AF, including prefixes and tables, for IPv6 packets'
+ atf_set require.user root
+}
+
+roundrobin_body()
+{
+ pf_map_addr_common
+
+ # The rule is defined as "inet6 proto tcp" so directly given IPv4 hosts
+ # will be removed from the pool by pfctl. Tables will still be loaded
+ # and pf_map_addr() will only use IPv6 addresses from them. It will
+ # iterate over members of the pool and inside of tables and prefixes.
+
+ jexec router pfctl -e
+ pft_set_rules router \
+ "set debug loud" \
+ "set reassemble yes" \
+ "set state-policy if-bound" \
+ "table <rt_targets> { ${net_server2_4}.40/31 ${net_server2_4}.44 ${net_server2_6}::42:0/127 ${net_server2_6}::42:4 }" \
+ "pass in on ${epair_tester}b \
+ route-to { \
+ (${epair_server1}a ${net_server1_4_host_server}) \
+ (${epair_server2}a <rt_targets_empty>) \
+ (${epair_server1}a ${net_server1_6}::42:0/127) \
+ (${epair_server2}a <rt_targets_empty>) \
+ (${epair_server2}a <rt_targets>) \
+ } \
+ inet6 proto tcp \
+ keep state"
+
+ for port in $(seq 1 6); do
+ port=$((4200 + port))
+ atf_check -s exit:0 ${common_dir}/pft_ping.py \
+ --sendif ${epair_tester}a --replyif ${epair_tester}a \
+ --fromaddr ${net_clients_6}::1 --to ${host_server_6} \
+ --ping-type=tcp3way --send-sport=${port}
+ done
+
+ states=$(mktemp) || exit 1
+ jexec router pfctl -qvvss | normalize_pfctl_s > $states
+
+ for state_regexp in \
+ "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4201\] .* route-to: ${net_server1_6}::42:0@${epair_server1}a" \
+ "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4202\] .* route-to: ${net_server1_6}::42:1@${epair_server1}a" \
+ "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4203\] .* route-to: ${net_server2_6}::42:0@${epair_server2}a" \
+ "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4204\] .* route-to: ${net_server2_6}::42:1@${epair_server2}a" \
+ "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4205\] .* route-to: ${net_server2_6}::42:4@${epair_server2}a" \
+ "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4206\] .* route-to: ${net_server1_6}::42:0@${epair_server1}a" \
+ ; do
+ grep -qE "${state_regexp}" $states || atf_fail "State not found for '${state_regexp}'"
+ done
+}
+
+roundrobin_cleanup()
+{
+ pft_cleanup
+}
+
+atf_test_case "random_table" "cleanup"
+
+random_table_head()
+{
+ atf_set descr 'Pool with random flag and a table for IPv6'
+ atf_set require.user root
+}
+
+random_table_body()
+{
+ pf_map_addr_common
+
+ # The "random" flag will pick random hosts from the table but will
+ # not dive into prefixes, always choosing the 0th address.
+ # The proper address family will be chosen.
+
+ jexec router pfctl -e
+ pft_set_rules router \
+ "set debug loud" \
+ "set reassemble yes" \
+ "set state-policy if-bound" \
+ "table <rt_targets> { ${net_server2_4}.40/31 ${net_server2_4}.44 ${net_server2_6}::42:0/127 ${net_server2_6}::42:4 }" \
+ "pass in on ${epair_tester}b \
+ route-to { (${epair_server2}a <rt_targets>) } random \
+ inet6 proto tcp \
+ keep state"
+
+ good_targets="${net_server2_6}::42:0 ${net_server2_6}::42:4"
+ bad_targets="${net_server2_6}::42:1"
+ check_random IPv6 "${good_targets}" "${bad_targets}"
+}
+
+random_table_cleanup()
+{
+ pft_cleanup
+}
+
+atf_test_case "random_prefix" "cleanup"
+
+random_prefix_head()
+{
+ atf_set descr 'Pool with random flag and a prefix for IPv6'
+ atf_set require.user root
+}
+
+random_prefix_body()
+{
+ pf_map_addr_common
+
+ # The "random" flag will pick random hosts from given prefix.
+ # The choice being random makes testing it non-trivial. We do 10
+ # attempts to have each target chosen. Hopefully this is enough to have
+ # this test pass often enough.
+
+ jexec router pfctl -e
+ pft_set_rules router \
+ "set debug loud" \
+ "set reassemble yes" \
+ "set state-policy if-bound" \
+ "pass in on ${epair_tester}b \
+ route-to { (${epair_server2}a ${net_server2_6}::42:0/127) } random \
+ inet6 proto tcp \
+ keep state"
+
+ good_targets="${net_server2_6}::42:0 ${net_server2_6}::42:1"
+ check_random IPv6 "${good_targets}"
+}
+
+random_prefix_cleanup()
+{
+ pft_cleanup
+}
+
+atf_test_case "prefer_ipv6_nexthop_single_ipv4" "cleanup"
+
+prefer_ipv6_nexthop_single_ipv4_head()
+{
+ atf_set descr 'prefer-ipv6-nexthop option for a single IPv4 gateway'
+ atf_set require.user root
+}
+
+prefer_ipv6_nexthop_single_ipv4_body()
+{
+ pf_map_addr_common
+
+ # Basic forwarding test for prefer-ipv6-nexthop pool option.
+ # A single IPv4 gateway will work only for IPv4 traffic.
+
+ jexec router pfctl -e
+ pft_set_rules router \
+ "set reassemble yes" \
*** 565 LINES SKIPPED ***