git: fe96610ba943 - main - pf: rework anchor handling to recurse
Date: Mon, 02 Jun 2025 15:30:34 UTC
The branch main has been updated by kp:
URL: https://cgit.FreeBSD.org/src/commit/?id=fe96610ba94332c7c4c59f3f82920d1ef1177bf2
commit fe96610ba94332c7c4c59f3f82920d1ef1177bf2
Author: Kristof Provost <kp@FreeBSD.org>
AuthorDate: 2025-05-22 10:02:49 +0000
Commit: Kristof Provost <kp@FreeBSD.org>
CommitDate: 2025-06-02 15:30:18 +0000
pf: rework anchor handling to recurse
- percpu anchor stacks
We don't actually need to pre-allocate per_anchor_stack[] if we use
'natural' recursion when traversing the anchor tree.
O.K. mikeb@, mpi@
Extended because FreeBSD pf still processes NAT rules separately, which also
required this change.
Obtained from: OpenBSD, sashan <sashan@openbsd.org>, e236f0fa7b
Obtained from: OpenBSD, sashan <sashan@openbsd.org>, 5e4ad307dc
Sponsored by: Rubicon Communications, LLC ("Netgate")
Differential Revision: https://reviews.freebsd.org/D50579
---
sys/net/pfvar.h | 51 ++++-
sys/netpfil/pf/pf.c | 557 ++++++++++++++++++++++++-------------------------
sys/netpfil/pf/pf_lb.c | 146 +++++++++----
3 files changed, 422 insertions(+), 332 deletions(-)
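In outline, the commit replaces the fixed-size pf_kanchor_stackframe[] array
with natural recursion bounded by a depth counter: pf_step_into_anchor() now
calls back into pf_match_rule() for each child ruleset instead of pushing a
frame. A minimal sketch of that traversal pattern, using simplified stand-in
types rather than the committed pf structures:

	#include <stddef.h>

	#define ANCHOR_STACK_MAX	32

	enum test_status { TEST_FAIL = -1, TEST_OK, TEST_QUICK };

	struct ruleset;

	struct rule {
		struct rule	*next;
		struct ruleset	*anchor;	/* non-NULL: descend into this ruleset */
		int		 quick;
	};

	struct ruleset {
		struct rule	*rules;		/* head of the active rule list */
	};

	/*
	 * Evaluate a ruleset; anchors recurse instead of pushing an
	 * explicit stack frame.  The depth counter replaces the old
	 * fixed-size stack array.
	 */
	static enum test_status
	match_rule(struct ruleset *rs, int depth)
	{
		struct rule *r;

		for (r = rs->rules; r != NULL; r = r->next) {
			if (r->anchor != NULL) {
				enum test_status rv;

				if (depth >= ANCHOR_STACK_MAX)
					return (TEST_FAIL);	/* overflow */
				rv = match_rule(r->anchor, depth + 1);
				if (rv != TEST_OK)
					return (rv);	/* quick match or error */
				continue;
			}
			/* ... evaluate r, remember the last match ... */
			if (r->quick)
				return (TEST_QUICK);	/* stop at a quick rule */
		}
		return (TEST_OK);
	}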
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 82d74e08c14f..b3e3260d80a6 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -1151,6 +1151,45 @@ struct pf_kstate {
* Try to not grow the struct beyond that.
*/
_Static_assert(sizeof(struct pf_kstate) <= 384, "pf_kstate size crosses 384 bytes");
+
+enum pf_test_status {
+ PF_TEST_FAIL = -1,
+ PF_TEST_OK,
+ PF_TEST_QUICK
+};
+
+struct pf_test_ctx {
+ enum pf_test_status test_status;
+ struct pf_pdesc *pd;
+ struct pf_rule_actions act;
+ uint8_t icmpcode;
+ uint8_t icmptype;
+ int icmp_dir;
+ int state_icmp;
+ int tag;
+ int rewrite;
+ u_short reason;
+ struct pf_src_node *sns[PF_SN_MAX];
+ struct pf_krule_slist rules;
+ struct pf_krule *nr;
+ struct pf_krule *tr;
+ struct pf_krule **rm;
+ struct pf_krule *a;
+ struct pf_krule **am;
+ struct pf_kruleset **rsm;
+ struct pf_kruleset *arsm;
+ struct pf_kruleset *aruleset;
+ struct pf_state_key *sk;
+ struct pf_state_key *nk;
+ struct tcphdr *th;
+ struct pf_udp_mapping *udp_mapping;
+ struct pf_kpool *nat_pool;
+ uint16_t virtual_type;
+ uint16_t virtual_id;
+ int depth;
+};
+
+#define PF_ANCHOR_STACK_MAX 32
#endif
/*
@@ -1411,7 +1450,6 @@ RB_PROTOTYPE(pf_kanchor_node, pf_kanchor, entry_node, pf_kanchor_compare);
PFR_TFLAG_REFDANCHOR | \
PFR_TFLAG_COUNTERS)
-struct pf_kanchor_stackframe;
struct pf_keth_anchor_stackframe;
struct pfr_table {
@@ -2678,12 +2716,8 @@ int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t);
#ifdef _KERNEL
void pf_print_host(struct pf_addr *, u_int16_t, sa_family_t);
-void pf_step_into_anchor(struct pf_kanchor_stackframe *, int *,
- struct pf_kruleset **, int, struct pf_krule **,
- struct pf_krule **);
-int pf_step_out_of_anchor(struct pf_kanchor_stackframe *, int *,
- struct pf_kruleset **, int, struct pf_krule **,
- struct pf_krule **, int *);
+enum pf_test_status pf_step_into_anchor(struct pf_test_ctx *, struct pf_krule *);
+int pf_match_rule(struct pf_test_ctx *, struct pf_kruleset *);
void pf_step_into_keth_anchor(struct pf_keth_anchor_stackframe *,
int *, struct pf_keth_ruleset **,
struct pf_keth_rule **, struct pf_keth_rule **,
@@ -2706,8 +2740,7 @@ int pf_get_transaddr_af(struct pf_krule *,
struct pf_pdesc *);
u_short pf_get_translation(struct pf_pdesc *,
int, struct pf_state_key **, struct pf_state_key **,
- struct pf_kanchor_stackframe *, struct pf_krule **,
- struct pf_udp_mapping **udp_mapping);
+ struct pf_test_ctx *, struct pf_udp_mapping **udp_mapping);
u_short pf_get_transaddr(struct pf_pdesc *,
struct pf_state_key **, struct pf_state_key **,
struct pf_krule *, struct pf_udp_mapping **,
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 5c572dfcd425..a5f51d69ab2b 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -4629,13 +4629,6 @@ pf_tag_packet(struct pf_pdesc *pd, int tag)
return (0);
}
-#define PF_ANCHOR_STACKSIZE 32
-struct pf_kanchor_stackframe {
- struct pf_kruleset *rs;
- struct pf_krule *r; /* XXX: + match bit */
- struct pf_kanchor *child;
-};
-
/*
* XXX: We rely on malloc(9) returning pointer aligned addresses.
*/
@@ -4649,80 +4642,42 @@ struct pf_kanchor_stackframe {
((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH); \
} while (0)
-void
-pf_step_into_anchor(struct pf_kanchor_stackframe *stack, int *depth,
- struct pf_kruleset **rs, int n, struct pf_krule **r, struct pf_krule **a)
+enum pf_test_status
+pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_krule *r)
{
- struct pf_kanchor_stackframe *f;
+ enum pf_test_status rv;
PF_RULES_RASSERT();
- if (*depth >= PF_ANCHOR_STACKSIZE) {
+ if (ctx->depth >= PF_ANCHOR_STACK_MAX) {
printf("%s: anchor stack overflow on %s\n",
- __func__, (*r)->anchor->name);
- *r = TAILQ_NEXT(*r, entries);
- return;
- } else if (*depth == 0 && a != NULL)
- *a = *r;
- f = stack + (*depth)++;
- f->rs = *rs;
- f->r = *r;
- if ((*r)->anchor_wildcard) {
- struct pf_kanchor_node *parent = &(*r)->anchor->children;
-
- if ((f->child = RB_MIN(pf_kanchor_node, parent)) == NULL) {
- *r = NULL;
- return;
- }
- *rs = &f->child->ruleset;
- } else {
- f->child = NULL;
- *rs = &(*r)->anchor->ruleset;
+ __func__, r->anchor->name);
+ return (PF_TEST_FAIL);
}
- *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
-}
-int
-pf_step_out_of_anchor(struct pf_kanchor_stackframe *stack, int *depth,
- struct pf_kruleset **rs, int n, struct pf_krule **r, struct pf_krule **a,
- int *match)
-{
- struct pf_kanchor_stackframe *f;
- struct pf_krule *fr;
- int quick = 0;
-
- PF_RULES_RASSERT();
+ ctx->depth++;
- do {
- if (*depth <= 0)
- break;
- f = stack + *depth - 1;
- fr = PF_ANCHOR_RULE(f);
- if (f->child != NULL) {
- f->child = RB_NEXT(pf_kanchor_node,
- &fr->anchor->children, f->child);
- if (f->child != NULL) {
- *rs = &f->child->ruleset;
- *r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
- if (*r == NULL)
- continue;
- else
- break;
+ if (r->anchor_wildcard) {
+ struct pf_kanchor *child;
+ rv = PF_TEST_OK;
+ RB_FOREACH(child, pf_kanchor_node, &r->anchor->children) {
+ rv = pf_match_rule(ctx, &child->ruleset);
+ if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
+ /*
+ * We either hit a rule with a quick action
+ * (more likely), or hit some runtime
+ * error (e.g. a pool_get() failure).
+ */
+ break;
}
}
- (*depth)--;
- if (*depth == 0 && a != NULL)
- *a = NULL;
- *rs = f->rs;
- if (match != NULL && *match > *depth) {
- *match = *depth;
- if (f->r->quick)
- quick = 1;
- }
- *r = TAILQ_NEXT(fr, entries);
- } while (*r == NULL);
+ } else {
+ rv = pf_match_rule(ctx, &r->anchor->ruleset);
+ }
- return (quick);
+ ctx->depth--;
+
+ return (rv);
}
struct pf_keth_anchor_stackframe {
@@ -4749,7 +4704,7 @@ pf_step_into_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth,
if (match)
*match = 0;
- if (*depth >= PF_ANCHOR_STACKSIZE) {
+ if (*depth >= PF_ANCHOR_STACK_MAX) {
printf("%s: anchor stack overflow on %s\n",
__func__, (*r)->anchor->name);
*r = TAILQ_NEXT(*r, entries);
@@ -5240,7 +5195,7 @@ pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf **m0)
int asd = 0, match = 0;
int tag = -1;
uint8_t action;
- struct pf_keth_anchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE];
+ struct pf_keth_anchor_stackframe anchor_stack[PF_ANCHOR_STACK_MAX];
MPASS(kif->pfik_ifp->if_vnet == curvnet);
NET_EPOCH_ASSERT();
@@ -5495,12 +5450,11 @@ pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf **m0)
return (action);
}
-#define PF_TEST_ATTRIB(t, a)\
- do { \
- if (t) { \
- r = a; \
- goto nextrule; \
- } \
+#define PF_TEST_ATTRIB(t, a) \
+ if (t) { \
+ r = a; \
+ continue; \
+ } else do { \
} while (0)
static __inline u_short
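Note the reworked PF_TEST_ATTRIB(): since pf_match_rule() now drives the rule
list with a plain while loop, a failed attribute test advances r and issues
continue rather than goto nextrule; the trailing 'else do { } while (0)' only
absorbs the caller's semicolon and avoids a dangling-else hazard. A minimal
illustration of the expansion (cond and succ are hypothetical names, not pf
identifiers):

	while (r != NULL) {
		/* PF_TEST_ATTRIB(cond, succ); expands to: */
		if (cond) {
			r = succ;
			continue;	/* move on to the next candidate rule */
		} else do {
		} while (0);		/* swallows the trailing ';' */
		/* ... further attribute tests on r ... */
	}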
@@ -5555,132 +5509,18 @@ pf_rule_apply_nat(struct pf_pdesc *pd, struct pf_state_key **skp,
return (PFRES_MAX);
}
-static int
-pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
- struct pf_pdesc *pd, struct pf_krule **am,
- struct pf_kruleset **rsm, u_short *reason, struct inpcb *inp)
+enum pf_test_status
+pf_match_rule(struct pf_test_ctx *ctx, struct pf_kruleset *ruleset)
{
- struct pf_krule *nr = NULL;
- struct pf_krule *r, *a = NULL;
- struct pf_kruleset *ruleset = NULL;
- struct pf_krule_slist match_rules;
struct pf_krule_item *ri;
- struct tcphdr *th = &pd->hdr.tcp;
- struct pf_state_key *sk = NULL, *nk = NULL;
+ struct pf_krule *r;
+ struct pf_pdesc *pd = ctx->pd;
u_short transerror;
- int rewrite = 0;
- int tag = -1;
- int asd = 0;
- int match = 0;
- int state_icmp = 0, icmp_dir;
- int action = PF_PASS;
- u_int16_t virtual_type, virtual_id;
- u_int16_t bproto_sum = 0, bip_sum = 0;
- u_int8_t icmptype = 0, icmpcode = 0;
- struct pf_kanchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE];
- struct pf_udp_mapping *udp_mapping = NULL;
- struct pf_kpool *nat_pool = NULL;
-
- PF_RULES_RASSERT();
-
- PF_ACPY(&pd->nsaddr, pd->src, pd->af);
- PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
-
- SLIST_INIT(&match_rules);
-
- if (inp != NULL) {
- INP_LOCK_ASSERT(inp);
- pd->lookup.uid = inp->inp_cred->cr_uid;
- pd->lookup.gid = inp->inp_cred->cr_groups[0];
- pd->lookup.done = 1;
- }
-
- if (pd->ip_sum)
- bip_sum = *pd->ip_sum;
-
- switch (pd->virtual_proto) {
- case IPPROTO_TCP:
- bproto_sum = th->th_sum;
- pd->nsport = th->th_sport;
- pd->ndport = th->th_dport;
- break;
- case IPPROTO_UDP:
- bproto_sum = pd->hdr.udp.uh_sum;
- pd->nsport = pd->hdr.udp.uh_sport;
- pd->ndport = pd->hdr.udp.uh_dport;
- break;
- case IPPROTO_SCTP:
- pd->nsport = pd->hdr.sctp.src_port;
- pd->ndport = pd->hdr.sctp.dest_port;
- break;
-#ifdef INET
- case IPPROTO_ICMP:
- MPASS(pd->af == AF_INET);
- icmptype = pd->hdr.icmp.icmp_type;
- icmpcode = pd->hdr.icmp.icmp_code;
- state_icmp = pf_icmp_mapping(pd, icmptype,
- &icmp_dir, &virtual_id, &virtual_type);
- if (icmp_dir == PF_IN) {
- pd->nsport = virtual_id;
- pd->ndport = virtual_type;
- } else {
- pd->nsport = virtual_type;
- pd->ndport = virtual_id;
- }
- break;
-#endif /* INET */
-#ifdef INET6
- case IPPROTO_ICMPV6:
- MPASS(pd->af == AF_INET6);
- icmptype = pd->hdr.icmp6.icmp6_type;
- icmpcode = pd->hdr.icmp6.icmp6_code;
- state_icmp = pf_icmp_mapping(pd, icmptype,
- &icmp_dir, &virtual_id, &virtual_type);
- if (icmp_dir == PF_IN) {
- pd->nsport = virtual_id;
- pd->ndport = virtual_type;
- } else {
- pd->nsport = virtual_type;
- pd->ndport = virtual_id;
- }
-
- break;
-#endif /* INET6 */
- default:
- pd->nsport = pd->ndport = 0;
- break;
- }
- pd->osport = pd->nsport;
- pd->odport = pd->ndport;
-
- r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
-
- /* check packet for BINAT/NAT/RDR */
- transerror = pf_get_translation(pd, pd->off, &sk, &nk, anchor_stack,
- &nr, &udp_mapping);
- switch (transerror) {
- default:
- /* A translation error occurred. */
- REASON_SET(reason, transerror);
- goto cleanup;
- case PFRES_MAX:
- /* No match. */
- break;
- case PFRES_MATCH:
- KASSERT(sk != NULL, ("%s: null sk", __func__));
- KASSERT(nk != NULL, ("%s: null nk", __func__));
- if (nr->log) {
- PFLOG_PACKET(nr->action, PFRES_MATCH, nr, a,
- ruleset, pd, 1, NULL);
- }
-
- rewrite += pf_translate_compat(pd, sk, nk, nr, virtual_type);
- nat_pool = &(nr->rdr);
- }
+ r = TAILQ_FIRST(ruleset->rules[PF_RULESET_FILTER].active.ptr);
while (r != NULL) {
- if (pd->related_rule) {
- *rm = pd->related_rule;
+ if (ctx->pd->related_rule) {
+ *ctx->rm = ctx->pd->related_rule;
break;
}
pf_counter_u64_add(&r->evaluations, 1);
@@ -5714,7 +5554,8 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
break;
case IPPROTO_TCP:
- PF_TEST_ATTRIB((r->flagset & tcp_get_flags(th)) != r->flags,
+ PF_TEST_ATTRIB((r->flagset & tcp_get_flags(ctx->th))
+ != r->flags,
TAILQ_NEXT(r, entries));
/* FALLTHROUGH */
case IPPROTO_SCTP:
@@ -5744,10 +5585,10 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
case IPPROTO_ICMP:
case IPPROTO_ICMPV6:
/* icmp only. type always 0 in other cases */
- PF_TEST_ATTRIB(r->type && r->type != icmptype + 1,
+ PF_TEST_ATTRIB(r->type && r->type != ctx->icmptype + 1,
TAILQ_NEXT(r, entries));
/* icmp only. code always 0 in other cases */
- PF_TEST_ATTRIB(r->code && r->code != icmpcode + 1,
+ PF_TEST_ATTRIB(r->code && r->code != ctx->icmpcode + 1,
TAILQ_NEXT(r, entries));
break;
@@ -5762,8 +5603,8 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
PF_TEST_ATTRIB(r->prob &&
r->prob <= arc4random(),
TAILQ_NEXT(r, entries));
- PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, &tag,
- pd->pf_mtag ? pd->pf_mtag->tag : 0),
+ PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r,
+ &ctx->tag, pd->pf_mtag ? pd->pf_mtag->tag : 0),
TAILQ_NEXT(r, entries));
PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(pd->m, r) ==
r->rcvifnot),
@@ -5773,21 +5614,21 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
TAILQ_NEXT(r, entries));
PF_TEST_ATTRIB(r->os_fingerprint != PF_OSFP_ANY &&
(pd->virtual_proto != IPPROTO_TCP || !pf_osfp_match(
- pf_osfp_fingerprint(pd, th),
+ pf_osfp_fingerprint(pd, ctx->th),
r->os_fingerprint)),
TAILQ_NEXT(r, entries));
/* FALLTHROUGH */
if (r->tag)
- tag = r->tag;
+ ctx->tag = r->tag;
if (r->anchor == NULL) {
if (r->action == PF_MATCH) {
/*
* Apply translations before increasing counters,
* in case it fails.
*/
- transerror = pf_rule_apply_nat(pd, &sk, &nk, r,
- &nr, &udp_mapping, virtual_type, &rewrite,
- &nat_pool);
+ transerror = pf_rule_apply_nat(pd, &ctx->sk, &ctx->nk, r,
+ &ctx->nr, &ctx->udp_mapping, ctx->virtual_type,
+ &ctx->rewrite, &ctx->nat_pool);
switch (transerror) {
case PFRES_MATCH:
/* Translation action found in rule and applied successfully */
@@ -5796,16 +5637,16 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
break;
default:
/* Translation action found in rule but failed to apply */
- REASON_SET(reason, transerror);
- goto cleanup;
+ REASON_SET(&ctx->reason, transerror);
+ return (PF_TEST_FAIL);
}
ri = malloc(sizeof(struct pf_krule_item), M_PF_RULE_ITEM, M_NOWAIT | M_ZERO);
if (ri == NULL) {
- REASON_SET(reason, PFRES_MEMORY);
- goto cleanup;
+ REASON_SET(&ctx->reason, PFRES_MEMORY);
+ return (PF_TEST_FAIL);
}
ri->r = r;
- SLIST_INSERT_HEAD(&match_rules, ri, entry);
+ SLIST_INSERT_HEAD(&ctx->rules, ri, entry);
pf_counter_u64_critical_enter();
pf_counter_u64_add_protected(&r->packets[pd->dir == PF_OUT], 1);
pf_counter_u64_add_protected(&r->bytes[pd->dir == PF_OUT], pd->tot_len);
@@ -5813,36 +5654,183 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
pf_rule_to_actions(r, &pd->act);
if (r->log)
PFLOG_PACKET(r->action, PFRES_MATCH, r,
- a, ruleset, pd, 1, NULL);
+ ctx->a, ruleset, pd, 1, NULL);
} else {
- match = asd;
- *rm = r;
- *am = a;
- *rsm = ruleset;
+ /*
+ * Found the matching rule r.
+ */
+ *ctx->rm = r;
+ /*
+ * The anchor rule (and its ruleset) that r belongs to.
+ */
+ *ctx->am = ctx->a;
+ /*
+ * The ruleset r belongs to.
+ */
+ *ctx->rsm = ruleset;
+ /*
+ * The ruleset the anchor rule itself belongs to.
+ */
+ ctx->arsm = ctx->aruleset;
}
if (pd->act.log & PF_LOG_MATCHES)
- pf_log_matches(pd, r, a, ruleset, &match_rules);
- if (r->quick)
+ pf_log_matches(pd, r, ctx->a, ruleset, &ctx->rules);
+ if (r->quick) {
+ ctx->test_status = PF_TEST_QUICK;
break;
- r = TAILQ_NEXT(r, entries);
- } else
- pf_step_into_anchor(anchor_stack, &asd,
- &ruleset, PF_RULESET_FILTER, &r, &a);
-nextrule:
- if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd,
- &ruleset, PF_RULESET_FILTER, &r, &a, &match))
- break;
+ }
+ } else {
+ ctx->a = r; /* remember anchor */
+ ctx->aruleset = ruleset; /* and its ruleset */
+ if (ctx->a->quick)
+ ctx->test_status = PF_TEST_QUICK;
+ if (pf_step_into_anchor(ctx, r) != PF_TEST_OK) {
+ break;
+ }
+ }
+ r = TAILQ_NEXT(r, entries);
+ }
+
+ return (ctx->test_status);
+}
+
+static int
+pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
+ struct pf_pdesc *pd, struct pf_krule **am,
+ struct pf_kruleset **rsm, u_short *reason, struct inpcb *inp)
+{
+ struct pf_krule *r = NULL;
+ struct pf_kruleset *ruleset = NULL;
+ struct pf_krule_item *ri;
+ struct pf_test_ctx ctx;
+ u_short transerror;
+ int action = PF_PASS;
+ u_int16_t bproto_sum = 0, bip_sum = 0;
+ enum pf_test_status rv;
+
+ PF_RULES_RASSERT();
+
+ bzero(&ctx, sizeof(ctx));
+ ctx.tag = -1;
+ ctx.pd = pd;
+ ctx.rm = rm;
+ ctx.am = am;
+ ctx.rsm = rsm;
+ ctx.th = &pd->hdr.tcp;
+ ctx.reason = *reason;
+ SLIST_INIT(&ctx.rules);
+
+ PF_ACPY(&pd->nsaddr, pd->src, pd->af);
+ PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
+
+ if (inp != NULL) {
+ INP_LOCK_ASSERT(inp);
+ pd->lookup.uid = inp->inp_cred->cr_uid;
+ pd->lookup.gid = inp->inp_cred->cr_groups[0];
+ pd->lookup.done = 1;
+ }
+
+ if (pd->ip_sum)
+ bip_sum = *pd->ip_sum;
+
+ switch (pd->virtual_proto) {
+ case IPPROTO_TCP:
+ bproto_sum = ctx.th->th_sum;
+ pd->nsport = ctx.th->th_sport;
+ pd->ndport = ctx.th->th_dport;
+ break;
+ case IPPROTO_UDP:
+ bproto_sum = pd->hdr.udp.uh_sum;
+ pd->nsport = pd->hdr.udp.uh_sport;
+ pd->ndport = pd->hdr.udp.uh_dport;
+ break;
+ case IPPROTO_SCTP:
+ pd->nsport = pd->hdr.sctp.src_port;
+ pd->ndport = pd->hdr.sctp.dest_port;
+ break;
+#ifdef INET
+ case IPPROTO_ICMP:
+ MPASS(pd->af == AF_INET);
+ ctx.icmptype = pd->hdr.icmp.icmp_type;
+ ctx.icmpcode = pd->hdr.icmp.icmp_code;
+ ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
+ &ctx.icmp_dir, &ctx.virtual_id, &ctx.virtual_type);
+ if (ctx.icmp_dir == PF_IN) {
+ pd->nsport = ctx.virtual_id;
+ pd->ndport = ctx.virtual_type;
+ } else {
+ pd->nsport = ctx.virtual_type;
+ pd->ndport = ctx.virtual_id;
+ }
+ break;
+#endif /* INET */
+#ifdef INET6
+ case IPPROTO_ICMPV6:
+ MPASS(pd->af == AF_INET6);
+ ctx.icmptype = pd->hdr.icmp6.icmp6_type;
+ ctx.icmpcode = pd->hdr.icmp6.icmp6_code;
+ ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
+ &ctx.icmp_dir, &ctx.virtual_id, &ctx.virtual_type);
+ if (ctx.icmp_dir == PF_IN) {
+ pd->nsport = ctx.virtual_id;
+ pd->ndport = ctx.virtual_type;
+ } else {
+ pd->nsport = ctx.virtual_type;
+ pd->ndport = ctx.virtual_id;
+ }
+
+ break;
+#endif /* INET6 */
+ default:
+ pd->nsport = pd->ndport = 0;
+ break;
}
- r = *rm;
- a = *am;
- ruleset = *rsm;
+ pd->osport = pd->nsport;
+ pd->odport = pd->ndport;
- REASON_SET(reason, PFRES_MATCH);
+ /* check packet for BINAT/NAT/RDR */
+ transerror = pf_get_translation(pd, pd->off, &ctx.sk, &ctx.nk, &ctx,
+ &ctx.udp_mapping);
+ switch (transerror) {
+ default:
+ /* A translation error occurred. */
+ REASON_SET(&ctx.reason, transerror);
+ goto cleanup;
+ case PFRES_MAX:
+ /* No match. */
+ break;
+ case PFRES_MATCH:
+ KASSERT(ctx.sk != NULL, ("%s: null sk", __func__));
+ KASSERT(ctx.nk != NULL, ("%s: null nk", __func__));
+ if (ctx.nr->log) {
+ PFLOG_PACKET(ctx.nr->action, PFRES_MATCH, ctx.nr, ctx.a,
+ ruleset, pd, 1, NULL);
+ }
+
+ ctx.rewrite += pf_translate_compat(pd, ctx.sk, ctx.nk, ctx.nr, ctx.virtual_type);
+ ctx.nat_pool = &(ctx.nr->rdr);
+ }
+
+ ruleset = &pf_main_ruleset;
+ rv = pf_match_rule(&ctx, ruleset);
+ if (rv == PF_TEST_FAIL) {
+ /*
+ * Reason has been set in pf_match_rule() already.
+ */
+ goto cleanup;
+ }
+
+ r = *ctx.rm; /* matching rule */
+ ctx.a = *ctx.am; /* rule that defines an anchor containing 'r' */
+ ruleset = *ctx.rsm; /* ruleset of the anchor defined by the rule 'a' */
+ ctx.aruleset = ctx.arsm; /* ruleset of the 'a' rule itself */
+
+ REASON_SET(&ctx.reason, PFRES_MATCH);
/* apply actions for last matching pass/block rule */
pf_rule_to_actions(r, &pd->act);
- transerror = pf_rule_apply_nat(pd, &sk, &nk, r, &nr, &udp_mapping,
- virtual_type, &rewrite, &nat_pool);
+ transerror = pf_rule_apply_nat(pd, &ctx.sk, &ctx.nk, r, &ctx.nr, &ctx.udp_mapping,
+ ctx.virtual_type, &ctx.rewrite, &ctx.nat_pool);
switch (transerror) {
case PFRES_MATCH:
/* Translation action found in rule and applied successfully */
@@ -5851,31 +5839,31 @@ nextrule:
break;
default:
/* Translation action found in rule but failed to apply */
- REASON_SET(reason, transerror);
+ REASON_SET(&ctx.reason, transerror);
goto cleanup;
}
if (r->log) {
- if (rewrite)
+ if (ctx.rewrite)
m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
- PFLOG_PACKET(r->action, *reason, r, a, ruleset, pd, 1, NULL);
+ PFLOG_PACKET(r->action, ctx.reason, r, ctx.a, ruleset, pd, 1, NULL);
}
if (pd->act.log & PF_LOG_MATCHES)
- pf_log_matches(pd, r, a, ruleset, &match_rules);
+ pf_log_matches(pd, r, ctx.a, ruleset, &ctx.rules);
if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
(r->action == PF_DROP) &&
((r->rule_flag & PFRULE_RETURNRST) ||
(r->rule_flag & PFRULE_RETURNICMP) ||
(r->rule_flag & PFRULE_RETURN))) {
- pf_return(r, nr, pd, th, bproto_sum,
- bip_sum, reason, r->rtableid);
+ pf_return(r, ctx.nr, pd, ctx.th, bproto_sum,
+ bip_sum, &ctx.reason, r->rtableid);
}
if (r->action == PF_DROP)
goto cleanup;
- if (tag > 0 && pf_tag_packet(pd, tag)) {
- REASON_SET(reason, PFRES_MEMORY);
+ if (ctx.tag > 0 && pf_tag_packet(pd, ctx.tag)) {
+ REASON_SET(&ctx.reason, PFRES_MEMORY);
goto cleanup;
}
if (pd->act.rtableid >= 0)
@@ -5890,31 +5878,32 @@ nextrule:
*/
pd->act.rt = r->rt;
/* Don't use REASON_SET, pf_map_addr increases the reason counters */
- *reason = pf_map_addr_sn(pd->af, r, pd->src, &pd->act.rt_addr,
+ ctx.reason = pf_map_addr_sn(pd->af, r, pd->src, &pd->act.rt_addr,
&pd->act.rt_kif, NULL, &sn, &snh, &(r->route), PF_SN_ROUTE);
- if (*reason != 0)
+ if (ctx.reason != 0)
goto cleanup;
}
if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
- (!state_icmp && (r->keep_state || nr != NULL ||
+ (!ctx.state_icmp && (r->keep_state || ctx.nr != NULL ||
(pd->flags & PFDESC_TCP_NORM)))) {
bool nat64;
- action = pf_create_state(r, nr, a, pd, nk, sk,
- &rewrite, sm, tag, bproto_sum, bip_sum,
- &match_rules, udp_mapping, nat_pool, reason);
- sk = nk = NULL;
+ action = pf_create_state(r, ctx.nr, ctx.a, pd, ctx.nk, ctx.sk,
+ &ctx.rewrite, sm, ctx.tag, bproto_sum, bip_sum,
+ &ctx.rules, ctx.udp_mapping, ctx.nat_pool, &ctx.reason);
+ ctx.sk = ctx.nk = NULL;
if (action != PF_PASS) {
- pf_udp_mapping_release(udp_mapping);
- if (r->log || (nr != NULL && nr->log) ||
- *reason == PFRES_MEMORY)
+ pf_udp_mapping_release(ctx.udp_mapping);
+ if (r->log || (ctx.nr != NULL && ctx.nr->log) ||
+ ctx.reason == PFRES_MEMORY)
pd->act.log |= PF_LOG_FORCE;
if (action == PF_DROP &&
(r->rule_flag & PFRULE_RETURN))
- pf_return(r, nr, pd, th,
- bproto_sum, bip_sum, reason,
+ pf_return(r, ctx.nr, pd, ctx.th,
+ bproto_sum, bip_sum, &ctx.reason,
pd->act.rtableid);
+ *reason = ctx.reason;
return (action);
}
@@ -5922,69 +5911,73 @@ nextrule:
if (nat64) {
int ret;
- if (sk == NULL)
- sk = (*sm)->key[pd->dir == PF_IN ? PF_SK_STACK : PF_SK_WIRE];
- if (nk == NULL)
- nk = (*sm)->key[pd->dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK];
+ if (ctx.sk == NULL)
+ ctx.sk = (*sm)->key[pd->dir == PF_IN ? PF_SK_STACK : PF_SK_WIRE];
+ if (ctx.nk == NULL)
+ ctx.nk = (*sm)->key[pd->dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK];
if (pd->dir == PF_IN) {
- ret = pf_translate(pd, &sk->addr[pd->didx],
- sk->port[pd->didx], &sk->addr[pd->sidx],
- sk->port[pd->sidx], virtual_type,
- icmp_dir);
+ ret = pf_translate(pd, &ctx.sk->addr[pd->didx],
+ ctx.sk->port[pd->didx], &ctx.sk->addr[pd->sidx],
+ ctx.sk->port[pd->sidx], ctx.virtual_type,
+ ctx.icmp_dir);
} else {
- ret = pf_translate(pd, &sk->addr[pd->sidx],
- sk->port[pd->sidx], &sk->addr[pd->didx],
- sk->port[pd->didx], virtual_type,
- icmp_dir);
+ ret = pf_translate(pd, &ctx.sk->addr[pd->sidx],
+ ctx.sk->port[pd->sidx], &ctx.sk->addr[pd->didx],
+ ctx.sk->port[pd->didx], ctx.virtual_type,
+ ctx.icmp_dir);
}
if (ret < 0)
goto cleanup;
- rewrite += ret;
+ ctx.rewrite += ret;
- if (rewrite && sk->af != nk->af)
+ if (ctx.rewrite && ctx.sk->af != ctx.nk->af)
action = PF_AFRT;
}
} else {
- while ((ri = SLIST_FIRST(&match_rules))) {
- SLIST_REMOVE_HEAD(&match_rules, entry);
+ while ((ri = SLIST_FIRST(&ctx.rules))) {
+ SLIST_REMOVE_HEAD(&ctx.rules, entry);
free(ri, M_PF_RULE_ITEM);
}
- uma_zfree(V_pf_state_key_z, sk);
- uma_zfree(V_pf_state_key_z, nk);
- sk = nk = NULL;
- pf_udp_mapping_release(udp_mapping);
+ uma_zfree(V_pf_state_key_z, ctx.sk);
+ uma_zfree(V_pf_state_key_z, ctx.nk);
+ ctx.sk = ctx.nk = NULL;
+ pf_udp_mapping_release(ctx.udp_mapping);
}
/* copy back packet headers if we performed NAT operations */
- if (rewrite)
+ if (ctx.rewrite)
m_copyback(pd->m, pd->off, pd->hdrlen, pd->hdr.any);
if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) &&
pd->dir == PF_OUT &&
- V_pfsync_defer_ptr != NULL && V_pfsync_defer_ptr(*sm, pd->m))
+ V_pfsync_defer_ptr != NULL && V_pfsync_defer_ptr(*sm, pd->m)) {
/*
* We want the state created, but we don't
* want to send this in case a partner
* firewall has to know about it to allow
* replies through it.
*/
+ *reason = ctx.reason;
return (PF_DEFER);
+ }
+ *reason = ctx.reason;
return (action);
cleanup:
- while ((ri = SLIST_FIRST(&match_rules))) {
- SLIST_REMOVE_HEAD(&match_rules, entry);
+ while ((ri = SLIST_FIRST(&ctx.rules))) {
+ SLIST_REMOVE_HEAD(&ctx.rules, entry);
free(ri, M_PF_RULE_ITEM);
}
- uma_zfree(V_pf_state_key_z, sk);
- uma_zfree(V_pf_state_key_z, nk);
- pf_udp_mapping_release(udp_mapping);
+ uma_zfree(V_pf_state_key_z, ctx.sk);
+ uma_zfree(V_pf_state_key_z, ctx.nk);
+ pf_udp_mapping_release(ctx.udp_mapping);
+ *reason = ctx.reason;
return (PF_DROP);
}
diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c
index 00f25c29e23c..ed87aa2cec4d 100644
--- a/sys/netpfil/pf/pf_lb.c
+++ b/sys/netpfil/pf/pf_lb.c
@@ -75,8 +75,9 @@ VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16;
static uint64_t pf_hash(struct pf_addr *, struct pf_addr *,
struct pf_poolhashkey *, sa_family_t);
-struct pf_krule *pf_match_translation(struct pf_pdesc *,
- int, struct pf_kanchor_stackframe *);
+struct pf_krule *pf_match_translation(int, struct pf_test_ctx *);
+static enum pf_test_status pf_step_into_translation_anchor(int, struct pf_test_ctx *,
+ struct pf_krule *);
static int pf_get_sport(struct pf_pdesc *, struct pf_krule *,
struct pf_addr *, uint16_t *, uint16_t, uint16_t,
struct pf_ksrc_node **, struct pf_srchash **,
@@ -128,25 +129,21 @@ pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
return (res);
}
-#define PF_TEST_ATTRIB(t, a)\
- do { \
- if (t) { \
- r = a; \
- goto nextrule; \
- } \
+#define PF_TEST_ATTRIB(t, a) \
+ if (t) { \
+ r = a; \
+ continue; \
+ } else do { \
} while (0)
-struct pf_krule *
-pf_match_translation(struct pf_pdesc *pd,
- int rs_num, struct pf_kanchor_stackframe *anchor_stack)
+static enum pf_test_status
+pf_match_translation_rule(int rs_num, struct pf_test_ctx *ctx, struct pf_kruleset *ruleset)
{
- struct pf_krule *r, *rm = NULL;
- struct pf_kruleset *ruleset = NULL;
- int tag = -1;
+ struct pf_krule *r;
+ struct pf_pdesc *pd = ctx->pd;
int rtableid = -1;
- int asd = 0;
- r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
+ r = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
while (r != NULL) {
struct pf_rule_addr *src = NULL, *dst = NULL;
struct pf_addr_wrap *xdst = NULL;
@@ -188,7 +185,7 @@ pf_match_translation(struct pf_pdesc *pd,
!pf_match_port(dst->port_op, dst->port[0],
dst->port[1], pd->ndport),
r->skip[PF_SKIP_DST_PORT]);
- PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, &tag,
+ PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, &ctx->tag,
pd->pf_mtag ? pd->pf_mtag->tag : 0),
TAILQ_NEXT(r, entries));
PF_TEST_ATTRIB(r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
@@ -196,33 +193,101 @@ pf_match_translation(struct pf_pdesc *pd,
&pd->hdr.tcp), r->os_fingerprint)),
TAILQ_NEXT(r, entries));
if (r->tag)
- tag = r->tag;
+ ctx->tag = r->tag;
if (r->rtableid >= 0)
rtableid = r->rtableid;
if (r->anchor == NULL) {
- rm = r;
- if (rm->action == PF_NONAT ||
- rm->action == PF_NORDR ||
- rm->action == PF_NOBINAT) {
- rm = NULL;
+ if (r->action == PF_NONAT ||
+ r->action == PF_NORDR ||
+ r->action == PF_NOBINAT) {
+ *ctx->rm = NULL;
+ } else {
+ /*
+ * Found the matching rule r.
+ */
+ ctx->tr = r;
+ /*
+ * The anchor rule (and its ruleset) that r belongs to.
+ */
+ *ctx->am = ctx->a;
+ /*
+ * The ruleset r belongs to.
+ */
+ *ctx->rsm = ruleset;
+ /*
+ * The ruleset the anchor rule itself belongs to.
+ */
+ ctx->arsm = ctx->aruleset;
}
- break;
*** 122 LINES SKIPPED ***