git: 65b4bf7a6e0d - main - ipfw: refactor how we store bpf tap points

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Sat, 17 Jan 2026 04:09:26 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=65b4bf7a6e0d415492889434a72085debdacdf82

commit 65b4bf7a6e0d415492889434a72085debdacdf82
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2026-01-17 04:02:25 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2026-01-17 04:09:02 +0000

    ipfw: refactor how we store bpf tap points
    
    Make the tap database belong to ip_fw_chain, but leave the default "ipfw0"
    tap per-vnet.  This is only slightly better than keeping the database per-
    vnet, as the bpf name space is per-vnet.  However, we yet have only single
    ipfw chain.  Whenever multiple chains will coexist, this needs to be
    addressed.
    
    Require the chain lock to make modifications to the database.
    
    Move tap allocation to a later ruleset build stage, when all rule numbers
    are known already.  This fixes a panic introduced by 3daae1ac1d82.
    
    Fixes:  3daae1ac1d82ecdcd855101bab5206e914b12350
---
 sys/netpfil/ipfw/ip_fw2.c        |  1 +
 sys/netpfil/ipfw/ip_fw_bpf.c     | 60 +++++++++++++++++++++++-----------------
 sys/netpfil/ipfw/ip_fw_log.c     |  4 +--
 sys/netpfil/ipfw/ip_fw_private.h | 11 ++++++--
 sys/netpfil/ipfw/ip_fw_sockopt.c |  9 +++---
 5 files changed, 49 insertions(+), 36 deletions(-)

diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c
index 7c47a97e4953..fe707abc7682 100644
--- a/sys/netpfil/ipfw/ip_fw2.c
+++ b/sys/netpfil/ipfw/ip_fw2.c
@@ -3667,6 +3667,7 @@ vnet_ipfw_init(const void *unused)
 #ifdef IPFIREWALL_NAT
 	LIST_INIT(&chain->nat);
 #endif
+	RB_INIT(&chain->taps);
 
 	/* Init shared services hash table */
 	ipfw_init_srv(chain);
diff --git a/sys/netpfil/ipfw/ip_fw_bpf.c b/sys/netpfil/ipfw/ip_fw_bpf.c
index d9897f700d57..af52cbc62ed6 100644
--- a/sys/netpfil/ipfw/ip_fw_bpf.c
+++ b/sys/netpfil/ipfw/ip_fw_bpf.c
@@ -1,7 +1,7 @@
 /*-
  * Copyright (c) 2016 Yandex LLC
  * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
- * Copyright (c) 2025 Gleb Smirnoff <glebius@FreeBSD.org>
+ * Copyright (c) 2025-2026 Gleb Smirnoff <glebius@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -68,20 +68,20 @@ tap_compare(const struct ipfw_tap *a, const struct ipfw_tap *b)
 {
 	return (a->rule != b->rule ? (a->rule < b->rule ? -1 : 1) : 0);
 }
-RB_HEAD(tap_tree, ipfw_tap);
-VNET_DEFINE_STATIC(struct tap_tree, tap_tree);
-#define	V_tap_tree	VNET(tap_tree)
 RB_GENERATE_STATIC(tap_tree, ipfw_tap, entry, tap_compare);
-VNET_DEFINE_STATIC(struct ipfw_tap *, default_tap);
+VNET_DEFINE_STATIC(struct ipfw_tap, default_tap) = { .name = "ipfw0" };
 #define	V_default_tap	VNET(default_tap)
 
 void
-ipfw_tap_alloc(uint32_t rule)
+ipfw_tap_alloc(struct ip_fw_chain *ch, uint32_t rule)
 {
 	struct ipfw_tap	*tap, key = { .rule = rule };
 	int n __diagused;
 
-	tap = RB_FIND(tap_tree, &V_tap_tree, &key);
+	MPASS(rule > 0 && rule < IPFW_DEFAULT_RULE);
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	tap = RB_FIND(tap_tree, &ch->taps, &key);
 	if (tap != NULL) {
 		MPASS(tap->rule == rule);
 		tap->refs++;
@@ -90,43 +90,51 @@ ipfw_tap_alloc(uint32_t rule)
 	tap = malloc(sizeof(*tap), M_IPFW, M_WAITOK);
 	tap->rule = rule;
 	tap->refs = 1;
-	/* Note: the default rule logs to "ipfw0". */
-	if (__predict_false(rule == IPFW_DEFAULT_RULE)) {
-		V_default_tap = tap;
-		rule = 0;
-	}
 	n = snprintf(tap->name, sizeof(tap->name), "ipfw%u", rule);
 	MPASS(n > 4 && n < sizeof("ipfw4294967295"));
 	tap->bpf = bpf_attach(tap->name, DLT_EN10MB, PFLOG_HDRLEN,
 	    &bpf_ipfw_methods, NULL);
-	tap = RB_INSERT(tap_tree, &V_tap_tree, tap);
+	tap = RB_INSERT(tap_tree, &ch->taps, tap);
 	MPASS(tap == NULL);
 }
 
 void
-ipfw_tap_free(uint32_t rule)
+ipfw_tap_free(struct ip_fw_chain *ch, uint32_t rule)
 {
-
 	struct ipfw_tap	*tap, key = { .rule = rule };
 
-	tap = RB_FIND(tap_tree, &V_tap_tree, &key);
+	MPASS(rule > 0 && rule < IPFW_DEFAULT_RULE);
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	tap = RB_FIND(tap_tree, &ch->taps, &key);
 	MPASS(tap != NULL);
 	if (--tap->refs == 0) {
 		bpf_detach(tap->bpf);
-		RB_REMOVE(tap_tree, &V_tap_tree, tap);
+		RB_REMOVE(tap_tree, &ch->taps, tap);
 		free(tap, M_IPFW);
 	}
 }
 
 void
-ipfw_bpf_tap(struct ip_fw_args *args, struct ip *ip, uint32_t rulenum)
+ipfw_bpf_tap(struct ip_fw_chain *ch, struct ip_fw_args *args,
+    struct ip *ip, uint32_t rulenum)
 {
-	struct ipfw_tap *tap, key = { .rule = rulenum };
+	struct ipfw_tap *tap;
 
-	tap = RB_FIND(tap_tree, &V_tap_tree, &key);
-	MPASS(tap != NULL);
-	if (!bpf_peers_present(tap->bpf))
-		tap = V_default_tap;
+	if (rulenum == IPFW_DEFAULT_RULE) {
+		tap = &V_default_tap;
+	} else {
+		struct ipfw_tap key = { .rule = rulenum };
+
+		tap = RB_FIND(tap_tree, &ch->taps, &key);
+		MPASS(tap != NULL);
+		/*
+		 * Compatibility: if user is not using per-rule taps, fallback
+		 * to the default tap.
+		 */
+		if (!bpf_peers_present(tap->bpf))
+			tap = &V_default_tap;
+	}
 	if (args->flags & IPFW_ARGS_LENMASK) {
 		bpf_tap(tap->bpf, args->mem, IPFW_ARGS_LENGTH(args->flags));
 	} else if (args->flags & IPFW_ARGS_ETHER) {
@@ -161,7 +169,8 @@ ipfw_pflog_tap(void *data, struct mbuf *m)
 void
 ipfw_bpf_init(int first __unused)
 {
-	ipfw_tap_alloc(IPFW_DEFAULT_RULE);
+	V_default_tap.bpf = bpf_attach(V_default_tap.name, DLT_EN10MB,
+	    PFLOG_HDRLEN, &bpf_ipfw_methods, NULL);
 	V_bpf_pflog = bpf_attach("ipfwlog0", DLT_PFLOG, PFLOG_HDRLEN,
 	    &bpf_ipfw_methods, NULL);
 }
@@ -169,7 +178,6 @@ ipfw_bpf_init(int first __unused)
 void
 ipfw_bpf_uninit(int last __unused)
 {
-
-	ipfw_tap_free(IPFW_DEFAULT_RULE);
+	bpf_detach(V_default_tap.bpf);
 	bpf_detach(V_bpf_pflog);
 }
diff --git a/sys/netpfil/ipfw/ip_fw_log.c b/sys/netpfil/ipfw/ip_fw_log.c
index b84e8cbf7e59..0f8a4df4e5d6 100644
--- a/sys/netpfil/ipfw/ip_fw_log.c
+++ b/sys/netpfil/ipfw/ip_fw_log.c
@@ -722,7 +722,7 @@ ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
 	    /* O_LOG is the first action */
 	    ((cmd = ACTION_PTR(f)) && cmd->arg1 == IPFW_LOG_DEFAULT)) {
 		if (V_fw_verbose == 0) {
-			ipfw_bpf_tap(args, ip,
+			ipfw_bpf_tap(chain, args, ip,
 			    f != NULL ? f->rulenum : IPFW_DEFAULT_RULE);
 			return;
 		}
@@ -737,6 +737,6 @@ ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
 		ipfw_log_rtsock(chain, f, hlen, args, offset, tablearg, eh);
 
 	if (cmd->arg1 & IPFW_LOG_IPFW0)
-		ipfw_bpf_tap(args, ip, f->rulenum);
+		ipfw_bpf_tap(chain, args, ip, f->rulenum);
 }
 /* end of file */
diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h
index af592fc4c6c0..67bdde66e385 100644
--- a/sys/netpfil/ipfw/ip_fw_private.h
+++ b/sys/netpfil/ipfw/ip_fw_private.h
@@ -28,6 +28,9 @@
 #ifndef _IPFW2_PRIVATE_H
 #define _IPFW2_PRIVATE_H
 
+#include <sys/queue.h>
+#include <sys/tree.h>
+
 /*
  * Internal constants and data structures used by ipfw components
  * and not meant to be exported outside the kernel.
@@ -161,9 +164,10 @@ struct ip_fw_chain;
 
 void ipfw_bpf_init(int);
 void ipfw_bpf_uninit(int);
-void ipfw_tap_alloc(uint32_t);
-void ipfw_tap_free(uint32_t);
-void ipfw_bpf_tap(struct ip_fw_args *, struct ip *, uint32_t);
+void ipfw_tap_alloc(struct ip_fw_chain *, uint32_t);
+void ipfw_tap_free(struct ip_fw_chain *, uint32_t);
+void ipfw_bpf_tap(struct ip_fw_chain *, struct ip_fw_args *, struct ip *,
+    uint32_t);
 void ipfw_pflog_tap(void *, struct mbuf *);
 void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
     struct ip_fw_args *args, u_short offset, uint32_t tablearg, struct ip *ip,
@@ -320,6 +324,7 @@ struct ip_fw_chain {
 	void		*ifcfg;		/* interface module data */
 	int		*idxmap_back;	/* standby skipto array of rules */
 	struct namedobj_instance	*srvmap; /* cfg name->number mappings */
+	RB_HEAD(tap_tree, ipfw_tap) taps;	/* see ip_fw_bpf.c */
 #if defined( __linux__ ) || defined( _WIN32 )
 	spinlock_t uh_lock;
 #else
diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c
index 4c67eaa9cbba..2941444a7bd3 100644
--- a/sys/netpfil/ipfw/ip_fw_sockopt.c
+++ b/sys/netpfil/ipfw/ip_fw_sockopt.c
@@ -210,8 +210,6 @@ ipfw_free_rule(struct ip_fw *rule)
 	 */
 	if (rule->refcnt > 1)
 		return;
-	if (ACTION_PTR(rule)->opcode == O_LOG)
-		ipfw_tap_free(rule->rulenum);
 	uma_zfree_pcpu(V_ipfw_cntr_zone, rule->cntr);
 	free(rule, M_IPFW);
 }
@@ -505,6 +503,8 @@ ipfw_commit_rules(struct ip_fw_chain *chain, struct rule_check_info *rci,
 			memcpy((char *)ci->urule + ci->urule_numoff, &rulenum,
 			    sizeof(rulenum));
 		}
+		if (ACTION_PTR(krule)->opcode == O_LOG)
+			ipfw_tap_alloc(chain, krule->rulenum);
 	}
 
 	/* duplicate the remaining part, we always have the default rule */
@@ -2102,6 +2102,8 @@ unref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule)
 		else
 			no->refcnt--;
 	}
+	if (ACTION_PTR(rule)->opcode == O_LOG)
+		ipfw_tap_free(ch, rule->rulenum);
 }
 
 /*
@@ -2459,9 +2461,6 @@ import_rule_v1(struct ip_fw_chain *chain, struct rule_check_info *ci)
 
 	/* Copy opcodes */
 	memcpy(krule->cmd, urule->cmd, krule->cmd_len * sizeof(uint32_t));
-
-	if (ACTION_PTR(krule)->opcode == O_LOG)
-		ipfw_tap_alloc(krule->rulenum);
 }
 
 /*