git: 3daae1ac1d82 - main - ipfw: create a bpf tap point for every log rule

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Mon, 15 Dec 2025 21:45:35 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=3daae1ac1d82ecdcd855101bab5206e914b12350

commit 3daae1ac1d82ecdcd855101bab5206e914b12350
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2025-12-15 20:51:51 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2025-12-15 21:17:23 +0000

    ipfw: create a bpf tap point for every log rule
    
    Dynamically allocate bpf tap points for every rule that has "log".
    The name is "ipfw%u", where %u is replaced by the rule number.
    The default catch-all "ipfw0" tap still exists for compatibility,
    and it receives a packet when there are no bpf listeners on the
    matching rule's tap.
    
    Reviewed by:            ae
    Differential Revision:  https://reviews.freebsd.org/D53877
---
 sbin/ipfw/ipfw.8                         |  31 +++++---
 sbin/ipfw/ipfw2.c                        |   7 +-
 sys/netpfil/ipfw/ip_fw_bpf.c             | 119 ++++++++++++++++++++++++-------
 sys/netpfil/ipfw/ip_fw_log.c             |  30 +-------
 sys/netpfil/ipfw/ip_fw_private.h         |   7 +-
 sys/netpfil/ipfw/ip_fw_sockopt.c         |   5 ++
 sys/netpfil/ipfw/nat64/nat64_translate.c |   2 +-
 sys/netpfil/ipfw/nat64/nat64clat.c       |   2 +-
 sys/netpfil/ipfw/nat64/nat64lsn.c        |   2 +-
 sys/netpfil/ipfw/nat64/nat64stl.c        |   2 +-
 10 files changed, 135 insertions(+), 72 deletions(-)

diff --git a/sbin/ipfw/ipfw.8 b/sbin/ipfw/ipfw.8
index 789512e5cc1e..7a81c84de3e4 100644
--- a/sbin/ipfw/ipfw.8
+++ b/sbin/ipfw/ipfw.8
@@ -1,5 +1,5 @@
 .\"
-.Dd December 4, 2025
+.Dd December 10, 2025
 .Dt IPFW 8
 .Os
 .Sh NAME
@@ -704,7 +704,18 @@ option (see below), packets are logged in two ways: if the sysctl variable
 is set to 0 (default), one can use the
 .Xr bpf 4
 tap named
-.Li ipfw0 .
+.Li ipfwXXXXX ,
+where XXXXX is the number of the rule that has the
+.Cm log
+keyword.
+The compatibility
+.Xr bpf 4
+tap named
+.Li ipfw0
+still exists.
+It catches packets when there are no
+.Xr bpf 4
+listeners on the per-rule tap.
 There is zero overhead when no
 .Xr bpf 4
 listener is attached to the tap.
@@ -746,10 +757,12 @@ Logs a packet to
 with a
 .Dv LOG_SECURITY
 facility.
-.It Ar ipfw0
+.It Ar bpf
 Logs a packet to the
-.Li ipfw0
-pseudo interface.
+.Xr bpf 4
+tap named
+.Li ipfwXXXXX ,
+where XXXXX is the rule number.
 .It Ar rtsock
 Logs a packet to the
 .Xr route 4
@@ -769,7 +782,7 @@ I.e. A packet matching a rule with
 .Cm log logamount
 100
 .Cm logdst
-syslog,ipfw0 ...
+syslog,bpf ...
 .Ed
 .Pp
 will log upto 50 packets.
@@ -3663,9 +3676,9 @@ Default value is
 .It Cm log
 Turn on logging of all handled packets via BPF tap named
 .Ar ipfwlog0 .
-Note that it has different purpose than
-.Ar ipfw0
-tap.
+Note that it has a different purpose than the per-rule
+.Xr bpf 4
+taps.
 Translators sends to BPF an additional information with each packet.
 With
 .Cm tcpdump
diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c
index 27ccaea2c78f..26baa596cc89 100644
--- a/sbin/ipfw/ipfw2.c
+++ b/sbin/ipfw/ipfw2.c
@@ -2013,7 +2013,7 @@ print_logdst(struct buf_pr *bp, uint16_t arg1)
 		comma = ",";
 	}
 	if (arg1 & IPFW_LOG_IPFW0) {
-		bprintf(bp, "%sipfw0", comma);
+		bprintf(bp, "%sbpf", comma);
 		comma = ",";
 	}
 	if (arg1 & IPFW_LOG_RTSOCK) {
@@ -4066,8 +4066,9 @@ parse_logdst(char *logdst_iter)
 			ret |= IPFW_LOG_SYSLOG;
 			continue;
 		}
-		if (_substrcmp(token, "ipfw0") == 0) {
-			/* XXX add multiple ipfw* */
+		/* ipfw0 is compatibility keyword. */
+		if (_substrcmp(token, "bpf") == 0 ||
+		    _substrcmp(token, "ipfw0") == 0) {
 			ret |= IPFW_LOG_IPFW0;
 			continue;
 		}
diff --git a/sys/netpfil/ipfw/ip_fw_bpf.c b/sys/netpfil/ipfw/ip_fw_bpf.c
index 68f31ca59b2e..aa92d30007c2 100644
--- a/sys/netpfil/ipfw/ip_fw_bpf.c
+++ b/sys/netpfil/ipfw/ip_fw_bpf.c
@@ -32,13 +32,14 @@
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
+#include <sys/tree.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_pflog.h>
 #include <net/vnet.h>
 #include <net/bpf.h>
 
-#include <netinet/in.h>
+#include <netinet/ip.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_var.h>
 #include <netpfil/ipfw/ip_fw_private.h>
@@ -54,48 +55,114 @@ static const struct bif_methods bpf_ipfw_methods = {
 	.bif_chkdir = bpf_ipfw_chkdir,
 };
 
-static const char ipfwname[] = "ipfw0";
-static const char ipfwlogname[] = "ipfwlog0";
+struct ipfw_tap {
+	RB_ENTRY(ipfw_tap)	entry;
+	uint32_t		rule;
+	u_int			refs;
+	struct bpf_if		*bpf;
+	char 			name[sizeof("ipfw4294967295")];
+};
 
-VNET_DEFINE_STATIC(struct bpf_if *, bpf_en10mb);
-VNET_DEFINE_STATIC(struct bpf_if *, bpf_pflog);
-#define	V_bpf_en10mb	VNET(bpf_en10mb)
-#define	V_bpf_pflog	VNET(bpf_pflog)
+static int32_t	/* <0, 0 or >0; zero only for identical rule numbers */
+tap_compare(const struct ipfw_tap *a, const struct ipfw_tap *b)
+{
+	return ((a->rule > b->rule) - (a->rule < b->rule));
+}
+RB_HEAD(tap_tree, ipfw_tap);
+VNET_DEFINE_STATIC(struct tap_tree, tap_tree);
+#define	V_tap_tree	VNET(tap_tree)
+RB_GENERATE_STATIC(tap_tree, ipfw_tap, entry, tap_compare);
+VNET_DEFINE_STATIC(struct ipfw_tap *, default_tap);
+#define	V_default_tap	VNET(default_tap)
 
 void
-ipfw_bpf_tap(u_char *pkt, u_int pktlen)
+ipfw_tap_alloc(uint32_t rule)	/* ref or create tap "ipfw<rule>" */
 {
-	bpf_tap(V_bpf_en10mb, pkt, pktlen);
+	struct ipfw_tap	*tap, key = { .rule = rule };
+	int n __diagused;
+
+	tap = RB_FIND(tap_tree, &V_tap_tree, &key);
+	if (tap != NULL) {
+		MPASS(tap->rule == rule);
+		tap->refs++;	/* one more user of the existing tap */
+		return;
+	}
+	tap = malloc(sizeof(*tap), M_IPFW, M_WAITOK);
+	tap->rule = rule;
+	tap->refs = 1;
+	/* Note: the default rule logs to "ipfw0". */
+	if (__predict_false(rule == IPFW_DEFAULT_RULE)) {
+		V_default_tap = tap;
+		rule = 0;
+	}
+	n = snprintf(tap->name, sizeof(tap->name), "ipfw%u", rule);
+	MPASS(n > 4 && n < sizeof("ipfw4294967295"));
+	tap->bpf = bpf_attach(tap->name, DLT_EN10MB, ETHER_HDR_LEN,
+	    &bpf_ipfw_methods, NULL);	/* hdrlen matches DLT_EN10MB */
+	tap = RB_INSERT(tap_tree, &V_tap_tree, tap);
+	MPASS(tap == NULL);
 }
 
 void
-ipfw_bpf_mtap(struct mbuf *m)
+ipfw_tap_free(uint32_t rule)
 {
-	bpf_mtap(V_bpf_en10mb, m);
+
+	struct ipfw_tap	*tap, key = { .rule = rule };
+
+	tap = RB_FIND(tap_tree, &V_tap_tree, &key);
+	MPASS(tap != NULL);
+	if (--tap->refs == 0) {
+		bpf_detach(tap->bpf);
+		RB_REMOVE(tap_tree, &V_tap_tree, tap);
+		free(tap, M_IPFW);
+	}
 }
 
 void
-ipfw_bpf_mtap2(void *data, u_int dlen, struct mbuf *m)
+ipfw_bpf_tap(struct ip_fw_args *args, struct ip *ip, uint32_t rulenum)
 {
-	switch (dlen) {
-	case (ETHER_HDR_LEN):
-		bpf_mtap2(V_bpf_en10mb, data, dlen, m);
-		break;
-	case (PFLOG_HDRLEN):
-		bpf_mtap2(V_bpf_pflog, data, dlen, m);
-		break;
-	default:
-		MPASS(0);
+	struct ipfw_tap *tap, key = { .rule = rulenum };
+
+	tap = RB_FIND(tap_tree, &V_tap_tree, &key);
+	MPASS(tap != NULL);
+	if (!bpf_peers_present(tap->bpf))
+		tap = V_default_tap;
+	if (args->flags & IPFW_ARGS_LENMASK) {
+		bpf_tap(tap->bpf, args->mem, IPFW_ARGS_LENGTH(args->flags));
+	} else if (args->flags & IPFW_ARGS_ETHER) {
+		/* layer2, use orig hdr */
+		bpf_mtap(tap->bpf, args->m);
+	} else {
+		char *fakehdr;
+
+		/* Add fake header. Later we will store
+		 * more info in the header.
+		 */
+		if (ip->ip_v == 4)
+			fakehdr = "DDDDDDSSSSSS\x08\x00";
+		else if (ip->ip_v == 6)
+			fakehdr = "DDDDDDSSSSSS\x86\xdd";
+		else
+			/* Obviously bogus EtherType. */
+			fakehdr = "DDDDDDSSSSSS\xff\xff";
+
+		bpf_mtap2(tap->bpf, fakehdr, ETHER_HDR_LEN, args->m);
 	}
 }
 
+VNET_DEFINE_STATIC(struct bpf_if *, bpf_pflog);
+#define	V_bpf_pflog	VNET(bpf_pflog)
 void
-ipfw_bpf_init(int first __unused)
+ipfw_pflog_tap(void *data, struct mbuf *m)
 {
+	bpf_mtap2(V_bpf_pflog, data, PFLOG_HDRLEN, m);
+}
 
-	V_bpf_en10mb = bpf_attach(ipfwname, DLT_EN10MB, ETHER_HDR_LEN,
-	    &bpf_ipfw_methods, NULL);
-	V_bpf_pflog = bpf_attach(ipfwlogname, DLT_PFLOG, PFLOG_HDRLEN,
+void
+ipfw_bpf_init(int first __unused)
+{
+	ipfw_tap_alloc(IPFW_DEFAULT_RULE);
+	V_bpf_pflog = bpf_attach("ipfwlog0", DLT_PFLOG, PFLOG_HDRLEN,
 	    &bpf_ipfw_methods, NULL);
 }
 
@@ -103,6 +170,6 @@ void
 ipfw_bpf_uninit(int last __unused)
 {
 
-	bpf_detach(V_bpf_en10mb);
+	ipfw_tap_free(IPFW_DEFAULT_RULE);
 	bpf_detach(V_bpf_pflog);
 }
diff --git a/sys/netpfil/ipfw/ip_fw_log.c b/sys/netpfil/ipfw/ip_fw_log.c
index b87aa3da9413..b84e8cbf7e59 100644
--- a/sys/netpfil/ipfw/ip_fw_log.c
+++ b/sys/netpfil/ipfw/ip_fw_log.c
@@ -96,31 +96,6 @@
 
 #define	TARG(k, f)	IP_FW_ARG_TABLEARG(chain, k, f)
 
-static void
-ipfw_log_ipfw0(struct ip_fw_args *args, struct ip *ip)
-{
-	if (args->flags & IPFW_ARGS_LENMASK)
-		ipfw_bpf_tap(args->mem, IPFW_ARGS_LENGTH(args->flags));
-	else if (args->flags & IPFW_ARGS_ETHER)
-		/* layer2, use orig hdr */
-		ipfw_bpf_mtap(args->m);
-	else {
-		/* Add fake header. Later we will store
-		 * more info in the header.
-		 */
-		if (ip->ip_v == 4)
-			ipfw_bpf_mtap2("DDDDDDSSSSSS\x08\x00",
-			    ETHER_HDR_LEN, args->m);
-		else if (ip->ip_v == 6)
-			ipfw_bpf_mtap2("DDDDDDSSSSSS\x86\xdd",
-			    ETHER_HDR_LEN, args->m);
-		else
-			/* Obviously bogus EtherType. */
-			ipfw_bpf_mtap2("DDDDDDSSSSSS\xff\xff",
-			    ETHER_HDR_LEN, args->m);
-	}
-}
-
 /*
  * XXX this function alone takes about 2Kbytes of code!
  */
@@ -747,7 +722,8 @@ ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
 	    /* O_LOG is the first action */
 	    ((cmd = ACTION_PTR(f)) && cmd->arg1 == IPFW_LOG_DEFAULT)) {
 		if (V_fw_verbose == 0) {
-			ipfw_log_ipfw0(args, ip);
+			ipfw_bpf_tap(args, ip,
+			    f != NULL ? f->rulenum : IPFW_DEFAULT_RULE);
 			return;
 		}
 		ipfw_log_syslog(chain, f, hlen, args, offset, tablearg, ip);
@@ -761,6 +737,6 @@ ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
 		ipfw_log_rtsock(chain, f, hlen, args, offset, tablearg, eh);
 
 	if (cmd->arg1 & IPFW_LOG_IPFW0)
-		ipfw_log_ipfw0(args, ip);
+		ipfw_bpf_tap(args, ip, f->rulenum);
 }
 /* end of file */
diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h
index c490d2849a7d..c60b7aa47e94 100644
--- a/sys/netpfil/ipfw/ip_fw_private.h
+++ b/sys/netpfil/ipfw/ip_fw_private.h
@@ -161,9 +161,10 @@ struct ip_fw_chain;
 
 void ipfw_bpf_init(int);
 void ipfw_bpf_uninit(int);
-void ipfw_bpf_tap(u_char *, u_int);
-void ipfw_bpf_mtap(struct mbuf *);
-void ipfw_bpf_mtap2(void *, u_int, struct mbuf *);
+void ipfw_tap_alloc(uint32_t);
+void ipfw_tap_free(uint32_t);
+void ipfw_bpf_tap(struct ip_fw_args *, struct ip *, uint32_t);
+void ipfw_pflog_tap(void *, struct mbuf *);
 void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
     struct ip_fw_args *args, u_short offset, uint32_t tablearg, struct ip *ip,
     void *eh);
diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c
index 5d57759ffb00..a91fb2e84da9 100644
--- a/sys/netpfil/ipfw/ip_fw_sockopt.c
+++ b/sys/netpfil/ipfw/ip_fw_sockopt.c
@@ -210,6 +210,8 @@ ipfw_free_rule(struct ip_fw *rule)
 	 */
 	if (rule->refcnt > 1)
 		return;
+	if (ACTION_PTR(rule)->opcode == O_LOG)
+		ipfw_tap_free(rule->rulenum);
 	uma_zfree_pcpu(V_ipfw_cntr_zone, rule->cntr);
 	free(rule, M_IPFW);
 }
@@ -2511,6 +2513,9 @@ import_rule_v1(struct ip_fw_chain *chain, struct rule_check_info *ci)
 
 	/* Copy opcodes */
 	memcpy(krule->cmd, urule->cmd, krule->cmd_len * sizeof(uint32_t));
+
+	if (ACTION_PTR(krule)->opcode == O_LOG)
+		ipfw_tap_alloc(krule->rulenum);
 }
 
 /*
diff --git a/sys/netpfil/ipfw/nat64/nat64_translate.c b/sys/netpfil/ipfw/nat64/nat64_translate.c
index 393780c969fe..99340b4e16f1 100644
--- a/sys/netpfil/ipfw/nat64/nat64_translate.c
+++ b/sys/netpfil/ipfw/nat64/nat64_translate.c
@@ -151,7 +151,7 @@ nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family)
 
 	logdata->dir = PF_OUT;
 	logdata->af = family;
-	ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m);
+	ipfw_pflog_tap(logdata, m);
 }
 
 static int
diff --git a/sys/netpfil/ipfw/nat64/nat64clat.c b/sys/netpfil/ipfw/nat64/nat64clat.c
index d524652e9a99..c458f85755d3 100644
--- a/sys/netpfil/ipfw/nat64/nat64clat.c
+++ b/sys/netpfil/ipfw/nat64/nat64clat.c
@@ -77,7 +77,7 @@ nat64clat_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
 	plog->subrulenr = htonl(pktid);
 	plog->ruleset[0] = '\0';
 	strlcpy(plog->ifname, "NAT64CLAT", sizeof(plog->ifname));
-	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
+	ipfw_pflog_tap(plog, m);
 }
 
 static int
diff --git a/sys/netpfil/ipfw/nat64/nat64lsn.c b/sys/netpfil/ipfw/nat64/nat64lsn.c
index 5d2ee7ee3b34..1bac425afc30 100644
--- a/sys/netpfil/ipfw/nat64/nat64lsn.c
+++ b/sys/netpfil/ipfw/nat64/nat64lsn.c
@@ -187,7 +187,7 @@ nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
 	    (state->proto << 8) | (state->ip_dst & 0xff));
 	plog->ruleset[0] = '\0';
 	strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
-	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
+	ipfw_pflog_tap(plog, m);
 }
 
 #define	HVAL(p, n, s)	jenkins_hash32((const uint32_t *)(p), (n), (s))
diff --git a/sys/netpfil/ipfw/nat64/nat64stl.c b/sys/netpfil/ipfw/nat64/nat64stl.c
index ad1f2b3ec378..c1ca2dfd5e13 100644
--- a/sys/netpfil/ipfw/nat64/nat64stl.c
+++ b/sys/netpfil/ipfw/nat64/nat64stl.c
@@ -76,7 +76,7 @@ nat64stl_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
 	plog->subrulenr = htonl(pktid);
 	plog->ruleset[0] = '\0';
 	strlcpy(plog->ifname, "NAT64STL", sizeof(plog->ifname));
-	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
+	ipfw_pflog_tap(plog, m);
 }
 
 static int