git: e732e742b37f - main - pf: Initial Ethernet level filtering code

From: Kristof Provost <kp_at_FreeBSD.org>
Date: Wed, 02 Mar 2022 16:00:41 UTC
The branch main has been updated by kp:

URL: https://cgit.FreeBSD.org/src/commit/?id=e732e742b37f66746b7556b990c54869845b72fc

commit e732e742b37f66746b7556b990c54869845b72fc
Author:     Kristof Provost <kp@FreeBSD.org>
AuthorDate: 2021-02-03 10:16:10 +0000
Commit:     Kristof Provost <kp@FreeBSD.org>
CommitDate: 2022-03-02 16:00:03 +0000

    pf: Initial Ethernet level filtering code
    
    This is the kernel side of stateless Ethernet-level filtering for pf.
    
    The primary use case for this is to enable captive portal functionality
    to allow/deny access by MAC address, rather than per IP address.
    
    Sponsored by:   Rubicon Communications, LLC ("Netgate")
    Differential Revision:  https://reviews.freebsd.org/D31737
---
 sys/net/pfvar.h             |  67 +++++++
 sys/netpfil/pf/pf.c         | 135 +++++++++++++
 sys/netpfil/pf/pf_ioctl.c   | 454 ++++++++++++++++++++++++++++++++++++++++++++
 sys/netpfil/pf/pf_nv.c      | 124 ++++++++++++
 sys/netpfil/pf/pf_nv.h      |   2 +
 sys/netpfil/pf/pf_ruleset.c |  11 ++
 6 files changed, 793 insertions(+)

diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index ba1f0a2fd9b3..51b7623a5c61 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -52,6 +52,7 @@
 #include <vm/uma.h>
 
 #include <net/if.h>
+#include <net/ethernet.h>
 #include <net/radix.h>
 #include <netinet/in.h>
 #ifdef _KERNEL
@@ -570,6 +571,61 @@ struct pf_rule_actions {
 	uint32_t	 flags;
 };
 
+union pf_keth_rule_ptr {
+	struct pf_keth_rule	*ptr;
+	uint32_t		nr;
+};
+
+struct pf_keth_rule_addr {
+	uint8_t	addr[ETHER_ADDR_LEN];
+	bool neg;
+	uint8_t	isset;
+};
+
+struct pf_keth_rule {
+#define PFE_SKIP_IFP		0
+#define PFE_SKIP_DIR		1
+#define PFE_SKIP_PROTO		2
+#define PFE_SKIP_SRC_ADDR	3
+#define PFE_SKIP_DST_ADDR	4
+#define PFE_SKIP_COUNT		5
+	union pf_keth_rule_ptr	 skip[PFE_SKIP_COUNT];
+
+	TAILQ_ENTRY(pf_keth_rule)	entries;
+
+	uint32_t		 nr;
+
+	bool			 quick;
+
+	/* Filter */
+	char			 ifname[IFNAMSIZ];
+	struct pfi_kkif		*kif;
+	bool			 ifnot;
+	uint8_t			 direction;
+	uint16_t		 proto;
+	struct pf_keth_rule_addr src, dst;
+
+	/* Stats */
+	counter_u64_t		 evaluations;
+	counter_u64_t		 packets[2];
+	counter_u64_t		 bytes[2];
+
+	/* Action */
+	char			 qname[PF_QNAME_SIZE];
+	int			 qid;
+	char			 tagname[PF_TAG_NAME_SIZE];
+	uint16_t		 tag;
+	uint8_t			 action;
+};
+
+TAILQ_HEAD(pf_keth_rules, pf_keth_rule);
+
+struct pf_keth_settings {
+	struct pf_keth_rules	rules;
+	uint32_t		ticket;
+	int			open;
+};
+
 union pf_krule_ptr {
 	struct pf_krule		*ptr;
 	u_int32_t		 nr;
@@ -1617,6 +1673,7 @@ struct pfioc_ruleset {
 
 #define PF_RULESET_ALTQ		(PF_RULESET_MAX)
 #define PF_RULESET_TABLE	(PF_RULESET_MAX+1)
+#define PF_RULESET_ETH		(PF_RULESET_MAX+2)
 struct pfioc_trans {
 	int		 size;	/* number of elements */
 	int		 esize; /* size of each element in bytes */
@@ -1756,6 +1813,9 @@ struct pfioc_iface {
 #define	DIOCSETSYNCOOKIES	_IOWR('D', 95, struct pfioc_nv)
 #define	DIOCKEEPCOUNTERS	_IOWR('D', 96, struct pfioc_nv)
 #define	DIOCKEEPCOUNTERS_FREEBSD13	_IOWR('D', 92, struct pfioc_nv)
+#define	DIOCADDETHRULE		_IOWR('D', 97, struct pfioc_nv)
+#define	DIOCGETETHRULE		_IOWR('D', 98, struct pfioc_nv)
+#define	DIOCGETETHRULES		_IOWR('D', 99, struct pfioc_nv)
 
 struct pf_ifspeed_v0 {
 	char			ifname[IFNAMSIZ];
@@ -1980,6 +2040,7 @@ extern void			 pf_addrcpy(struct pf_addr *, struct pf_addr *,
 				    u_int8_t);
 void				pf_free_rule(struct pf_krule *);
 
+int	pf_test_eth(int, int, struct ifnet *, struct mbuf **, struct inpcb *);
 #ifdef INET
 int	pf_test(int, int, struct ifnet *, struct mbuf **, struct inpcb *);
 int	pf_normalize_ip(struct mbuf **, int, struct pfi_kkif *, u_short *,
@@ -2142,7 +2203,13 @@ VNET_DECLARE(struct pf_kanchor,			 pf_main_anchor);
 #define	V_pf_main_anchor			 VNET(pf_main_anchor)
 #define pf_main_ruleset	V_pf_main_anchor.ruleset
 
+VNET_DECLARE(struct pf_keth_settings*,		 pf_keth);
+#define	V_pf_keth				 VNET(pf_keth)
+VNET_DECLARE(struct pf_keth_settings*,		 pf_keth_inactive);
+#define	V_pf_keth_inactive			 VNET(pf_keth_inactive)
+
 void			 pf_init_kruleset(struct pf_kruleset *);
+void			 pf_init_keth(struct pf_keth_settings *);
 int			 pf_kanchor_setup(struct pf_krule *,
 			    const struct pf_kruleset *, const char *);
 int			 pf_kanchor_nvcopyout(const struct pf_kruleset *,
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 7e98d5062286..8993e5a8698d 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -269,6 +269,8 @@ static int		 pf_state_key_ctor(void *, int, void *, int);
 static u_int32_t	 pf_tcp_iss(struct pf_pdesc *);
 void			 pf_rule_to_actions(struct pf_krule *,
 			    struct pf_rule_actions *);
+static int		 pf_test_eth_rule(int, struct pfi_kkif *,
+			    struct mbuf *);
 static int		 pf_test_rule(struct pf_krule **, struct pf_kstate **,
 			    int, struct pfi_kkif *, struct mbuf *, int,
 			    struct pf_pdesc *, struct pf_krule **,
@@ -3691,6 +3693,108 @@ pf_tcp_iss(struct pf_pdesc *pd)
 #undef	ISN_RANDOM_INCREMENT
 }
 
+static bool
+pf_match_eth_addr(const uint8_t *a, const struct pf_keth_rule_addr *r)
+{
+	/* Always matches if not set */
+	if (! r->isset)
+		return (!r->neg);
+
+	if (memcmp(a, r->addr, ETHER_ADDR_LEN) == 0)
+		return (!r->neg);
+
+	return (r->neg);
+}
+
+static int
+pf_test_eth_rule(int dir, struct pfi_kkif *kif, struct mbuf *m)
+{
+	struct ether_header *e;
+	struct pf_keth_rule *r, *rm;
+	struct pf_mtag *mtag;
+	uint8_t action;
+
+	PF_RULES_RLOCK_TRACKER;
+
+	MPASS(kif->pfik_ifp->if_vnet == curvnet);
+	NET_EPOCH_ASSERT();
+
+	e = mtod(m, struct ether_header *);
+
+	PF_RULES_RLOCK();
+
+	r = TAILQ_FIRST(&V_pf_keth->rules);
+	rm = NULL;
+
+	while (r != NULL) {
+		counter_u64_add(r->evaluations, 1);
+		if (pfi_kkif_match(r->kif, kif) == r->ifnot)
+			r = r->skip[PFE_SKIP_IFP].ptr;
+		else if (r->direction && r->direction != dir)
+			r = r->skip[PFE_SKIP_DIR].ptr;
+		else if (r->proto && r->proto != ntohs(e->ether_type))
+			r = r->skip[PFE_SKIP_PROTO].ptr;
+		else if (! pf_match_eth_addr(e->ether_shost, &r->src))
+			r = r->skip[PFE_SKIP_SRC_ADDR].ptr;
+		else if (! pf_match_eth_addr(e->ether_dhost, &r->dst)) {
+			r = TAILQ_NEXT(r, entries);
+		}
+		else {
+			/* Rule matches */
+			rm = r;
+
+			if (r->quick)
+				break;
+
+			r = TAILQ_NEXT(r, entries);
+		}
+	}
+
+	r = rm;
+
+	/* Default to pass. */
+	if (r == NULL) {
+		PF_RULES_RUNLOCK();
+		return (PF_PASS);
+	}
+
+	/* Execute action. */
+	counter_u64_add(r->packets[dir == PF_OUT], 1);
+	counter_u64_add(r->bytes[dir == PF_OUT], m_length(m, NULL));
+
+	/* Shortcut. Don't tag if we're just going to drop anyway. */
+	if (r->action == PF_DROP) {
+		PF_RULES_RUNLOCK();
+		return (PF_DROP);
+	}
+
+	if (r->tag > 0) {
+		mtag = pf_get_mtag(m);
+		if (mtag == NULL) {
+			counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
+			PF_RULES_RUNLOCK();
+			return (PF_DROP);
+		}
+		mtag->tag = r->tag;
+	}
+
+	if (r->qid != 0) {
+		mtag = pf_get_mtag(m);
+		if (mtag == NULL) {
+			counter_u64_add(V_pf_status.counters[PFRES_MEMORY], 1);
+			PF_RULES_RUNLOCK();
+			return (PF_DROP);
+		}
+		mtag->qid = r->qid;
+	}
+
+	action = r->action;
+
+	PF_RULES_RUNLOCK();
+
+	return (action);
+}
+
 static int
 pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, int direction,
     struct pfi_kkif *kif, struct mbuf *m, int off, struct pf_pdesc *pd,
@@ -6439,6 +6543,37 @@ pf_pdesc_to_dnflow(int dir, const struct pf_pdesc *pd,
 	return (true);
 }
 
+int
+pf_test_eth(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0,
+    struct inpcb *inp)
+{
+	struct pfi_kkif		*kif;
+	struct mbuf		*m = *m0;
+
+	M_ASSERTPKTHDR(m);
+	MPASS(ifp->if_vnet == curvnet);
+	NET_EPOCH_ASSERT();
+
+	if (!V_pf_status.running)
+		return (PF_PASS);
+
+	kif = (struct pfi_kkif *)ifp->if_pf_kif;
+
+	if (kif == NULL) {
+		DPFPRINTF(PF_DEBUG_URGENT,
+		    ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
+		return (PF_DROP);
+	}
+	if (kif->pfik_flags & PFI_IFLAG_SKIP)
+		return (PF_PASS);
+
+	if (m->m_flags & M_SKIP_FIREWALL)
+		return (PF_PASS);
+
+	/* Stateless! */
+	return (pf_test_eth_rule(dir, kif, m));
+}
+
 #ifdef INET
 int
 pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp)
diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
index 5bc78ab60e74..d16a52c79b26 100644
--- a/sys/netpfil/pf/pf_ioctl.c
+++ b/sys/netpfil/pf/pf_ioctl.c
@@ -106,6 +106,10 @@ static void		 pf_mv_kpool(struct pf_kpalist *, struct pf_kpalist *);
 static void		 pf_empty_kpool(struct pf_kpalist *);
 static int		 pfioctl(struct cdev *, u_long, caddr_t, int,
 			    struct thread *);
+static int		 pf_begin_eth(uint32_t *);
+static int		 pf_rollback_eth(uint32_t);
+static int		 pf_commit_eth(uint32_t);
+static void		 pf_free_eth_rule(struct pf_keth_rule *);
 #ifdef ALTQ
 static int		 pf_begin_altq(u_int32_t *);
 static int		 pf_rollback_altq(u_int32_t);
@@ -218,6 +222,10 @@ static void		 pf_tbladdr_copyout(struct pf_addr_wrap *);
 /*
  * Wrapper functions for pfil(9) hooks
  */
+static pfil_return_t pf_eth_check_in(struct mbuf **m, struct ifnet *ifp,
+    int flags, void *ruleset __unused, struct inpcb *inp);
+static pfil_return_t pf_eth_check_out(struct mbuf **m, struct ifnet *ifp,
+    int flags, void *ruleset __unused, struct inpcb *inp);
 #ifdef INET
 static pfil_return_t pf_check_in(struct mbuf **m, struct ifnet *ifp,
     int flags, void *ruleset __unused, struct inpcb *inp);
@@ -304,6 +312,9 @@ pfattach_vnet(void)
 	RB_INIT(&V_pf_anchors);
 	pf_init_kruleset(&pf_main_ruleset);
 
+	pf_init_keth(V_pf_keth);
+	pf_init_keth(V_pf_keth_inactive);
+
 	/* default rule should never be garbage collected */
 	V_pf_default_rule.entries.tqe_prev = &V_pf_default_rule.entries.tqe_next;
 #ifdef PF_DEFAULT_TO_DROP
@@ -471,6 +482,32 @@ pf_unlink_rule(struct pf_krulequeue *rulequeue, struct pf_krule *rule)
 	PF_UNLNKDRULES_UNLOCK();
 }
 
+static void
+pf_free_eth_rule(struct pf_keth_rule *rule)
+{
+	PF_RULES_WASSERT();
+
+	if (rule == NULL)
+		return;
+
+	if (rule->tag)
+		tag_unref(&V_pf_tags, rule->tag);
+#ifdef ALTQ
+	pf_qid_unref(rule->qid);
+#endif
+
+	if (rule->kif)
+		pfi_kkif_unref(rule->kif);
+
+	counter_u64_free(rule->evaluations);
+	for (int i = 0; i < 2; i++) {
+		counter_u64_free(rule->packets[i]);
+		counter_u64_free(rule->bytes[i]);
+	}
+
+	free(rule, M_PFRULE);
+}
+
 void
 pf_free_rule(struct pf_krule *rule)
 {
@@ -657,6 +694,103 @@ pf_tagname2tag(const char *tagname)
 	return (tagname2tag(&V_pf_tags, tagname));
 }
 
+static int
+pf_begin_eth(uint32_t *ticket)
+{
+	struct pf_keth_rule *rule, *tmp;
+
+	PF_RULES_WASSERT();
+
+	/* Purge old inactive rules. */
+	TAILQ_FOREACH_SAFE(rule, &V_pf_keth_inactive->rules, entries, tmp) {
+		TAILQ_REMOVE(&V_pf_keth_inactive->rules, rule, entries);
+		pf_free_eth_rule(rule);
+	}
+
+	*ticket = ++V_pf_keth_inactive->ticket;
+	V_pf_keth_inactive->open = 1;
+
+	return (0);
+}
+
+static int
+pf_rollback_eth(uint32_t ticket)
+{
+	struct pf_keth_rule *rule, *tmp;
+
+	PF_RULES_WASSERT();
+
+	if (!V_pf_keth_inactive->open || ticket != V_pf_keth_inactive->ticket)
+		return (0);
+
+	/* Purge old inactive rules. */
+	TAILQ_FOREACH_SAFE(rule, &V_pf_keth_inactive->rules, entries, tmp) {
+		TAILQ_REMOVE(&V_pf_keth_inactive->rules, rule, entries);
+		pf_free_eth_rule(rule);
+	}
+
+	V_pf_keth_inactive->open = 0;
+
+	return (0);
+}
+
+#define	PF_SET_SKIP_STEPS(i)					\
+	do {							\
+		while (head[i] != cur) {			\
+			head[i]->skip[i].ptr = cur;		\
+			head[i] = TAILQ_NEXT(head[i], entries);	\
+		}						\
+	} while (0)
+
+static void
+pf_eth_calc_skip_steps(struct pf_keth_rules *rules)
+{
+	struct pf_keth_rule *cur, *prev, *head[PFE_SKIP_COUNT];
+	int i;
+
+	cur = TAILQ_FIRST(rules);
+	prev = cur;
+	for (i = 0; i < PFE_SKIP_COUNT; ++i)
+		head[i] = cur;
+	while (cur != NULL) {
+		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
+			PF_SET_SKIP_STEPS(PFE_SKIP_IFP);
+		if (cur->direction != prev->direction)
+			PF_SET_SKIP_STEPS(PFE_SKIP_DIR);
+		if (cur->proto != prev->proto)
+			PF_SET_SKIP_STEPS(PFE_SKIP_PROTO);
+		if (memcmp(&cur->src, &prev->src, sizeof(cur->src)) != 0)
+			PF_SET_SKIP_STEPS(PFE_SKIP_SRC_ADDR);
+		if (memcmp(&cur->dst, &prev->dst, sizeof(cur->dst)) != 0)
+			PF_SET_SKIP_STEPS(PFE_SKIP_DST_ADDR);
+
+		prev = cur;
+		cur = TAILQ_NEXT(cur, entries);
+	}
+	for (i = 0; i < PFE_SKIP_COUNT; ++i)
+		PF_SET_SKIP_STEPS(i);
+}
+
+static int
+pf_commit_eth(uint32_t ticket)
+{
+	struct pf_keth_settings *settings;
+
+	if (!V_pf_keth_inactive->open ||
+	    ticket != V_pf_keth_inactive->ticket)
+		return (EBUSY);
+
+	pf_eth_calc_skip_steps(&V_pf_keth_inactive->rules);
+
+	settings = V_pf_keth;
+	V_pf_keth = V_pf_keth_inactive;
+	V_pf_keth_inactive = settings;
+	V_pf_keth_inactive->ticket = V_pf_keth->ticket;
+
+	/* Clean up inactive rules. */
+	return (pf_rollback_eth(ticket));
+}
+
 #ifdef ALTQ
 static uint16_t
 pf_qname2qid(const char *qname)
@@ -2219,6 +2353,8 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td
 		case DIOCGIFSPEEDV1:
 		case DIOCSETIFFLAG:
 		case DIOCCLRIFFLAG:
+		case DIOCGETETHRULES:
+		case DIOCGETETHRULE:
 			break;
 		case DIOCRCLRTABLES:
 		case DIOCRADDTABLES:
@@ -2266,6 +2402,8 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td
 		case DIOCGIFSPEEDV1:
 		case DIOCGIFSPEEDV0:
 		case DIOCGETRULENV:
+		case DIOCGETETHRULES:
+		case DIOCGETETHRULE:
 			break;
 		case DIOCRCLRTABLES:
 		case DIOCRADDTABLES:
@@ -2324,6 +2462,213 @@ pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td
 		}
 		break;
 
+	case DIOCGETETHRULES: {
+		struct pfioc_nv		*nv = (struct pfioc_nv *)addr;
+		nvlist_t		*nvl;
+		void			*packed;
+		struct pf_keth_rule	*tail;
+		u_int32_t		 ticket, nr;
+
+		nvl = NULL;
+		packed = NULL;
+
+#define	ERROUT(x)	do { error = (x); goto DIOCGETETHRULES_error; } while (0)
+
+		nvl = nvlist_create(0);
+		if (nvl == NULL)
+			ERROUT(ENOMEM);
+
+		PF_RULES_RLOCK();
+
+		ticket = V_pf_keth->ticket;
+		tail = TAILQ_LAST(&V_pf_keth->rules, pf_keth_rules);
+		if (tail)
+			nr = tail->nr + 1;
+		else
+			nr = 0;
+
+		PF_RULES_RUNLOCK();
+
+		nvlist_add_number(nvl, "ticket", ticket);
+		nvlist_add_number(nvl, "nr", nr);
+
+		packed = nvlist_pack(nvl, &nv->len);
+		if (packed == NULL)
+			ERROUT(ENOMEM);
+
+		if (nv->size == 0)
+			ERROUT(0);
+		else if (nv->size < nv->len)
+			ERROUT(ENOSPC);
+
+		error = copyout(packed, nv->data, nv->len);
+
+#undef ERROUT
+DIOCGETETHRULES_error:
+		free(packed, M_TEMP);
+		nvlist_destroy(nvl);
+		break;
+	}
+
+	case DIOCGETETHRULE: {
+		struct epoch_tracker	 et;
+		struct pfioc_nv		*nv = (struct pfioc_nv *)addr;
+		nvlist_t		*nvl = NULL;
+		void			*nvlpacked = NULL;
+		struct pf_keth_rule	*rule = NULL;
+		u_int32_t		 ticket, nr;
+
+#define ERROUT(x)	do { error = (x); goto DIOCGETETHRULE_error; } while (0)
+
+		nvlpacked = malloc(nv->len, M_TEMP, M_WAITOK);
+		if (nvlpacked == NULL)
+			ERROUT(ENOMEM);
+
+		error = copyin(nv->data, nvlpacked, nv->len);
+		if (error)
+			ERROUT(error);
+
+		nvl = nvlist_unpack(nvlpacked, nv->len, 0);
+		if (! nvlist_exists_number(nvl, "ticket"))
+			ERROUT(EBADMSG);
+		ticket = nvlist_get_number(nvl, "ticket");
+
+		if (! nvlist_exists_number(nvl, "nr"))
+			ERROUT(EBADMSG);
+		nr = nvlist_get_number(nvl, "nr");
+
+		nvlist_destroy(nvl);
+		nvl = NULL;
+		free(nvlpacked, M_TEMP);
+		nvlpacked = NULL;
+
+		nvl = nvlist_create(0);
+
+		PF_RULES_RLOCK();
+		if (ticket != V_pf_keth->ticket) {
+			PF_RULES_RUNLOCK();
+			ERROUT(EBUSY);
+		}
+		rule = TAILQ_FIRST(&V_pf_keth->rules);
+		while ((rule != NULL) && (rule->nr != nr))
+			rule = TAILQ_NEXT(rule, entries);
+		if (rule == NULL) {
+			PF_RULES_RUNLOCK();
+			ERROUT(ENOENT);
+		}
+		/* Make sure rule can't go away. */
+		NET_EPOCH_ENTER(et);
+		PF_RULES_RUNLOCK();
+		nvl = pf_keth_rule_to_nveth_rule(rule);
+		NET_EPOCH_EXIT(et);
+		if (nvl == NULL)
+			ERROUT(ENOMEM);
+
+		nvlpacked = nvlist_pack(nvl, &nv->len);
+		if (nvlpacked == NULL)
+			ERROUT(ENOMEM);
+
+		if (nv->size == 0)
+			ERROUT(0);
+		else if (nv->size < nv->len)
+			ERROUT(ENOSPC);
+
+		error = copyout(nvlpacked, nv->data, nv->len);
+
+#undef ERROUT
+DIOCGETETHRULE_error:
+		free(nvlpacked, M_TEMP);
+		nvlist_destroy(nvl);
+		break;
+	}
+
+	case DIOCADDETHRULE: {
+		struct pfioc_nv		*nv = (struct pfioc_nv *)addr;
+		nvlist_t		*nvl = NULL;
+		void			*nvlpacked = NULL;
+		struct pf_keth_rule	*rule = NULL;
+		struct pfi_kkif		*kif = NULL;
+
+#define ERROUT(x)	do { error = (x); goto DIOCADDETHRULE_error; } while (0)
+
+		nvlpacked = malloc(nv->len, M_TEMP, M_WAITOK);
+		if (nvlpacked == NULL)
+			ERROUT(ENOMEM);
+
+		error = copyin(nv->data, nvlpacked, nv->len);
+		if (error)
+			ERROUT(error);
+
+		nvl = nvlist_unpack(nvlpacked, nv->len, 0);
+		if (nvl == NULL)
+			ERROUT(EBADMSG);
+
+		if (! nvlist_exists_number(nvl, "ticket"))
+			ERROUT(EBADMSG);
+
+		if (nvlist_get_number(nvl, "ticket") !=
+		    V_pf_keth_inactive->ticket) {
+			DPFPRINTF(PF_DEBUG_MISC,
+			    ("ticket: %d != %d\n",
+			    (u_int32_t)nvlist_get_number(nvl, "ticket"),
+			    V_pf_keth_inactive->ticket));
+			ERROUT(EBUSY);
+		}
+
+		rule = malloc(sizeof(*rule), M_PFRULE, M_WAITOK);
+		if (rule == NULL)
+			ERROUT(ENOMEM);
+
+		error = pf_nveth_rule_to_keth_rule(nvl, rule);
+		if (error != 0)
+			ERROUT(error);
+
+		if (rule->ifname[0])
+			kif = pf_kkif_create(M_WAITOK);
+		rule->evaluations = counter_u64_alloc(M_WAITOK);
+		for (int i = 0; i < 2; i++) {
+			rule->packets[i] = counter_u64_alloc(M_WAITOK);
+			rule->bytes[i] = counter_u64_alloc(M_WAITOK);
+		}
+
+		PF_RULES_WLOCK();
+
+		if (rule->ifname[0]) {
+			rule->kif = pfi_kkif_attach(kif, rule->ifname);
+			pfi_kkif_ref(rule->kif);
+		} else
+			rule->kif = NULL;
+
+#ifdef ALTQ
+		/* set queue IDs */
+		if (rule->qname[0] != 0) {
+			if ((rule->qid = pf_qname2qid(rule->qname)) == 0)
+				error = EBUSY;
+			else
+				rule->qid = rule->qid;
+		}
+#endif
+		if (rule->tagname[0])
+			if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0)
+				error = EBUSY;
+
+		if (error) {
+			pf_free_eth_rule(rule);
+			PF_RULES_WUNLOCK();
+			ERROUT(error);
+		}
+
+		TAILQ_INSERT_TAIL(&V_pf_keth_inactive->rules, rule, entries);
+
+		PF_RULES_WUNLOCK();
+
+#undef ERROUT
+DIOCADDETHRULE_error:
+		nvlist_destroy(nvl);
+		free(nvlpacked, M_TEMP);
+		break;
+	}
+
 	case DIOCADDRULENV: {
 		struct pfioc_nv	*nv = (struct pfioc_nv *)addr;
 		nvlist_t	*nvl = NULL;
@@ -4367,6 +4712,19 @@ DIOCCHANGEADDR_error:
 		for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
 			ioe->anchor[sizeof(ioe->anchor) - 1] = '\0';
 			switch (ioe->rs_num) {
+			case PF_RULESET_ETH:
+				if (ioe->anchor[0]) {
+					PF_RULES_WUNLOCK();
+					free(ioes, M_TEMP);
+					error = EINVAL;
+					goto fail;
+				}
+				if ((error = pf_begin_eth(&ioe->ticket))) {
+					PF_RULES_WUNLOCK();
+					free(ioes, M_TEMP);
+					goto fail;
+				}
+				break;
 #ifdef ALTQ
 			case PF_RULESET_ALTQ:
 				if (ioe->anchor[0]) {
@@ -4441,6 +4799,19 @@ DIOCCHANGEADDR_error:
 		for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
 			ioe->anchor[sizeof(ioe->anchor) - 1] = '\0';
 			switch (ioe->rs_num) {
+			case PF_RULESET_ETH:
+				if (ioe->anchor[0]) {
+					PF_RULES_WUNLOCK();
+					free(ioes, M_TEMP);
+					error = EINVAL;
+					goto fail;
+				}
+				if ((error = pf_rollback_eth(ioe->ticket))) {
+					PF_RULES_WUNLOCK();
+					free(ioes, M_TEMP);
+					goto fail; /* really bad */
+				}
+				break;
 #ifdef ALTQ
 			case PF_RULESET_ALTQ:
 				if (ioe->anchor[0]) {
@@ -4518,6 +4889,21 @@ DIOCCHANGEADDR_error:
 		for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
 			ioe->anchor[sizeof(ioe->anchor) - 1] = 0;
 			switch (ioe->rs_num) {
+			case PF_RULESET_ETH:
+				if (ioe->anchor[0]) {
+					PF_RULES_WUNLOCK();
+					free(ioes, M_TEMP);
+					error = EINVAL;
+					goto fail;
+				}
+				if (!V_pf_keth_inactive->ticket ||
+				    ioe->ticket != V_pf_keth_inactive->ticket) {
+					PF_RULES_WUNLOCK();
+					free(ioes, M_TEMP);
+					error = EBUSY;
+					goto fail;
+				}
+				break;
 #ifdef ALTQ
 			case PF_RULESET_ALTQ:
 				if (ioe->anchor[0]) {
@@ -4569,6 +4955,13 @@ DIOCCHANGEADDR_error:
 		/* Now do the commit - no errors should happen here. */
 		for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
 			switch (ioe->rs_num) {
+			case PF_RULESET_ETH:
+				if ((error = pf_commit_eth(ioe->ticket))) {
+					PF_RULES_WUNLOCK();
+					free(ioes, M_TEMP);
+					goto fail; /* really bad */
+				}
+				break;
 #ifdef ALTQ
 			case PF_RULESET_ALTQ:
 				if ((error = pf_commit_altq(ioe->ticket))) {
@@ -5510,6 +5903,12 @@ shutdown_pf(void)
 		if ((error = pf_clear_tables()) != 0)
 			break;
 
+		if ((error = pf_begin_eth(&t[0])) != 0) {
+			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: eth\n"));
+			break;
+		}
+		pf_commit_eth(t[0]);
+
 #ifdef ALTQ
 		if ((error = pf_begin_altq(&t[0])) != 0) {
 			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n"));
@@ -5549,6 +5948,28 @@ pf_check_return(int chk, struct mbuf **m)
 	}
 }
 
+static pfil_return_t
+pf_eth_check_in(struct mbuf **m, struct ifnet *ifp, int flags,
+    void *ruleset __unused, struct inpcb *inp)
+{
+	int chk;
+
+	chk = pf_test_eth(PF_IN, flags, ifp, m, inp);
+
+	return (pf_check_return(chk, m));
+}
+
+static pfil_return_t
+pf_eth_check_out(struct mbuf **m, struct ifnet *ifp, int flags,
+    void *ruleset __unused, struct inpcb *inp)
+{
+	int chk;
+
+	chk = pf_test_eth(PF_OUT, flags, ifp, m, inp);
+
+	return (pf_check_return(chk, m));
+}
+
 #ifdef INET
 static pfil_return_t
 pf_check_in(struct mbuf **m, struct ifnet *ifp, int flags,
@@ -5606,6 +6027,11 @@ pf_check6_out(struct mbuf **m, struct ifnet *ifp, int flags,
 }
 #endif /* INET6 */
 
+VNET_DEFINE_STATIC(pfil_hook_t, pf_eth_in_hook);
+VNET_DEFINE_STATIC(pfil_hook_t, pf_eth_out_hook);
+#define	V_pf_eth_in_hook	VNET(pf_eth_in_hook)
+#define	V_pf_eth_out_hook	VNET(pf_eth_out_hook)
+
 #ifdef INET
 VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_in_hook);
 VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_out_hook);
@@ -5635,6 +6061,24 @@ hook_pf(void)
 
 	pla.pa_version = PFIL_VERSION;
 
+	pha.pa_type = PFIL_TYPE_ETHERNET;
+	pha.pa_func = pf_eth_check_in;
+	pha.pa_flags = PFIL_IN;
+	pha.pa_rulname = "eth-in";
+	V_pf_eth_in_hook = pfil_add_hook(&pha);
+	pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR;
+	pla.pa_head = V_link_pfil_head;
+	pla.pa_hook = V_pf_eth_in_hook;
+	(void)pfil_link(&pla);
+	pha.pa_func = pf_eth_check_out;
+	pha.pa_flags = PFIL_OUT;
+	pha.pa_rulname = "eth-out";
+	V_pf_eth_out_hook = pfil_add_hook(&pha);
+	pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
+	pla.pa_head = V_link_pfil_head;
+	pla.pa_hook = V_pf_eth_out_hook;
+	(void)pfil_link(&pla);
+
 #ifdef INET
 	pha.pa_type = PFIL_TYPE_IP4;
 	pha.pa_func = pf_check_in;
@@ -5688,6 +6132,9 @@ dehook_pf(void)
 	if (V_pf_pfil_hooked == 0)
 		return;
 
+	pfil_remove_hook(V_pf_eth_in_hook);
+	pfil_remove_hook(V_pf_eth_out_hook);
+
 #ifdef INET
 	pfil_remove_hook(V_pf_ip4_in_hook);
 	pfil_remove_hook(V_pf_ip4_out_hook);
@@ -5713,6 +6160,10 @@ pf_load_vnet(void)
 	    PF_QUEUE_TAG_HASH_SIZE_DEFAULT);
 #endif
 
+	V_pf_keth = malloc(sizeof(*V_pf_keth), M_PFRULE, M_WAITOK);
+	V_pf_keth_inactive = malloc(sizeof(*V_pf_keth_inactive),
+	    M_PFRULE, M_WAITOK);
+
 	pfattach_vnet();
 	V_pf_vnet_active = 1;
 }
@@ -5822,6 +6273,9 @@ pf_unload_vnet(void)
 		pf_counter_u64_deinit(&V_pf_status.fcounters[i]);
 	for (int i = 0; i < SCNT_MAX; i++)
 		counter_u64_free(V_pf_status.scounters[i]);
+
+	free(V_pf_keth, M_PFRULE);
+	free(V_pf_keth_inactive, M_PFRULE);
 }
 
 static void
diff --git a/sys/netpfil/pf/pf_nv.c b/sys/netpfil/pf/pf_nv.c
index f3a28bf770b1..24128a21c363 100644
--- a/sys/netpfil/pf/pf_nv.c
+++ b/sys/netpfil/pf/pf_nv.c
@@ -1003,3 +1003,127 @@ errout:
 	nvlist_destroy(nvl);
 	return (NULL);
 }
+
+static int
+pf_nveth_rule_addr_to_keth_rule_addr(const nvlist_t *nvl,
+    struct pf_keth_rule_addr *krule)
+{
+	static const u_int8_t EMPTY_MAC[ETHER_ADDR_LEN] = { 0 };
+	int error = 0;
+
+	PFNV_CHK(pf_nvbinary(nvl, "addr", &krule->addr, sizeof(krule->addr)));
+	PFNV_CHK(pf_nvbool(nvl, "neg", &krule->neg));
+
+	/* To make checks for 'is this address set?' easier. */
+	if (memcmp(krule->addr, EMPTY_MAC, ETHER_ADDR_LEN) != 0)
+		krule->isset = 1;
+
+errout:
+	return (error);
+}
+
+static nvlist_t*
+pf_keth_rule_addr_to_nveth_rule_addr(const struct pf_keth_rule_addr *krule)
+{
+	nvlist_t *nvl;
+
+	nvl = nvlist_create(0);
+	if (nvl == NULL)
+		return (NULL);
+
+	nvlist_add_binary(nvl, "addr", &krule->addr, sizeof(krule->addr));
+	nvlist_add_bool(nvl, "neg", krule->neg);
+
+	return (nvl);
+}
+
+nvlist_t*
+pf_keth_rule_to_nveth_rule(const struct pf_keth_rule *krule)
+{
+	nvlist_t *nvl, *addr;
+
+	nvl = nvlist_create(0);
+	if (nvl == NULL)
+		return (NULL);
+
+	nvlist_add_number(nvl, "nr", krule->nr);
+	nvlist_add_bool(nvl, "quick", krule->quick);
+	nvlist_add_string(nvl, "ifname", krule->ifname);
+	nvlist_add_bool(nvl, "ifnot", krule->ifnot);
+	nvlist_add_number(nvl, "direction", krule->direction);
+	nvlist_add_number(nvl, "proto", krule->proto);
+
+	addr = pf_keth_rule_addr_to_nveth_rule_addr(&krule->src);
+	if (addr == NULL) {
+		nvlist_destroy(nvl);
+		return (NULL);
+	}
+	nvlist_add_nvlist(nvl, "src", addr);
+
+	addr = pf_keth_rule_addr_to_nveth_rule_addr(&krule->dst);
+	if (addr == NULL) {
+		nvlist_destroy(nvl);
+		return (NULL);
+	}
+	nvlist_add_nvlist(nvl, "dst", addr);
+
+	nvlist_add_number(nvl, "evaluations",
+	    counter_u64_fetch(krule->evaluations));
+	nvlist_add_number(nvl, "packets-in",
+	    counter_u64_fetch(krule->packets[0]));
+	nvlist_add_number(nvl, "packets-out",
+	    counter_u64_fetch(krule->packets[1]));
+	nvlist_add_number(nvl, "bytes-in",
+	    counter_u64_fetch(krule->bytes[0]));
+	nvlist_add_number(nvl, "bytes-out",
+	    counter_u64_fetch(krule->bytes[1]));
+
+	nvlist_add_string(nvl, "qname", krule->qname);
+	nvlist_add_string(nvl, "tagname", krule->tagname);
+
+	nvlist_add_number(nvl, "action", krule->action);
+
+	return (nvl);
+}
+
+int
+pf_nveth_rule_to_keth_rule(const nvlist_t *nvl,
+    struct pf_keth_rule *krule)
*** 78 LINES SKIPPED ***