git: 160e7a4c16eb - stable/14 - wg: Add netmap support

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Mon, 20 May 2024 13:46:43 UTC
The branch stable/14 has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=160e7a4c16ebbec4211a941a706778daf8ea62bd

commit 160e7a4c16ebbec4211a941a706778daf8ea62bd
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2024-04-20 16:01:28 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2024-05-20 13:42:35 +0000

    wg: Add netmap support
    
    When in netmap (emulated) mode, wireguard interfaces prepend or strip a
    dummy ethernet header when interfacing with netmap.  The netmap
    application thus sees unencrypted, de-encapsulated frames with a fixed
    header.
    
    In this mode, netmap hooks the if_input and if_transmit routines of the
    ifnet.  Packets from the host TX ring are handled by wg_if_input(),
    which simply hands them to the netisr layer; packets which would
    otherwise be tunneled are intercepted in wg_output() and placed in the
    host RX ring.
    
    The "physical" TX ring is processed by wg_transmit(), which behaves
    identically to wg_output() when netmap is not enabled, and packets
    appear in the "physical" RX ring by hooking wg_deliver_in().
    
    Reviewed by:    vmaffione
    MFC after:      1 month
    Sponsored by:   Klara, Inc.
    Sponsored by:   Zenarmor
    Differential Revision:  https://reviews.freebsd.org/D43460
    
    (cherry picked from commit bf454ca88bdf4acfa873386e876ff5e772e6a830)
---
 share/man/man4/wg.4 |  14 +++++
 sys/dev/wg/if_wg.c  | 155 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 163 insertions(+), 6 deletions(-)

diff --git a/share/man/man4/wg.4 b/share/man/man4/wg.4
index d0d871e52220..05d6961a9610 100644
--- a/share/man/man4/wg.4
+++ b/share/man/man4/wg.4
@@ -121,6 +121,19 @@ as follows:
 Although a valid Curve25519 key must have 5 bits set to
 specific values, this is done by the interface and so it
 will accept any random 32-byte base64 string.
+.Sh NETMAP
+.Xr netmap 4
+applications may open a WireGuard interface in emulated mode.
+The netmap application will receive decrypted, unencapsulated packets, each
+preceded by a dummy Ethernet header.
+The Ethertype field will be one of
+.Dv ETHERTYPE_IP
+or
+.Dv ETHERTYPE_IPV6
+depending on the address family of the packet.
+Packets transmitted by the application should similarly begin with a dummy
+Ethernet header; this header will be stripped before the packet is encrypted
+and tunneled.
 .Sh EXAMPLES
 Create a
 .Nm
@@ -183,6 +196,7 @@ is not assigned to the allowed IPs of Peer X.
 .Xr ip 4 ,
 .Xr ipsec 4 ,
 .Xr netintro 4 ,
+.Xr netmap 4 ,
 .Xr ovpn 4 ,
 .Xr ipf 5 ,
 .Xr pf.conf 5 ,
diff --git a/sys/dev/wg/if_wg.c b/sys/dev/wg/if_wg.c
index 30429c3725cd..552f47f9645b 100644
--- a/sys/dev/wg/if_wg.c
+++ b/sys/dev/wg/if_wg.c
@@ -1672,6 +1672,31 @@ error:
 	}
 }
 
+#ifdef DEV_NETMAP
+/*
+ * Prepend a dummy Ethernet header (ethertype from af) and hand the packet to
+ * the netmap RX ring, via netmap's freebsd_generic_rx_handler().
+ */
+static void
+wg_deliver_netmap(if_t ifp, struct mbuf *m, int af)
+{
+	struct ether_header *eh;
+
+	M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
+	if (__predict_false(m == NULL)) {
+		if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
+		return;		/* m is NULL: nothing left to deliver */
+	}
+
+	eh = mtod(m, struct ether_header *);
+	eh->ether_type = af == AF_INET ?
+	    htons(ETHERTYPE_IP) : htons(ETHERTYPE_IPV6);
+	memcpy(eh->ether_shost, "\x02\x02\x02\x02\x02\x02", ETHER_ADDR_LEN);
+	memcpy(eh->ether_dhost, "\xff\xff\xff\xff\xff\xff", ETHER_ADDR_LEN);
+	if_input(ifp, m);
+}
+#endif
+
 static void
 wg_deliver_in(struct wg_peer *peer)
 {
@@ -1680,6 +1705,7 @@ wg_deliver_in(struct wg_peer *peer)
 	struct wg_packet	*pkt;
 	struct mbuf		*m;
 	struct epoch_tracker	 et;
+	int			 af;
 
 	while ((pkt = wg_queue_dequeue_serial(&peer->p_decrypt_serial)) != NULL) {
 		if (atomic_load_acq_int(&pkt->p_state) != WG_PACKET_CRYPTED)
@@ -1705,19 +1731,25 @@ wg_deliver_in(struct wg_peer *peer)
 		if (m->m_pkthdr.len == 0)
 			goto done;
 
-		MPASS(pkt->p_af == AF_INET || pkt->p_af == AF_INET6);
+		af = pkt->p_af;
+		MPASS(af == AF_INET || af == AF_INET6);
 		pkt->p_mbuf = NULL;
 
 		m->m_pkthdr.rcvif = ifp;
 
 		NET_EPOCH_ENTER(et);
-		BPF_MTAP2_AF(ifp, m, pkt->p_af);
+		BPF_MTAP2_AF(ifp, m, af);
 
 		CURVNET_SET(if_getvnet(ifp));
 		M_SETFIB(m, if_getfib(ifp));
-		if (pkt->p_af == AF_INET)
+#ifdef DEV_NETMAP
+		if ((if_getcapenable(ifp) & IFCAP_NETMAP) != 0)
+			wg_deliver_netmap(ifp, m, af);
+		else
+#endif
+		if (af == AF_INET)
 			netisr_dispatch(NETISR_IP, m);
-		if (pkt->p_af == AF_INET6)
+		else if (af == AF_INET6)
 			netisr_dispatch(NETISR_IPV6, m);
 		CURVNET_RESTORE();
 		NET_EPOCH_EXIT(et);
@@ -2162,13 +2194,36 @@ determine_af_and_pullup(struct mbuf **m, sa_family_t *af)
 	return (0);
 }
 
+#ifdef DEV_NETMAP
+static int
+determine_ethertype_and_pullup(struct mbuf **m, int *etp)
+{
+	struct ether_header *eh;
+
+	*m = m_pullup(*m, sizeof(struct ether_header));
+	if (__predict_false(*m == NULL))
+		return (ENOBUFS);	/* pullup failed; *m is now NULL */
+	eh = mtod(*m, struct ether_header *);
+	*etp = ntohs(eh->ether_type);	/* stored in host byte order */
+	if (*etp != ETHERTYPE_IP && *etp != ETHERTYPE_IPV6)
+		return (EAFNOSUPPORT);	/* only IPv4/IPv6 are accepted */
+	return (0);
+}
+
+/*
+ * This should only be invoked by netmap, via nm_os_generic_xmit_frame(), to
+ * transmit packets from the netmap TX ring.
+ */
 static int
 wg_transmit(if_t ifp, struct mbuf *m)
 {
 	sa_family_t af;
-	int ret;
+	int et, ret;
 	struct mbuf *defragged;
 
+	KASSERT((if_getcapenable(ifp) & IFCAP_NETMAP) != 0,
+	    ("%s: ifp %p is not in netmap mode", __func__, ifp));
+
 	defragged = m_defrag(m, M_NOWAIT);
 	if (defragged)
 		m = defragged;
@@ -2178,14 +2233,94 @@ wg_transmit(if_t ifp, struct mbuf *m)
 		return (ENOBUFS);
 	}
 
+	ret = determine_ethertype_and_pullup(&m, &et);
+	if (ret) {
+		xmit_err(ifp, m, NULL, AF_UNSPEC);
+		return (ret);
+	}
+	m_adj(m, sizeof(struct ether_header));
+
 	ret = determine_af_and_pullup(&m, &af);
 	if (ret) {
 		xmit_err(ifp, m, NULL, AF_UNSPEC);
 		return (ret);
 	}
-	return (wg_xmit(ifp, m, af, if_getmtu(ifp)));
+
+	/*
+	 * netmap only gets to see transient errors, since it handles errors by
+	 * refusing to advance the transmit ring and retrying later.
+	 */
+	ret = wg_xmit(ifp, m, af, if_getmtu(ifp));
+	if (ret == ENOBUFS)
+		return (ret);
+	return (0);
 }
 
+/*
+ * This should only be invoked by netmap, via nm_os_send_up(), to process
+ * packets from the host TX ring: strip the dummy header, dispatch to netisr.
+ */
+static void
+wg_if_input(if_t ifp, struct mbuf *m)
+{
+	int et;
+
+	KASSERT((if_getcapenable(ifp) & IFCAP_NETMAP) != 0,
+	    ("%s: ifp %p is not in netmap mode", __func__, ifp));
+
+	if (determine_ethertype_and_pullup(&m, &et) != 0) {
+		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+		m_freem(m);
+		return;
+	}
+	CURVNET_SET(if_getvnet(ifp));
+	switch (et) {
+	case ETHERTYPE_IP:
+		m_adj(m, sizeof(struct ether_header));
+		netisr_dispatch(NETISR_IP, m);
+		break;
+	case ETHERTYPE_IPV6:
+		m_adj(m, sizeof(struct ether_header));
+		netisr_dispatch(NETISR_IPV6, m);
+		break;
+	default:
+		__assert_unreachable();	/* et was validated above */
+	}
+	CURVNET_RESTORE();
+}
+
+/*
+ * Prepend a dummy Ethernet header and deliver the packet to the host RX ring.
+ * In netmap mode, if_transmit() should pass the packet to netmap_transmit().
+ */
+static int
+wg_xmit_netmap(if_t ifp, struct mbuf *m, int af)
+{
+	struct ether_header *eh;
+
+	if (__predict_false(if_tunnel_check_nesting(ifp, m, MTAG_WGLOOP,
+	    MAX_LOOPS))) {
+		/* Nesting check failed: count the error and drop the packet. */
+		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+		m_freem(m);
+		return (ELOOP);
+	}
+
+	M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
+	if (__predict_false(m == NULL)) {
+		if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
+		return (ENOBUFS);	/* m is NULL: nothing left to send */
+	}
+
+	eh = mtod(m, struct ether_header *);
+	eh->ether_type = af == AF_INET ?
+	    htons(ETHERTYPE_IP) : htons(ETHERTYPE_IPV6);
+	memcpy(eh->ether_shost, "\x06\x06\x06\x06\x06\x06", ETHER_ADDR_LEN);
+	memcpy(eh->ether_dhost, "\xff\xff\xff\xff\xff\xff", ETHER_ADDR_LEN);
+	return (if_transmit(ifp, m));
+}
+#endif /* DEV_NETMAP */
+
 static int
 wg_output(if_t ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro)
 {
@@ -2204,6 +2339,11 @@ wg_output(if_t ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro
 		return (EAFNOSUPPORT);
 	}
 
+#ifdef DEV_NETMAP
+	if ((if_getcapenable(ifp) & IFCAP_NETMAP) != 0)
+		return (wg_xmit_netmap(ifp, m, af));
+#endif
+
 	defragged = m_defrag(m, M_NOWAIT);
 	if (defragged)
 		m = defragged;
@@ -2779,7 +2919,10 @@ wg_clone_create(struct if_clone *ifc, char *name, size_t len,
 	if_setinitfn(ifp, wg_init);
 	if_setreassignfn(ifp, wg_reassign);
 	if_setqflushfn(ifp, wg_qflush);
+#ifdef DEV_NETMAP
 	if_settransmitfn(ifp, wg_transmit);
+	if_setinputfn(ifp, wg_if_input);
+#endif
 	if_setoutputfn(ifp, wg_output);
 	if_setioctlfn(ifp, wg_ioctl);
 	if_attach(ifp);