git: e44d2e941e8e - main - if_geneve: Add Support for Geneve (RFC8926)

From: Pouria Mousavizadeh Tehrani <pouria_at_FreeBSD.org>
Date: Mon, 13 Apr 2026 14:16:56 UTC
The branch main has been updated by pouria:

URL: https://cgit.FreeBSD.org/src/commit/?id=e44d2e941e8ebd74e6a1b1fdbed83fe86671cbc6

commit e44d2e941e8ebd74e6a1b1fdbed83fe86671cbc6
Author:     Pouria Mousavizadeh Tehrani <pouria@FreeBSD.org>
AuthorDate: 2026-04-11 14:12:01 +0000
Commit:     Pouria Mousavizadeh Tehrani <pouria@FreeBSD.org>
CommitDate: 2026-04-13 14:14:58 +0000

    if_geneve: Add Support for Geneve (RFC8926)
    
    geneve creates a generic network virtualization tunnel interface
    for Tenant Systems over an L3 (IP/UDP) underlay network that provides
    a Layer 2 (ethernet) or Layer 3 service using the geneve protocol.
    This implementation is based on RFC8926.
    
    Reviewed by:    glebius, adrian
    Discussed with: zlei, kp
    Relnotes:       yes
    Differential Revision: https://reviews.freebsd.org/D54172
---
 sys/conf/NOTES                 |    4 +
 sys/conf/files                 |    1 +
 sys/kern/kern_jail.c           |    1 +
 sys/modules/Makefile           |    1 +
 sys/modules/if_geneve/Makefile |    7 +
 sys/net/if.c                   |    2 +
 sys/net/if.h                   |    6 +-
 sys/net/if_geneve.c            | 3967 ++++++++++++++++++++++++++++++++++++++++
 sys/net/if_geneve.h            |   70 +
 sys/net/if_strings.h           |   12 +-
 sys/netlink/route/interface.h  |   44 +
 sys/sys/mbuf.h                 |    6 +-
 sys/sys/priv.h                 |    1 +
 13 files changed, 4115 insertions(+), 7 deletions(-)

diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 4dda93e2ee70..4279fae4c547 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -880,6 +880,10 @@ device		vlan
 # frames in UDP packets according to RFC7348.
 device		vxlan
 
+# The `geneve' device implements the GENEVE encapsulation of virtual
+# overlays according to RFC8926.
+device		geneve
+
 #  The `wlan' device provides generic code to support 802.11
 #  drivers, including host AP mode; it is MANDATORY for the wi,
 #  and ath drivers and will eventually be required by all 802.11 drivers.
diff --git a/sys/conf/files b/sys/conf/files
index b44fb46ef764..99ba7cdaba33 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4238,6 +4238,7 @@ net/if_stf.c			optional stf inet inet6
 net/if_tuntap.c			optional tuntap
 net/if_vlan.c			optional vlan
 net/if_vxlan.c			optional vxlan inet | vxlan inet6
+net/if_geneve.c			optional geneve inet | geneve inet6
 net/ifdi_if.m			optional ether pci iflib
 net/iflib.c			optional ether pci iflib
 net/mp_ring.c			optional ether iflib
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 384825b7f8ac..bc80adb91cd6 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -4385,6 +4385,7 @@ prison_priv_check(struct ucred *cred, int priv)
 	case PRIV_NET_SETIFVNET:
 	case PRIV_NET_SETIFFIB:
 	case PRIV_NET_OVPN:
+	case PRIV_NET_GENEVE:
 	case PRIV_NET_ME:
 	case PRIV_NET_WG:
 
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
index a4100c31ef26..faedb856977c 100644
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -169,6 +169,7 @@ SUBDIR=	\
 	if_tuntap \
 	if_vlan \
 	if_vxlan \
+	if_geneve \
 	${_if_wg} \
 	iflib \
 	${_igc} \
diff --git a/sys/modules/if_geneve/Makefile b/sys/modules/if_geneve/Makefile
new file mode 100644
index 000000000000..1e65d4dbb168
--- /dev/null
+++ b/sys/modules/if_geneve/Makefile
@@ -0,0 +1,7 @@
+.PATH: ${SRCTOP}/sys/net
+
+KMOD=	if_geneve
+SRCS=	if_geneve.c
+SRCS+=	opt_inet.h opt_inet6.h
+
+.include <bsd.kmod.mk>
diff --git a/sys/net/if.c b/sys/net/if.c
index 760ae94e842b..8a148ba0fd06 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -2273,6 +2273,8 @@ const struct ifcap_nv_bit_name ifcap2_nv_bit_names[] = {
 	CAP2NV(RXTLS4),
 	CAP2NV(RXTLS6),
 	CAP2NV(IPSEC_OFFLOAD),
+	CAP2NV(GENEVE_HWCSUM),
+	CAP2NV(GENEVE_HWTSO),
 	{0, NULL}
 };
 #undef CAPNV
diff --git a/sys/net/if.h b/sys/net/if.h
index 1b47237e46bb..4bb6a2659ce7 100644
--- a/sys/net/if.h
+++ b/sys/net/if.h
@@ -255,7 +255,9 @@ struct if_data {
 #define	IFCAP_B_RXTLS4		32 /* can do TLS receive for TCP */
 #define	IFCAP_B_RXTLS6		33 /* can do TLS receive for TCP6 */
 #define	IFCAP_B_IPSEC_OFFLOAD	34 /* inline IPSEC offload */
-#define	__IFCAP_B_SIZE		35
+#define	IFCAP_B_GENEVE_HWCSUM	35 /* can do IFCAP_HWCSUM on GENEVE */
+#define	IFCAP_B_GENEVE_HWTSO	36 /* can do IFCAP_TSO on GENEVE */
+#define	__IFCAP_B_SIZE		37
 
 #define	IFCAP_B_MAX	(__IFCAP_B_MAX - 1)
 #define	IFCAP_B_SIZE	(__IFCAP_B_SIZE)
@@ -299,6 +301,8 @@ struct if_data {
 #define	IFCAP2_RXTLS4		(IFCAP_B_RXTLS4 - 32)
 #define	IFCAP2_RXTLS6		(IFCAP_B_RXTLS6 - 32)
 #define	IFCAP2_IPSEC_OFFLOAD	(IFCAP_B_IPSEC_OFFLOAD - 32)
+#define	IFCAP2_GENEVE_HWCSUM		(IFCAP_B_GENEVE_HWCSUM - 32)
+#define	IFCAP2_GENEVE_HWTSO		(IFCAP_B_GENEVE_HWTSO - 32)
 
 #define	IFCAP2_BIT(x)		(1UL << (x))
 
diff --git a/sys/net/if_geneve.c b/sys/net/if_geneve.c
new file mode 100644
index 000000000000..9562a3476099
--- /dev/null
+++ b/sys/net/if_geneve.c
@@ -0,0 +1,3967 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025-2026 Pouria Mousavizadeh Tehrani <pouria@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/hash.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/refcount.h>
+#include <sys/rmlock.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/sdt.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockio.h>
+#include <sys/sx.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/jail.h>
+
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_private.h>
+#include <net/if_arp.h>
+#include <net/if_clone.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
+#include <net/netisr.h>
+#include <net/route.h>
+#include <net/route/nhop.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/scope6_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+#include <netinet/in_fib.h>
+#include <netinet6/in6_fib.h>
+#include <netinet/ip_ecn.h>
+#include <net/if_geneve.h>
+
+#include <netlink/netlink.h>
+#include <netlink/netlink_ctl.h>
+#include <netlink/netlink_var.h>
+#include <netlink/netlink_route.h>
+#include <netlink/route/route_var.h>
+
+#include <security/mac/mac_framework.h>
+
+SDT_PROVIDER_DEFINE(if_geneve);
+
+struct geneve_softc;
+LIST_HEAD(geneve_softc_head, geneve_softc);
+
+static struct sx geneve_sx;
+SX_SYSINIT(geneve, &geneve_sx, "GENEVE global start/stop lock");
+
+static unsigned geneve_osd_jail_slot;
+
+/*
+ * Storage for an IPv4 or IPv6 socket address that can also be accessed
+ * through the generic struct sockaddr view.
+ */
+union sockaddr_union {
+	struct sockaddr		sa;
+	struct sockaddr_in	sin;
+	struct sockaddr_in6	sin6;
+};
+
+/*
+ * One slot of the per-socket multicast table (gnvso_mc[] in struct
+ * geneve_socket).
+ * NOTE(review): presumably gnvsomc_saddr/gnvsomc_gaddr are the source and
+ * group address of a joined group, gnvsomc_ifidx the interface index used
+ * for the join and gnvsomc_users the number of interfaces sharing the
+ * membership -- confirm against geneve_socket_mc_add_group().
+ */
+struct geneve_socket_mc_info {
+	union sockaddr_union	gnvsomc_saddr;
+	union sockaddr_union	gnvsomc_gaddr;
+	int			gnvsomc_ifidx;
+	int			gnvsomc_users;
+};
+
+/*
+ * The maximum MTU of an encapsulated geneve packet.
+ *
+ * GENEVE_MAX_L3MTU is what remains of IP_MAXPACKET after subtracting the
+ * largest IPv4 header, the UDP header and struct genevehdr.
+ * GENEVE_MAX_MTU additionally subtracts ETHER_HDR_LEN and
+ * ETHER_VLAN_ENCAP_LEN.
+ */
+#define GENEVE_MAX_L3MTU	(IP_MAXPACKET - \
+	    60 /* Maximum IPv4 header len */ - \
+	    sizeof(struct udphdr) - \
+	    sizeof(struct genevehdr))
+#define GENEVE_MAX_MTU		(GENEVE_MAX_L3MTU - \
+	    ETHER_HDR_LEN - ETHER_VLAN_ENCAP_LEN)
+
+#define GENEVE_BASIC_IFCAPS (IFCAP_LINKSTATE | IFCAP_JUMBO_MTU | IFCAP_NV)
+
+#define GENEVE_VERSION	0
+#define GENEVE_VNI_MASK	(GENEVE_VNI_MAX - 1)
+
+#define GENEVE_HDR_VNI_SHIFT	8
+
+#define GENEVE_SO_MC_MAX_GROUPS		32
+
+#define GENEVE_SO_VNI_HASH_SHIFT	6
+#define GENEVE_SO_VNI_HASH_SIZE		(1 << GENEVE_SO_VNI_HASH_SHIFT)
+#define GENEVE_SO_VNI_HASH(_vni)	((_vni) % GENEVE_SO_VNI_HASH_SIZE)
+
+/*
+ * State kept for one socket used by geneve interfaces.
+ *
+ * gnvso_lock is an rmlock taken through the GENEVE_SO_*LOCK() macros and
+ * gnvso_refcnt is manipulated through GENEVE_SO_ACQUIRE() /
+ * GENEVE_SO_RELEASE().  gnvso_vni_hash[] has GENEVE_SO_VNI_HASH_SIZE
+ * buckets of softc lists and gnvso_mc[] has room for
+ * GENEVE_SO_MC_MAX_GROUPS multicast entries.
+ * NOTE(review): presumably gnvso_vni_hash[] is indexed with
+ * GENEVE_SO_VNI_HASH(vni) and gnvso_entry links the socket onto
+ * geneve_socket_list -- confirm in the socket insert/lookup routines.
+ */
+struct geneve_socket {
+	struct socket			*gnvso_sock;
+	struct rmlock			gnvso_lock;
+	u_int				gnvso_refcnt;
+	union sockaddr_union		gnvso_laddr;
+	LIST_ENTRY(geneve_socket)	gnvso_entry;
+	struct geneve_softc_head	gnvso_vni_hash[GENEVE_SO_VNI_HASH_SIZE];
+	struct geneve_socket_mc_info	gnvso_mc[GENEVE_SO_MC_MAX_GROUPS];
+};
+
+/*
+ * Locking and reference counting helpers for struct geneve_socket.
+ * The read-side macros take a struct rm_priotracker pointer (_p), as
+ * required by rm_rlock()/rm_runlock().
+ */
+#define GENEVE_SO_RLOCK(_gnvso, _p)	rm_rlock(&(_gnvso)->gnvso_lock, (_p))
+#define GENEVE_SO_RUNLOCK(_gnvso, _p)	rm_runlock(&(_gnvso)->gnvso_lock, (_p))
+#define GENEVE_SO_WLOCK(_gnvso)		rm_wlock(&(_gnvso)->gnvso_lock)
+#define GENEVE_SO_WUNLOCK(_gnvso)		rm_wunlock(&(_gnvso)->gnvso_lock)
+#define GENEVE_SO_LOCK_ASSERT(_gnvso) \
+    rm_assert(&(_gnvso)->gnvso_lock, RA_LOCKED)
+#define GENEVE_SO_LOCK_WASSERT(_gnvso) \
+    rm_assert(&(_gnvso)->gnvso_lock, RA_WLOCKED)
+
+#define GENEVE_SO_ACQUIRE(_gnvso)		refcount_acquire(&(_gnvso)->gnvso_refcnt)
+#define GENEVE_SO_RELEASE(_gnvso)		refcount_release(&(_gnvso)->gnvso_refcnt)
+
+/*
+ * Forwarding table entry: maps an Ethernet address (gnvfe_mac) to the
+ * address stored in gnvfe_raddr.
+ *
+ * gnvfe_hash links the entry into one bucket of gnv_ftable[],
+ * gnvfe_flags holds GENEVE_FE_FLAG_* bits and gnvfe_expire is compared
+ * against time_uptime when dynamic entries are expired.
+ */
+struct gnv_ftable_entry {
+	LIST_ENTRY(gnv_ftable_entry)	gnvfe_hash;
+	uint16_t			gnvfe_flags;
+	uint8_t				gnvfe_mac[ETHER_ADDR_LEN];
+	union sockaddr_union		gnvfe_raddr;
+	time_t				gnvfe_expire;
+};
+
+#define GENEVE_FE_FLAG_DYNAMIC		0x01
+#define GENEVE_FE_FLAG_STATIC		0x02
+
+#define GENEVE_FE_IS_DYNAMIC(_fe) \
+    ((_fe)->gnvfe_flags & GENEVE_FE_FLAG_DYNAMIC)
+
+#define GENEVE_SC_FTABLE_SHIFT		9
+#define GENEVE_SC_FTABLE_SIZE		(1 << GENEVE_SC_FTABLE_SHIFT)
+#define GENEVE_SC_FTABLE_MASK		(GENEVE_SC_FTABLE_SIZE - 1)
+#define GENEVE_SC_FTABLE_HASH(_sc, _mac)	\
+    (geneve_mac_hash(_sc, _mac) % GENEVE_SC_FTABLE_SIZE)
+
+LIST_HEAD(geneve_ftable_head, gnv_ftable_entry);
+
+/*
+ * Per-interface statistics.
+ *
+ * ftable_nospace is bumped when a new forwarding entry is refused because
+ * the table already holds gnv_ftable_max entries.
+ * ftable_lock_upgrade_failed is bumped each time
+ * geneve_ftable_update_locked() drops the read lock to take the write
+ * lock.
+ * NOTE(review): txcsum/tso/rxcsum are presumably counters for offloaded
+ * checksum and TSO packets -- confirm where they are incremented.
+ */
+struct geneve_statistics {
+	uint32_t	ftable_nospace;
+	uint32_t	ftable_lock_upgrade_failed;
+	counter_u64_t	txcsum;
+	counter_u64_t	tso;
+	counter_u64_t	rxcsum;
+};
+
+/*
+ * Per-interface state of a geneve(4) interface.
+ *
+ * gnv_lock is an rmlock taken through the GENEVE_*LOCK() macros and
+ * gnv_refcnt is manipulated through GENEVE_ACQUIRE() / GENEVE_RELEASE().
+ * The forwarding table fields (gnv_ftable*) are maintained by the
+ * geneve_ftable_*() routines.
+ * NOTE(review): gnv_entry presumably links the softc into a
+ * gnvso_vni_hash[] bucket of the socket in gnv_sock -- confirm in
+ * geneve_socket_insert_softc().
+ */
+struct geneve_softc {
+	LIST_ENTRY(geneve_softc)	gnv_entry;
+
+	struct ifnet			*gnv_ifp;
+	/* Bitmask of the GENEVE_FLAG_* values defined below. */
+	uint32_t			gnv_flags;
+#define GENEVE_FLAG_INIT		0x0001
+#define GENEVE_FLAG_RUNNING		0x0002
+#define GENEVE_FLAG_TEARDOWN		0x0004
+#define GENEVE_FLAG_LEARN		0x0008
+#define GENEVE_FLAG_USER_MTU		0x0010
+#define GENEVE_FLAG_TTL_INHERIT		0x0020
+#define GENEVE_FLAG_DSCP_INHERIT	0x0040
+#define GENEVE_FLAG_COLLECT_METADATA	0x0080
+
+	int				gnv_reqcap;
+	int				gnv_reqcap2;
+	struct geneve_socket		*gnv_sock;
+	union sockaddr_union		gnv_src_addr;
+	union sockaddr_union		gnv_dst_addr;
+	uint32_t			gnv_fibnum;
+	uint32_t			gnv_vni;
+	uint32_t			gnv_port_hash_key;
+	uint16_t			gnv_proto;
+	uint16_t			gnv_min_port;
+	uint16_t			gnv_max_port;
+	uint8_t				gnv_ttl;
+	enum ifla_geneve_df		gnv_df;
+
+	/* Lookup table from MAC address to forwarding entry. */
+	uint32_t			gnv_ftable_cnt;
+	uint32_t			gnv_ftable_max;
+	uint32_t			gnv_ftable_timeout;
+	uint32_t			gnv_ftable_hash_key;
+	struct geneve_ftable_head	*gnv_ftable;
+
+	/* Derived from gnv_dst_addr. */
+	struct gnv_ftable_entry		gnv_default_fe;
+
+	struct ip_moptions		*gnv_im4o;
+	struct ip6_moptions		*gnv_im6o;
+
+	struct rmlock			gnv_lock;
+	volatile u_int			gnv_refcnt;
+
+	int				gnv_so_mc_index;
+	struct geneve_statistics	gnv_stats;
+	struct callout			gnv_callout;
+	struct ether_addr		gnv_hwaddr;
+	int				gnv_mc_ifindex;
+	struct ifnet			*gnv_mc_ifp;
+	struct ifmedia			gnv_media;
+	char				gnv_mc_ifname[IFNAMSIZ];
+
+	/* For rate limiting errors on the tx fast path. */
+	struct timeval			err_time;
+	int				err_pps;
+};
+
+/*
+ * Locking and reference counting helpers for struct geneve_softc.
+ *
+ * GENEVE_UNLOCK() releases the lock in whichever mode the caller holds
+ * it: the write lock when rm_wowned() reports ownership, otherwise the
+ * read lock tracked by _p.  It exists for paths that may upgrade from the
+ * read to the write lock, such as geneve_ftable_update_locked().
+ */
+#define GENEVE_RLOCK(_sc, _p)	rm_rlock(&(_sc)->gnv_lock, (_p))
+#define GENEVE_RUNLOCK(_sc, _p)	rm_runlock(&(_sc)->gnv_lock, (_p))
+#define GENEVE_WLOCK(_sc)	rm_wlock(&(_sc)->gnv_lock)
+#define GENEVE_WUNLOCK(_sc)	rm_wunlock(&(_sc)->gnv_lock)
+#define GENEVE_LOCK_WOWNED(_sc)	rm_wowned(&(_sc)->gnv_lock)
+#define GENEVE_LOCK_ASSERT(_sc)	rm_assert(&(_sc)->gnv_lock, RA_LOCKED)
+#define GENEVE_LOCK_WASSERT(_sc) rm_assert(&(_sc)->gnv_lock, RA_WLOCKED)
+#define GENEVE_UNLOCK(_sc, _p) do {		\
+    if (GENEVE_LOCK_WOWNED(_sc))		\
+	GENEVE_WUNLOCK(_sc);			\
+    else					\
+	GENEVE_RUNLOCK(_sc, _p);		\
+} while (0)
+
+#define GENEVE_ACQUIRE(_sc)	refcount_acquire(&(_sc)->gnv_refcnt)
+#define GENEVE_RELEASE(_sc)	refcount_release(&(_sc)->gnv_refcnt)
+
+#define	SATOCONSTSIN(sa)	((const struct sockaddr_in *)(sa))
+#define	SATOCONSTSIN6(sa)	((const struct sockaddr_in6 *)(sa))
+
+/*
+ * Per-packet information passed to the input helpers
+ * (geneve_input_ether() and geneve_input_inherit()).
+ * NOTE(review): presumably isr is the netisr protocol to dispatch to,
+ * ethertype the inner protocol type and ecn/ttl values taken from the
+ * outer IP header -- confirm in geneve_udp_input().
+ */
+struct geneve_pkt_info {
+	u_int		isr;
+	uint16_t	ethertype;
+	uint8_t		ecn;
+	uint8_t		ttl;
+};
+
+/*
+ * Destination for the netlink attributes parsed by the nla_p_geneve and
+ * nla_p_geneve_create tables.
+ *
+ * The parser callbacks store through a raw pointer computed with
+ * offsetof(), so every field must be exactly as wide as the value its
+ * callback writes (nlattr_get_uint8 -> uint8_t, nlattr_get_uint16 ->
+ * uint16_t, and so on).
+ */
+struct nl_parsed_geneve {
+	/* essential */
+	uint32_t			ifla_vni;
+	uint16_t			ifla_proto;
+	struct sockaddr			*ifla_local;
+	struct sockaddr			*ifla_remote;
+	uint16_t			ifla_local_port;
+	uint16_t			ifla_remote_port;
+
+	/* optional */
+	struct ifla_geneve_port_range	ifla_port_range;
+	/*
+	 * Holds an enum ifla_geneve_df value.  Kept as uint8_t because
+	 * IFLA_GENEVE_DF is parsed with nlattr_get_uint8, which stores a
+	 * single byte; an int-sized enum here would be only partially
+	 * written and would read back wrong on big-endian machines.
+	 */
+	uint8_t				ifla_df;
+	uint8_t				ifla_ttl;
+	bool				ifla_ttl_inherit;
+	bool				ifla_dscp_inherit;
+	bool				ifla_external;
+
+	/* l2 specific */
+	bool				ifla_ftable_learn;
+	bool				ifla_ftable_flush;
+	uint32_t			ifla_ftable_max;
+	uint32_t			ifla_ftable_timeout;
+	uint32_t			ifla_ftable_count;	/* read-only */
+
+	/* multicast specific */
+	char				*ifla_mc_ifname;
+	uint32_t			ifla_mc_ifindex;	/* read-only */
+};
+
+/* The multicast-based learning parts of the code are taken from if_vxlan */
+static int	geneve_ftable_addr_cmp(const uint8_t *, const uint8_t *);
+static void	geneve_ftable_init(struct geneve_softc *);
+static void	geneve_ftable_fini(struct geneve_softc *);
+static void	geneve_ftable_flush(struct geneve_softc *, int);
+static void	geneve_ftable_expire(struct geneve_softc *);
+static int	geneve_ftable_update_locked(struct geneve_softc *,
+		    const union sockaddr_union *, const uint8_t *,
+		    struct rm_priotracker *);
+static int	geneve_ftable_learn(struct geneve_softc *,
+		    const struct sockaddr *, const uint8_t *);
+
+static struct gnv_ftable_entry *
+		geneve_ftable_entry_alloc(void);
+static void	geneve_ftable_entry_free(struct gnv_ftable_entry *);
+static void	geneve_ftable_entry_init(struct geneve_softc *,
+		    struct gnv_ftable_entry *, const uint8_t *,
+		    const struct sockaddr *, uint32_t);
+static void	geneve_ftable_entry_destroy(struct geneve_softc *,
+		    struct gnv_ftable_entry *);
+static int	geneve_ftable_entry_insert(struct geneve_softc *,
+		    struct gnv_ftable_entry *);
+static struct gnv_ftable_entry *
+		geneve_ftable_entry_lookup(struct geneve_softc *,
+		    const uint8_t *);
+
+static struct geneve_socket *
+		geneve_socket_alloc(union sockaddr_union *laddr);
+static void	geneve_socket_destroy(struct geneve_socket *);
+static void	geneve_socket_release(struct geneve_socket *);
+static struct geneve_socket *
+		geneve_socket_lookup(union sockaddr_union *);
+static void	geneve_socket_insert(struct geneve_socket *);
+static int	geneve_socket_init(struct geneve_socket *, struct ifnet *);
+static int	geneve_socket_bind(struct geneve_socket *, struct ifnet *);
+static int	geneve_socket_create(struct ifnet *, int,
+		    const union sockaddr_union *, struct geneve_socket **);
+static int	geneve_socket_set_df(struct geneve_socket *, bool);
+
+static struct geneve_socket *
+		geneve_socket_mc_lookup(const union sockaddr_union *);
+static int	geneve_sockaddr_mc_info_match(
+		    const struct geneve_socket_mc_info *,
+		    const union sockaddr_union *,
+		    const union sockaddr_union *, int);
+static int	geneve_socket_mc_join_group(struct geneve_socket *,
+		    const union sockaddr_union *, const union sockaddr_union *,
+		    int *, union sockaddr_union *);
+static int	geneve_socket_mc_leave_group(struct geneve_socket *,
+		    const union sockaddr_union *,
+		    const union sockaddr_union *, int);
+static int	geneve_socket_mc_add_group(struct geneve_socket *,
+		    const union sockaddr_union *,
+		    const union sockaddr_union *, int, int *);
+static void	geneve_socket_mc_release_group(struct geneve_socket *, int);
+
+static struct geneve_softc *
+		geneve_socket_lookup_softc_locked(struct geneve_socket *,
+		    uint32_t);
+static struct geneve_softc *
+		geneve_socket_lookup_softc(struct geneve_socket *, uint32_t);
+static int	geneve_socket_insert_softc(struct geneve_socket *,
+		    struct geneve_softc *);
+static void	geneve_socket_remove_softc(struct geneve_socket *,
+		    struct geneve_softc *);
+
+static struct ifnet *
+		geneve_multicast_if_ref(struct geneve_softc *, uint32_t);
+static void	geneve_free_multicast(struct geneve_softc *);
+static int	geneve_setup_multicast_interface(struct geneve_softc *);
+
+static int	geneve_setup_multicast(struct geneve_softc *);
+static int	geneve_setup_socket(struct geneve_softc *);
+static void	geneve_setup_interface_hdrlen(struct geneve_softc *);
+static int	geneve_valid_init_config(struct geneve_softc *);
+static void	geneve_init_complete(struct geneve_softc *);
+static void	geneve_init(void *);
+static void	geneve_release(struct geneve_softc *);
+static void	geneve_teardown_wait(struct geneve_softc *);
+static void	geneve_teardown_locked(struct geneve_softc *);
+static void	geneve_teardown(struct geneve_softc *);
+static void	geneve_timer(void *);
+
+static int	geneve_flush_ftable(struct geneve_softc *, bool);
+static uint16_t	geneve_get_local_port(struct geneve_softc *);
+static uint16_t	geneve_get_remote_port(struct geneve_softc *);
+
+static int	geneve_set_vni_nl(struct geneve_softc *, struct nl_pstate *,
+		    uint32_t);
+static int	geneve_set_local_addr_nl(struct geneve_softc *, struct nl_pstate *,
+		    struct sockaddr *);
+static int	geneve_set_remote_addr_nl(struct geneve_softc *, struct nl_pstate *,
+		    struct sockaddr *);
+static int	geneve_set_local_port_nl(struct geneve_softc *, struct nl_pstate *,
+		    uint16_t);
+static int	geneve_set_remote_port_nl(struct geneve_softc *, struct nl_pstate *,
+		    uint16_t);
+static int	geneve_set_port_range_nl(struct geneve_softc *, struct nl_pstate *,
+		    struct ifla_geneve_port_range);
+static int	geneve_set_df_nl(struct geneve_softc *, struct nl_pstate *,
+		    enum ifla_geneve_df);
+static int	geneve_set_ttl_nl(struct geneve_softc *, struct nl_pstate *,
+		    uint8_t);
+static int	geneve_set_ttl_inherit_nl(struct geneve_softc *, struct nl_pstate *,
+		    bool);
+static int	geneve_set_dscp_inherit_nl(struct geneve_softc *, struct nl_pstate *,
+		    bool);
+static int	geneve_set_collect_metadata_nl(struct geneve_softc *,
+		    struct nl_pstate *, bool);
+static int	geneve_set_learn_nl(struct geneve_softc *, struct nl_pstate *,
+		    bool);
+static int	geneve_set_ftable_max_nl(struct geneve_softc *, struct nl_pstate *,
+		    uint32_t);
+static int	geneve_set_ftable_timeout_nl(struct geneve_softc *,
+		    struct nl_pstate *, uint32_t);
+static int	geneve_set_mc_if_nl(struct geneve_softc *, struct nl_pstate *,
+		    char *);
+static int	geneve_flush_ftable_nl(struct geneve_softc *, struct nl_pstate *,
+		    bool);
+static void	geneve_get_local_addr_nl(struct geneve_softc *, struct nl_writer *);
+static void	geneve_get_remote_addr_nl(struct geneve_softc *, struct nl_writer *);
+
+static int	geneve_ioctl_ifflags(struct geneve_softc *);
+static int	geneve_ioctl(struct ifnet *, u_long, caddr_t);
+
+static uint16_t geneve_pick_source_port(struct geneve_softc *, struct mbuf *);
+static void	geneve_encap_header(struct geneve_softc *, struct mbuf *,
+		    int, uint16_t, uint16_t, uint16_t);
+static uint16_t	geneve_get_ethertype(struct mbuf *);
+static int	geneve_inherit_l3_hdr(struct mbuf *, struct geneve_softc *,
+		    uint16_t, uint8_t *, uint8_t *, u_short *);
+static int	geneve_encap4(struct geneve_softc *,
+		    const union sockaddr_union *, struct mbuf *);
+static int	geneve_encap6(struct geneve_softc *,
+		    const union sockaddr_union *, struct mbuf *);
+static int	geneve_transmit(struct ifnet *, struct mbuf *);
+static void	geneve_qflush(struct ifnet *);
+static int	geneve_output(struct ifnet *, struct mbuf *,
+		    const struct sockaddr *, struct route *);
+static uint32_t	geneve_map_etype_to_af(uint32_t);
+static bool	geneve_udp_input(struct mbuf *, int, struct inpcb *,
+		    const struct sockaddr *, void *);
+static int	geneve_input_ether(struct geneve_softc *, struct mbuf **,
+		    const struct sockaddr *, struct geneve_pkt_info *);
+static int	geneve_input_inherit(struct geneve_softc *,
+		    struct mbuf **, int, struct geneve_pkt_info *);
+static int	geneve_next_option(struct geneve_socket *, struct genevehdr *,
+		    struct mbuf **);
+static void	geneve_input_csum(struct mbuf *m, struct ifnet *ifp,
+		    counter_u64_t rxcsum);
+
+static void	geneve_stats_alloc(struct geneve_softc *);
+static void	geneve_stats_free(struct geneve_softc *);
+static void	geneve_set_default_config(struct geneve_softc *);
+static int	geneve_set_reqcap(struct geneve_softc *, struct ifnet *, int,
+		    int);
+static void	geneve_set_hwcaps(struct geneve_softc *);
+static int	geneve_clone_create(struct if_clone *, char *, size_t,
+		    struct ifc_data *, struct ifnet **);
+static int	geneve_clone_destroy(struct if_clone *, struct ifnet *,
+		    uint32_t);
+static int	geneve_clone_create_nl(struct if_clone *, char *, size_t,
+		    struct ifc_data_nl *);
+static int	geneve_clone_modify_nl(struct ifnet *, struct ifc_data_nl *);
+static void	geneve_clone_dump_nl(struct ifnet *, struct nl_writer *);
+
+static uint32_t geneve_mac_hash(struct geneve_softc *, const uint8_t *);
+static int	geneve_media_change(struct ifnet *);
+static void	geneve_media_status(struct ifnet *, struct ifmediareq *);
+
+static int	geneve_sockaddr_cmp(const union sockaddr_union *,
+		    const struct sockaddr *);
+static void	geneve_sockaddr_copy(union sockaddr_union *,
+		    const struct sockaddr *);
+static int	geneve_sockaddr_in_equal(const union sockaddr_union *,
+		    const struct sockaddr *);
+static void	geneve_sockaddr_in_copy(union sockaddr_union *,
+		    const struct sockaddr *);
+static int	geneve_sockaddr_supported(const union sockaddr_union *, int);
+static int	geneve_sockaddr_in_any(const union sockaddr_union *);
+
+static int	geneve_can_change_config(struct geneve_softc *);
+static int	geneve_check_proto(uint16_t);
+static int	geneve_check_multicast_addr(const union sockaddr_union *);
+static int	geneve_check_sockaddr(const union sockaddr_union *, const int);
+
+static int	geneve_prison_remove(void *, void *);
+static void	vnet_geneve_load(void);
+static void	vnet_geneve_unload(void);
+static void	geneve_module_init(void);
+static void	geneve_module_deinit(void);
+static int	geneve_modevent(module_t, int, void *);
+
+
+static const char geneve_name[] = "geneve";
+static MALLOC_DEFINE(M_GENEVE, geneve_name,
+    "Generic Network Virtualization Encapsulation Interface");
+#define MTAG_GENEVE_LOOP	0x93d66dc0 /* geneve mtag */
+
+VNET_DEFINE_STATIC(struct if_clone *, geneve_cloner);
+#define	V_geneve_cloner	VNET(geneve_cloner)
+
+static struct mtx geneve_list_mtx;
+#define GENEVE_LIST_LOCK()	mtx_lock(&geneve_list_mtx)
+#define GENEVE_LIST_UNLOCK()	mtx_unlock(&geneve_list_mtx)
+
+static LIST_HEAD(, geneve_socket) geneve_socket_list = LIST_HEAD_INITIALIZER(geneve_socket_list);
+
+/* Default maximum number of addresses in the forwarding table. */
+#define GENEVE_FTABLE_MAX	2000
+
+/* Timeout (in seconds) of addresses learned in the forwarding table. */
+#define GENEVE_FTABLE_TIMEOUT	(20 * 60)
+
+/* Maximum timeout (in seconds) of addresses learned in the forwarding table. */
+#define GENEVE_FTABLE_MAX_TIMEOUT	(60 * 60 * 24)
+
+/* Number of seconds between pruning attempts of the forwarding table. */
+#define GENEVE_FTABLE_PRUNE	(5 * 60)
+
+static int geneve_ftable_prune_period = GENEVE_FTABLE_PRUNE;
+
+#define _OUT(_field)	offsetof(struct nl_parsed_geneve, _field)
+static const struct nlattr_parser nla_p_geneve_create[] = {
+	{ .type = IFLA_GENEVE_PROTOCOL, .off = _OUT(ifla_proto), .cb = nlattr_get_uint16 },
+};
+#undef _OUT
+NL_DECLARE_ATTR_PARSER(geneve_create_parser, nla_p_geneve_create);
+
+#define _OUT(_field)	offsetof(struct nl_parsed_geneve, _field)
+static const struct nlattr_parser nla_p_geneve[] = {
+	{ .type = IFLA_GENEVE_ID, .off = _OUT(ifla_vni), .cb = nlattr_get_uint32 },
+	{ .type = IFLA_GENEVE_PROTOCOL, .off = _OUT(ifla_proto), .cb = nlattr_get_uint16 },
+	{ .type = IFLA_GENEVE_LOCAL, .off = _OUT(ifla_local), .cb = nlattr_get_ip },
+	{ .type = IFLA_GENEVE_REMOTE, .off = _OUT(ifla_remote), .cb = nlattr_get_ip },
+	{ .type = IFLA_GENEVE_LOCAL_PORT, .off = _OUT(ifla_local_port), .cb = nlattr_get_uint16 },
+	{ .type = IFLA_GENEVE_PORT, .off = _OUT(ifla_remote_port), .cb = nlattr_get_uint16 },
+	{ .type = IFLA_GENEVE_PORT_RANGE, .off = _OUT(ifla_port_range),
+		.arg = (void *)sizeof(struct ifla_geneve_port_range), .cb = nlattr_get_bytes },
+	{ .type = IFLA_GENEVE_DF, .off = _OUT(ifla_df), .cb = nlattr_get_uint8 },
+	{ .type = IFLA_GENEVE_TTL, .off = _OUT(ifla_ttl), .cb = nlattr_get_uint8 },
+	{ .type = IFLA_GENEVE_TTL_INHERIT, .off = _OUT(ifla_ttl_inherit), .cb = nlattr_get_bool },
+	{ .type = IFLA_GENEVE_DSCP_INHERIT, .off = _OUT(ifla_dscp_inherit), .cb = nlattr_get_bool },
+	{ .type = IFLA_GENEVE_COLLECT_METADATA, .off = _OUT(ifla_external), .cb = nlattr_get_bool },
+	{ .type = IFLA_GENEVE_FTABLE_LEARN, .off = _OUT(ifla_ftable_learn), .cb = nlattr_get_bool },
+	{ .type = IFLA_GENEVE_FTABLE_FLUSH, .off = _OUT(ifla_ftable_flush), .cb = nlattr_get_bool },
+	{ .type = IFLA_GENEVE_FTABLE_MAX, .off = _OUT(ifla_ftable_max), .cb = nlattr_get_uint32 },
+	{ .type = IFLA_GENEVE_FTABLE_TIMEOUT, .off = _OUT(ifla_ftable_timeout), .cb = nlattr_get_uint32 },
+	{ .type = IFLA_GENEVE_MC_IFNAME, .off = _OUT(ifla_mc_ifname), .cb = nlattr_get_string },
+};
+#undef _OUT
+NL_DECLARE_ATTR_PARSER(geneve_modify_parser, nla_p_geneve);
+
+static const struct nlhdr_parser *all_parsers[] = {
+	&geneve_create_parser, &geneve_modify_parser,
+};
+
+/*
+ * Compare two Ethernet addresses byte by byte.  Returns zero when all
+ * ETHER_ADDR_LEN bytes match, otherwise the signed difference between the
+ * first pair of bytes that differ.
+ */
+static int
+geneve_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
+{
+	int diff;
+
+	for (int idx = 0; idx < ETHER_ADDR_LEN; idx++) {
+		diff = (int)a[idx] - (int)b[idx];
+		if (diff != 0)
+			return (diff);
+	}
+
+	return (0);
+}
+
+/*
+ * Allocate the forwarding table of GENEVE_SC_FTABLE_SIZE buckets (the
+ * allocation may sleep), initialize every bucket as an empty list and
+ * choose a random key for the MAC hash.
+ */
+static void
+geneve_ftable_init(struct geneve_softc *sc)
+{
+	struct geneve_ftable_head *bucket;
+
+	sc->gnv_ftable = malloc(sizeof(struct geneve_ftable_head) *
+	    GENEVE_SC_FTABLE_SIZE, M_GENEVE, M_ZERO | M_WAITOK);
+
+	for (bucket = &sc->gnv_ftable[0];
+	    bucket < &sc->gnv_ftable[GENEVE_SC_FTABLE_SIZE]; bucket++)
+		LIST_INIT(bucket);
+
+	sc->gnv_ftable_hash_key = arc4random();
+}
+
+/*
+ * Release the forwarding table.  All buckets must already be empty and
+ * the entry count zero; both conditions are asserted.
+ */
+static void
+geneve_ftable_fini(struct geneve_softc *sc)
+{
+	int bucket;
+
+	bucket = 0;
+	while (bucket < GENEVE_SC_FTABLE_SIZE) {
+		KASSERT(LIST_EMPTY(&sc->gnv_ftable[bucket]),
+		    ("%s: geneve %p ftable[%d] not empty", __func__, sc,
+		    bucket));
+		bucket++;
+	}
+	MPASS(sc->gnv_ftable_cnt == 0);
+
+	free(sc->gnv_ftable, M_GENEVE);
+	sc->gnv_ftable = NULL;
+}
+
+/*
+ * Remove entries from the forwarding table.  When `all' is non-zero every
+ * entry is destroyed, otherwise only the dynamic ones are.
+ */
+static void
+geneve_ftable_flush(struct geneve_softc *sc, int all)
+{
+	struct geneve_ftable_head *bucket;
+	struct gnv_ftable_entry *cur, *next;
+
+	for (int idx = 0; idx < GENEVE_SC_FTABLE_SIZE; idx++) {
+		bucket = &sc->gnv_ftable[idx];
+		LIST_FOREACH_SAFE(cur, bucket, gnvfe_hash, next) {
+			if (!all && !GENEVE_FE_IS_DYNAMIC(cur))
+				continue;
+			geneve_ftable_entry_destroy(sc, cur);
+		}
+	}
+}
+
+/*
+ * Destroy every dynamic forwarding entry whose expiry time has been
+ * reached.  The softc write lock must be held.
+ */
+static void
+geneve_ftable_expire(struct geneve_softc *sc)
+{
+	struct geneve_ftable_head *bucket;
+	struct gnv_ftable_entry *cur, *next;
+
+	GENEVE_LOCK_WASSERT(sc);
+
+	for (int idx = 0; idx < GENEVE_SC_FTABLE_SIZE; idx++) {
+		bucket = &sc->gnv_ftable[idx];
+		LIST_FOREACH_SAFE(cur, bucket, gnvfe_hash, next) {
+			/* Static entries never expire. */
+			if (!GENEVE_FE_IS_DYNAMIC(cur))
+				continue;
+			if (time_uptime < cur->gnvfe_expire)
+				continue;
+			geneve_ftable_entry_destroy(sc, cur);
+		}
+	}
+}
+
+/*
+ * Create or refresh the dynamic forwarding entry for `mac' so that it
+ * points at `unsa'.
+ *
+ * Must be called with the softc lock held, either read (tracked by
+ * `tracker') or write.  If an entry has to be changed or created while
+ * only the read lock is held, the read lock is dropped, the write lock is
+ * taken and the lookup is redone, so the function may return with the
+ * write lock held instead of the read lock.  Callers therefore release
+ * with GENEVE_UNLOCK().
+ *
+ * Returns 0 on success, ENOSPC when the table already holds
+ * gnv_ftable_max entries, or ENOMEM when no entry could be allocated.
+ */
+static int
+geneve_ftable_update_locked(struct geneve_softc *sc,
+    const union sockaddr_union *unsa, const uint8_t *mac,
+    struct rm_priotracker *tracker)
+{
+	struct gnv_ftable_entry *fe;
+	int error;
+
+	GENEVE_LOCK_ASSERT(sc);
+
+again:
+	/*
+	 * A forwarding entry for this MAC address might already exist. If
+	 * so, update it, otherwise create a new one. We may have to upgrade
+	 * the lock if we have to change or create an entry.
+	 */
+	fe = geneve_ftable_entry_lookup(sc, mac);
+	if (fe != NULL) {
+		/* Seen again: push the expiry time out. */
+		fe->gnvfe_expire = time_uptime + sc->gnv_ftable_timeout;
+
+		/* Static entries and unchanged endpoints need no rewrite. */
+		if (!GENEVE_FE_IS_DYNAMIC(fe) ||
+		    geneve_sockaddr_in_equal(&fe->gnvfe_raddr, &unsa->sa))
+			return (0);
+		if (!GENEVE_LOCK_WOWNED(sc)) {
+			/*
+			 * The lock is released in between, so the table may
+			 * have changed; redo the lookup under the write lock.
+			 */
+			GENEVE_RUNLOCK(sc, tracker);
+			GENEVE_WLOCK(sc);
+			sc->gnv_stats.ftable_lock_upgrade_failed++;
+			goto again;
+		}
+		geneve_sockaddr_in_copy(&fe->gnvfe_raddr, &unsa->sa);
+		return (0);
+	}
+
+	if (!GENEVE_LOCK_WOWNED(sc)) {
+		GENEVE_RUNLOCK(sc, tracker);
+		GENEVE_WLOCK(sc);
+		sc->gnv_stats.ftable_lock_upgrade_failed++;
+		goto again;
+	}
+
+	if (sc->gnv_ftable_cnt >= sc->gnv_ftable_max) {
+		sc->gnv_stats.ftable_nospace++;
+		return (ENOSPC);
+	}
+
+	fe = geneve_ftable_entry_alloc();
+	if (fe == NULL)
+		return (ENOMEM);
+
+	geneve_ftable_entry_init(sc, fe, mac, &unsa->sa, GENEVE_FE_FLAG_DYNAMIC);
+
+	/* The prior lookup failed, so the insert should not. */
+	error = geneve_ftable_entry_insert(sc, fe);
+	MPASS(error == 0);
+
+	return (error);
+}
+
+/*
+ * Record that `mac' was seen behind the tunnel endpoint `sa'.
+ *
+ * The endpoint address is copied and its port is replaced with the port
+ * of the default destination address, because the remote host may pick
+ * its source port at random.  IPv6 addresses get their scope embedded
+ * before the forwarding table is updated.
+ *
+ * Returns 0 on success, or the error reported by sa6_embedscope() or
+ * geneve_ftable_update_locked().
+ */
+static int
+geneve_ftable_learn(struct geneve_softc *sc, const struct sockaddr *sa,
+    const uint8_t *mac)
+{
+	struct rm_priotracker pt;
+	union sockaddr_union endpoint;
+	int rc;
+
+	geneve_sockaddr_copy(&endpoint, sa);
+	endpoint.sin.sin_port = sc->gnv_dst_addr.sin.sin_port;
+
+	if (endpoint.sa.sa_family == AF_INET6) {
+		rc = sa6_embedscope(&endpoint.sin6, V_ip6_use_defzone);
+		if (rc != 0)
+			return (rc);
+	}
+
+	/* The update may upgrade to the write lock; unlock accordingly. */
+	GENEVE_RLOCK(sc, &pt);
+	rc = geneve_ftable_update_locked(sc, &endpoint, mac, &pt);
+	GENEVE_UNLOCK(sc, &pt);
+
+	return (rc);
+}
+
+/*
+ * Allocate a zeroed forwarding table entry without sleeping.  Returns
+ * NULL when no memory is available.
+ */
+static struct gnv_ftable_entry *
+geneve_ftable_entry_alloc(void)
+{
+
+	return (malloc(sizeof(struct gnv_ftable_entry), M_GENEVE,
+	    M_ZERO | M_NOWAIT));
+}
+
+/*
+ * Return the memory of a forwarding table entry to the M_GENEVE malloc
+ * type.  The entry is not unlinked here.
+ */
+static void
+geneve_ftable_entry_free(struct gnv_ftable_entry *fe)
+{
+	free(fe, M_GENEVE);
+}
+
+/*
+ * Fill in a forwarding table entry: key it by `mac', point it at `sa',
+ * set its flags and start its expiry timer from the interface's current
+ * forwarding table timeout.
+ */
+static void
+geneve_ftable_entry_init(struct geneve_softc *sc, struct gnv_ftable_entry *fe,
+    const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
+{
+	time_t deadline;
+
+	fe->gnvfe_flags = flags;
+	deadline = time_uptime + sc->gnv_ftable_timeout;
+	fe->gnvfe_expire = deadline;
+	memcpy(fe->gnvfe_mac, mac, ETHER_ADDR_LEN);
+	geneve_sockaddr_copy(&fe->gnvfe_raddr, sa);
+}
+
+/*
+ * Unlink a forwarding table entry from its bucket, account for it in the
+ * entry count and free it.
+ */
+static void
+geneve_ftable_entry_destroy(struct geneve_softc *sc,
+    struct gnv_ftable_entry *fe)
+{
+	LIST_REMOVE(fe, gnvfe_hash);
+	sc->gnv_ftable_cnt--;
+	geneve_ftable_entry_free(fe);
+}
+
+/*
+ * Link `fe' into the bucket selected by its MAC address, keeping the
+ * bucket ordered the way geneve_ftable_entry_lookup() expects.  The softc
+ * write lock must be held.
+ *
+ * Returns 0 on success and bumps the entry count, or EEXIST (without
+ * touching the table) when an entry with the same MAC address is already
+ * present.
+ */
+static int
+geneve_ftable_entry_insert(struct geneve_softc *sc,
+    struct gnv_ftable_entry *fe)
+{
+	struct geneve_ftable_head *bucket;
+	struct gnv_ftable_entry *cur, *next;
+	int order;
+
+	GENEVE_LOCK_WASSERT(sc);
+	bucket = &sc->gnv_ftable[GENEVE_SC_FTABLE_HASH(sc, fe->gnvfe_mac)];
+
+	cur = LIST_FIRST(bucket);
+	if (cur == NULL) {
+		LIST_INSERT_HEAD(bucket, fe, gnvfe_hash);
+	} else {
+		for (;;) {
+			order = geneve_ftable_addr_cmp(fe->gnvfe_mac,
+			    cur->gnvfe_mac);
+			if (order == 0)
+				return (EEXIST);
+			if (order > 0) {
+				LIST_INSERT_BEFORE(cur, fe, gnvfe_hash);
+				break;
+			}
+			next = LIST_NEXT(cur, gnvfe_hash);
+			if (next == NULL) {
+				/* Reached the tail: append. */
+				LIST_INSERT_AFTER(cur, fe, gnvfe_hash);
+				break;
+			}
+			cur = next;
+		}
+	}
+
+	sc->gnv_ftable_cnt++;
+
+	return (0);
+}
+
+/*
+ * Find the forwarding table entry for `mac'.  The softc lock must be
+ * held.  The scan of the bucket stops as soon as `mac' compares greater
+ * than the current entry, relying on the order kept by
+ * geneve_ftable_entry_insert().  Returns NULL when there is no match.
+ */
+static struct gnv_ftable_entry *
+geneve_ftable_entry_lookup(struct geneve_softc *sc, const uint8_t *mac)
+{
+	struct geneve_ftable_head *bucket;
+	struct gnv_ftable_entry *cur;
+	int order;
+
+	GENEVE_LOCK_ASSERT(sc);
+
+	bucket = &sc->gnv_ftable[GENEVE_SC_FTABLE_HASH(sc, mac)];
+	for (cur = LIST_FIRST(bucket); cur != NULL;
+	    cur = LIST_NEXT(cur, gnvfe_hash)) {
+		order = geneve_ftable_addr_cmp(mac, cur->gnvfe_mac);
+		if (order > 0)
+			return (NULL);
+		if (order == 0)
+			return (cur);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Allocate and initialize a geneve socket structure for the local
+ * address `laddr' (the allocation may sleep).  The lock is initialized,
+ * the reference count starts at zero and all VNI hash buckets are empty;
+ * no socket is created here.
+ */
+static struct geneve_socket *
+geneve_socket_alloc(union sockaddr_union *laddr)
+{
+	struct geneve_socket *sock;
+	int bucket;
+
+	sock = malloc(sizeof(*sock), M_GENEVE, M_WAITOK | M_ZERO);
+	rm_init(&sock->gnvso_lock, "genevesorm");
+	refcount_init(&sock->gnvso_refcnt, 0);
+
+	bucket = 0;
+	while (bucket < GENEVE_SO_VNI_HASH_SIZE) {
+		LIST_INIT(&sock->gnvso_vni_hash[bucket]);
+		bucket++;
+	}
+	sock->gnvso_laddr = *laddr;
+
+	return (sock);
+}
+
+/*
+ * Tear down a geneve socket structure: close the underlying socket if
+ * there is one, destroy the lock and free the memory.
+ */
+static void
+geneve_socket_destroy(struct geneve_socket *gnvso)
+{
+
+	if (gnvso->gnvso_sock != NULL) {
+		struct socket *sock = gnvso->gnvso_sock;
+
+		/* Clear the pointer before the socket goes away. */
+		gnvso->gnvso_sock = NULL;
+		soclose(sock);
+	}
+
+	rm_destroy(&gnvso->gnvso_lock);
+	free(gnvso, M_GENEVE);
+}
+
+static void
+geneve_socket_release(struct geneve_socket *gnvso)
+{
+	int destroy;
+
+	GENEVE_LIST_LOCK();
*** 3327 LINES SKIPPED ***