git: e44d2e941e8e - main - if_geneve: Add Support for Geneve (RFC8926)
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 13 Apr 2026 14:16:56 UTC
The branch main has been updated by pouria:
URL: https://cgit.FreeBSD.org/src/commit/?id=e44d2e941e8ebd74e6a1b1fdbed83fe86671cbc6
commit e44d2e941e8ebd74e6a1b1fdbed83fe86671cbc6
Author: Pouria Mousavizadeh Tehrani <pouria@FreeBSD.org>
AuthorDate: 2026-04-11 14:12:01 +0000
Commit: Pouria Mousavizadeh Tehrani <pouria@FreeBSD.org>
CommitDate: 2026-04-13 14:14:58 +0000
if_geneve: Add Support for Geneve (RFC8926)
geneve creates a generic network virtualization tunnel interface
for Tenant Systems over an L3 (IP/UDP) underlay network that provides
a Layer 2 (ethernet) or Layer 3 service using the geneve protocol.
This implementation is based on RFC8926.
Reviewed by: glebius, adrian
Discussed with: zlei, kp
Relnotes: yes
Differential Revision: https://reviews.freebsd.org/D54172
---
sys/conf/NOTES | 4 +
sys/conf/files | 1 +
sys/kern/kern_jail.c | 1 +
sys/modules/Makefile | 1 +
sys/modules/if_geneve/Makefile | 7 +
sys/net/if.c | 2 +
sys/net/if.h | 6 +-
sys/net/if_geneve.c | 3967 ++++++++++++++++++++++++++++++++++++++++
sys/net/if_geneve.h | 70 +
sys/net/if_strings.h | 12 +-
sys/netlink/route/interface.h | 44 +
sys/sys/mbuf.h | 6 +-
sys/sys/priv.h | 1 +
13 files changed, 4115 insertions(+), 7 deletions(-)
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 4dda93e2ee70..4279fae4c547 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -880,6 +880,10 @@ device vlan
# frames in UDP packets according to RFC7348.
device vxlan
+# The `geneve' device implements the GENEVE encapsulation of virtual
+# overlays according to RFC8926.
+device geneve
+
# The `wlan' device provides generic code to support 802.11
# drivers, including host AP mode; it is MANDATORY for the wi,
# and ath drivers and will eventually be required by all 802.11 drivers.
diff --git a/sys/conf/files b/sys/conf/files
index b44fb46ef764..99ba7cdaba33 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4238,6 +4238,7 @@ net/if_stf.c optional stf inet inet6
net/if_tuntap.c optional tuntap
net/if_vlan.c optional vlan
net/if_vxlan.c optional vxlan inet | vxlan inet6
+net/if_geneve.c optional geneve inet | geneve inet6
net/ifdi_if.m optional ether pci iflib
net/iflib.c optional ether pci iflib
net/mp_ring.c optional ether iflib
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 384825b7f8ac..bc80adb91cd6 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -4385,6 +4385,7 @@ prison_priv_check(struct ucred *cred, int priv)
case PRIV_NET_SETIFVNET:
case PRIV_NET_SETIFFIB:
case PRIV_NET_OVPN:
+ case PRIV_NET_GENEVE:
case PRIV_NET_ME:
case PRIV_NET_WG:
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
index a4100c31ef26..faedb856977c 100644
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -169,6 +169,7 @@ SUBDIR= \
if_tuntap \
if_vlan \
if_vxlan \
+ if_geneve \
${_if_wg} \
iflib \
${_igc} \
diff --git a/sys/modules/if_geneve/Makefile b/sys/modules/if_geneve/Makefile
new file mode 100644
index 000000000000..1e65d4dbb168
--- /dev/null
+++ b/sys/modules/if_geneve/Makefile
@@ -0,0 +1,7 @@
+.PATH: ${SRCTOP}/sys/net
+
+KMOD= if_geneve
+SRCS= if_geneve.c
+SRCS+= opt_inet.h opt_inet6.h
+
+.include <bsd.kmod.mk>
diff --git a/sys/net/if.c b/sys/net/if.c
index 760ae94e842b..8a148ba0fd06 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -2273,6 +2273,8 @@ const struct ifcap_nv_bit_name ifcap2_nv_bit_names[] = {
CAP2NV(RXTLS4),
CAP2NV(RXTLS6),
CAP2NV(IPSEC_OFFLOAD),
+ CAP2NV(GENEVE_HWCSUM),
+ CAP2NV(GENEVE_HWTSO),
{0, NULL}
};
#undef CAPNV
diff --git a/sys/net/if.h b/sys/net/if.h
index 1b47237e46bb..4bb6a2659ce7 100644
--- a/sys/net/if.h
+++ b/sys/net/if.h
@@ -255,7 +255,9 @@ struct if_data {
#define IFCAP_B_RXTLS4 32 /* can do TLS receive for TCP */
#define IFCAP_B_RXTLS6 33 /* can do TLS receive for TCP6 */
#define IFCAP_B_IPSEC_OFFLOAD 34 /* inline IPSEC offload */
-#define __IFCAP_B_SIZE 35
+#define IFCAP_B_GENEVE_HWCSUM 35 /* can do IFCAP_HWCSUM on GENEVE */
+#define IFCAP_B_GENEVE_HWTSO 36 /* can do IFCAP_TSO on GENEVE */
+#define __IFCAP_B_SIZE 37
#define IFCAP_B_MAX (__IFCAP_B_MAX - 1)
#define IFCAP_B_SIZE (__IFCAP_B_SIZE)
@@ -299,6 +301,8 @@ struct if_data {
#define IFCAP2_RXTLS4 (IFCAP_B_RXTLS4 - 32)
#define IFCAP2_RXTLS6 (IFCAP_B_RXTLS6 - 32)
#define IFCAP2_IPSEC_OFFLOAD (IFCAP_B_IPSEC_OFFLOAD - 32)
+#define IFCAP2_GENEVE_HWCSUM (IFCAP_B_GENEVE_HWCSUM - 32)
+#define IFCAP2_GENEVE_HWTSO (IFCAP_B_GENEVE_HWTSO - 32)
#define IFCAP2_BIT(x) (1UL << (x))
diff --git a/sys/net/if_geneve.c b/sys/net/if_geneve.c
new file mode 100644
index 000000000000..9562a3476099
--- /dev/null
+++ b/sys/net/if_geneve.c
@@ -0,0 +1,3967 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025-2026 Pouria Mousavizadeh Tehrani <pouria@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/hash.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/refcount.h>
+#include <sys/rmlock.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/sdt.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockio.h>
+#include <sys/sx.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/jail.h>
+
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_private.h>
+#include <net/if_arp.h>
+#include <net/if_clone.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
+#include <net/netisr.h>
+#include <net/route.h>
+#include <net/route/nhop.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/scope6_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+#include <netinet/in_fib.h>
+#include <netinet6/in6_fib.h>
+#include <netinet/ip_ecn.h>
+#include <net/if_geneve.h>
+
+#include <netlink/netlink.h>
+#include <netlink/netlink_ctl.h>
+#include <netlink/netlink_var.h>
+#include <netlink/netlink_route.h>
+#include <netlink/route/route_var.h>
+
+#include <security/mac/mac_framework.h>
+
+SDT_PROVIDER_DEFINE(if_geneve);
+
+struct geneve_softc;
+LIST_HEAD(geneve_softc_head, geneve_softc);
+
+static struct sx geneve_sx;
+SX_SYSINIT(geneve, &geneve_sx, "GENEVE global start/stop lock");
+
+static unsigned geneve_osd_jail_slot;
+
+union sockaddr_union { /* one socket address, viewed per address family */
+ struct sockaddr sa; /* family-independent view */
+ struct sockaddr_in sin; /* IPv4 view */
+ struct sockaddr_in6 sin6; /* IPv6 view */
+};
+
+struct geneve_socket_mc_info { /* element type of geneve_socket.gnvso_mc[] */
+ union sockaddr_union gnvsomc_saddr;
+ union sockaddr_union gnvsomc_gaddr;
+ int gnvsomc_ifidx;
+ int gnvsomc_users; /* NOTE(review): presumably a per-slot user count -- confirm */
+};
+
+/*
+ * The maximum MTU of an encapsulated geneve packet.
+ * GENEVE_MAX_L3MTU is what is left of IP_MAXPACKET after the largest IPv4
+ * header, the UDP header and struct genevehdr; GENEVE_MAX_MTU additionally
+ * subtracts the Ethernet header and the VLAN encapsulation length.
+ */
+#define GENEVE_MAX_L3MTU (IP_MAXPACKET - \
+ 60 /* Maximum IPv4 header len */ - \
+ sizeof(struct udphdr) - \
+ sizeof(struct genevehdr))
+#define GENEVE_MAX_MTU (GENEVE_MAX_L3MTU - \
+ ETHER_HDR_LEN - ETHER_VLAN_ENCAP_LEN)
+
+#define GENEVE_BASIC_IFCAPS (IFCAP_LINKSTATE | IFCAP_JUMBO_MTU | IFCAP_NV)
+
+#define GENEVE_VERSION 0
+#define GENEVE_VNI_MASK (GENEVE_VNI_MAX - 1)
+
+#define GENEVE_HDR_VNI_SHIFT 8
+
+#define GENEVE_SO_MC_MAX_GROUPS 32
+
+#define GENEVE_SO_VNI_HASH_SHIFT 6
+#define GENEVE_SO_VNI_HASH_SIZE (1 << GENEVE_SO_VNI_HASH_SHIFT)
+#define GENEVE_SO_VNI_HASH(_vni) ((_vni) % GENEVE_SO_VNI_HASH_SIZE)
+
+struct geneve_socket {
+ struct socket *gnvso_sock; /* closed with soclose() by geneve_socket_destroy() */
+ struct rmlock gnvso_lock; /* taken through the GENEVE_SO_*LOCK macros */
+ u_int gnvso_refcnt; /* GENEVE_SO_ACQUIRE / GENEVE_SO_RELEASE */
+ union sockaddr_union gnvso_laddr; /* copy of the address given to geneve_socket_alloc() */
+ LIST_ENTRY(geneve_socket) gnvso_entry; /* NOTE(review): presumably linkage on geneve_socket_list -- confirm */
+ struct geneve_softc_head gnvso_vni_hash[GENEVE_SO_VNI_HASH_SIZE]; /* softc lists, one per hash bucket */
+ struct geneve_socket_mc_info gnvso_mc[GENEVE_SO_MC_MAX_GROUPS]; /* multicast group slots */
+};
+
+#define GENEVE_SO_RLOCK(_gnvso, _p) rm_rlock(&(_gnvso)->gnvso_lock, (_p))
+#define GENEVE_SO_RUNLOCK(_gnvso, _p) rm_runlock(&(_gnvso)->gnvso_lock, (_p))
+#define GENEVE_SO_WLOCK(_gnvso) rm_wlock(&(_gnvso)->gnvso_lock)
+#define GENEVE_SO_WUNLOCK(_gnvso) rm_wunlock(&(_gnvso)->gnvso_lock)
+#define GENEVE_SO_LOCK_ASSERT(_gnvso) \
+ rm_assert(&(_gnvso)->gnvso_lock, RA_LOCKED)
+#define GENEVE_SO_LOCK_WASSERT(_gnvso) \
+ rm_assert(&(_gnvso)->gnvso_lock, RA_WLOCKED)
+
+#define GENEVE_SO_ACQUIRE(_gnvso) refcount_acquire(&(_gnvso)->gnvso_refcnt)
+#define GENEVE_SO_RELEASE(_gnvso) refcount_release(&(_gnvso)->gnvso_refcnt)
+
+struct gnv_ftable_entry {
+ LIST_ENTRY(gnv_ftable_entry) gnvfe_hash; /* linkage within one ftable hash bucket */
+ uint16_t gnvfe_flags; /* GENEVE_FE_FLAG_* */
+ uint8_t gnvfe_mac[ETHER_ADDR_LEN]; /* lookup key */
+ union sockaddr_union gnvfe_raddr; /* remote address recorded for this MAC */
+ time_t gnvfe_expire; /* compared against time_uptime for dynamic entries */
+};
+
+#define GENEVE_FE_FLAG_DYNAMIC 0x01
+#define GENEVE_FE_FLAG_STATIC 0x02
+
+#define GENEVE_FE_IS_DYNAMIC(_fe) \
+ ((_fe)->gnvfe_flags & GENEVE_FE_FLAG_DYNAMIC)
+
+#define GENEVE_SC_FTABLE_SHIFT 9
+#define GENEVE_SC_FTABLE_SIZE (1 << GENEVE_SC_FTABLE_SHIFT)
+#define GENEVE_SC_FTABLE_MASK (GENEVE_SC_FTABLE_SIZE - 1)
+#define GENEVE_SC_FTABLE_HASH(_sc, _mac) \
+ (geneve_mac_hash(_sc, _mac) % GENEVE_SC_FTABLE_SIZE)
+
+LIST_HEAD(geneve_ftable_head, gnv_ftable_entry);
+
+struct geneve_statistics {
+ uint32_t ftable_nospace; /* updates refused because the table held gnv_ftable_max entries */
+ uint32_t ftable_lock_upgrade_failed; /* times the read lock was dropped to take the write lock */
+ counter_u64_t txcsum;
+ counter_u64_t tso;
+ counter_u64_t rxcsum;
+};
+
+struct geneve_softc {
+ LIST_ENTRY(geneve_softc) gnv_entry;
+
+ struct ifnet *gnv_ifp;
+ uint32_t gnv_flags; /* GENEVE_FLAG_* bits defined below */
+#define GENEVE_FLAG_INIT 0x0001
+#define GENEVE_FLAG_RUNNING 0x0002
+#define GENEVE_FLAG_TEARDOWN 0x0004
+#define GENEVE_FLAG_LEARN 0x0008
+#define GENEVE_FLAG_USER_MTU 0x0010
+#define GENEVE_FLAG_TTL_INHERIT 0x0020
+#define GENEVE_FLAG_DSCP_INHERIT 0x0040
+#define GENEVE_FLAG_COLLECT_METADATA 0x0080
+
+ int gnv_reqcap;
+ int gnv_reqcap2;
+ struct geneve_socket *gnv_sock;
+ union sockaddr_union gnv_src_addr;
+ union sockaddr_union gnv_dst_addr; /* its port is reused for learned entries */
+ uint32_t gnv_fibnum;
+ uint32_t gnv_vni;
+ uint32_t gnv_port_hash_key;
+ uint16_t gnv_proto;
+ uint16_t gnv_min_port;
+ uint16_t gnv_max_port;
+ uint8_t gnv_ttl;
+ enum ifla_geneve_df gnv_df;
+
+ /* Lookup table from MAC address to forwarding entry. */
+ uint32_t gnv_ftable_cnt; /* entries currently in the table */
+ uint32_t gnv_ftable_max; /* learning fails with ENOSPC at this count */
+ uint32_t gnv_ftable_timeout; /* added to time_uptime to form gnvfe_expire */
+ uint32_t gnv_ftable_hash_key; /* random, chosen in geneve_ftable_init() */
+ struct geneve_ftable_head *gnv_ftable; /* GENEVE_SC_FTABLE_SIZE buckets */
+
+ /* Derived from gnv_dst_addr. */
+ struct gnv_ftable_entry gnv_default_fe;
+
+ struct ip_moptions *gnv_im4o;
+ struct ip6_moptions *gnv_im6o;
+
+ struct rmlock gnv_lock; /* taken through GENEVE_RLOCK / GENEVE_WLOCK */
+ volatile u_int gnv_refcnt; /* GENEVE_ACQUIRE / GENEVE_RELEASE */
+
+ int gnv_so_mc_index;
+ struct geneve_statistics gnv_stats;
+ struct callout gnv_callout;
+ struct ether_addr gnv_hwaddr;
+ int gnv_mc_ifindex;
+ struct ifnet *gnv_mc_ifp;
+ struct ifmedia gnv_media;
+ char gnv_mc_ifname[IFNAMSIZ];
+
+ /* For rate limiting errors on the tx fast path. */
+ struct timeval err_time;
+ int err_pps;
+};
+
+#define GENEVE_RLOCK(_sc, _p) rm_rlock(&(_sc)->gnv_lock, (_p))
+#define GENEVE_RUNLOCK(_sc, _p) rm_runlock(&(_sc)->gnv_lock, (_p))
+#define GENEVE_WLOCK(_sc) rm_wlock(&(_sc)->gnv_lock)
+#define GENEVE_WUNLOCK(_sc) rm_wunlock(&(_sc)->gnv_lock)
+#define GENEVE_LOCK_WOWNED(_sc) rm_wowned(&(_sc)->gnv_lock)
+#define GENEVE_LOCK_ASSERT(_sc) rm_assert(&(_sc)->gnv_lock, RA_LOCKED)
+#define GENEVE_LOCK_WASSERT(_sc) rm_assert(&(_sc)->gnv_lock, RA_WLOCKED)
+#define GENEVE_UNLOCK(_sc, _p) do { /* release whichever mode is held */ \
+ if (GENEVE_LOCK_WOWNED(_sc)) /* a read lock may have been traded for the write lock */ \
+ GENEVE_WUNLOCK(_sc); \
+ else \
+ GENEVE_RUNLOCK(_sc, _p); \
+} while (0)
+
+#define GENEVE_ACQUIRE(_sc) refcount_acquire(&(_sc)->gnv_refcnt)
+#define GENEVE_RELEASE(_sc) refcount_release(&(_sc)->gnv_refcnt)
+
+#define SATOCONSTSIN(sa) ((const struct sockaddr_in *)(sa))
+#define SATOCONSTSIN6(sa) ((const struct sockaddr_in6 *)(sa))
+
+struct geneve_pkt_info { /* NOTE(review): users are outside this view; presumably per-packet receive state -- confirm */
+ u_int isr;
+ uint16_t ethertype;
+ uint8_t ecn;
+ uint8_t ttl;
+};
+
+/*
+ * Destination of the netlink attribute parsers.  The parser tables store
+ * each IFLA_GENEVE_* attribute into the field they name through
+ * offsetof(struct nl_parsed_geneve, field), so the width of every field
+ * has to match what the corresponding nlattr_get_*() callback writes.
+ */
+struct nl_parsed_geneve {
+	/* essential */
+	uint32_t			ifla_vni;
+	uint16_t			ifla_proto;
+	struct sockaddr			*ifla_local;
+	struct sockaddr			*ifla_remote;
+	uint16_t			ifla_local_port;
+	uint16_t			ifla_remote_port;
+
+	/* optional */
+	struct ifla_geneve_port_range	ifla_port_range;
+	/*
+	 * IFLA_GENEVE_DF is parsed with nlattr_get_uint8, which stores a
+	 * single byte.  Keep the destination one byte wide: an enum-sized
+	 * field would only have its first byte written, which is not the
+	 * low-order byte on big-endian machines.  The value still converts
+	 * implicitly to enum ifla_geneve_df wherever it is consumed.
+	 */
+	uint8_t				ifla_df;
+	uint8_t				ifla_ttl;
+	bool				ifla_ttl_inherit;
+	bool				ifla_dscp_inherit;
+	bool				ifla_external;
+
+	/* l2 specific */
+	bool				ifla_ftable_learn;
+	bool				ifla_ftable_flush;
+	uint32_t			ifla_ftable_max;
+	uint32_t			ifla_ftable_timeout;
+	uint32_t			ifla_ftable_count;	/* read-only */
+
+	/* multicast specific */
+	char				*ifla_mc_ifname;
+	uint32_t			ifla_mc_ifindex;	/* read-only */
+};
+
+/* The multicast-based learning parts of the code are taken from if_vxlan */
+static int geneve_ftable_addr_cmp(const uint8_t *, const uint8_t *);
+static void geneve_ftable_init(struct geneve_softc *);
+static void geneve_ftable_fini(struct geneve_softc *);
+static void geneve_ftable_flush(struct geneve_softc *, int);
+static void geneve_ftable_expire(struct geneve_softc *);
+static int geneve_ftable_update_locked(struct geneve_softc *,
+ const union sockaddr_union *, const uint8_t *,
+ struct rm_priotracker *);
+static int geneve_ftable_learn(struct geneve_softc *,
+ const struct sockaddr *, const uint8_t *);
+
+static struct gnv_ftable_entry *
+ geneve_ftable_entry_alloc(void);
+static void geneve_ftable_entry_free(struct gnv_ftable_entry *);
+static void geneve_ftable_entry_init(struct geneve_softc *,
+ struct gnv_ftable_entry *, const uint8_t *,
+ const struct sockaddr *, uint32_t);
+static void geneve_ftable_entry_destroy(struct geneve_softc *,
+ struct gnv_ftable_entry *);
+static int geneve_ftable_entry_insert(struct geneve_softc *,
+ struct gnv_ftable_entry *);
+static struct gnv_ftable_entry *
+ geneve_ftable_entry_lookup(struct geneve_softc *,
+ const uint8_t *);
+
+static struct geneve_socket *
+ geneve_socket_alloc(union sockaddr_union *laddr);
+static void geneve_socket_destroy(struct geneve_socket *);
+static void geneve_socket_release(struct geneve_socket *);
+static struct geneve_socket *
+ geneve_socket_lookup(union sockaddr_union *);
+static void geneve_socket_insert(struct geneve_socket *);
+static int geneve_socket_init(struct geneve_socket *, struct ifnet *);
+static int geneve_socket_bind(struct geneve_socket *, struct ifnet *);
+static int geneve_socket_create(struct ifnet *, int,
+ const union sockaddr_union *, struct geneve_socket **);
+static int geneve_socket_set_df(struct geneve_socket *, bool);
+
+static struct geneve_socket *
+ geneve_socket_mc_lookup(const union sockaddr_union *);
+static int geneve_sockaddr_mc_info_match(
+ const struct geneve_socket_mc_info *,
+ const union sockaddr_union *,
+ const union sockaddr_union *, int);
+static int geneve_socket_mc_join_group(struct geneve_socket *,
+ const union sockaddr_union *, const union sockaddr_union *,
+ int *, union sockaddr_union *);
+static int geneve_socket_mc_leave_group(struct geneve_socket *,
+ const union sockaddr_union *,
+ const union sockaddr_union *, int);
+static int geneve_socket_mc_add_group(struct geneve_socket *,
+ const union sockaddr_union *,
+ const union sockaddr_union *, int, int *);
+static void geneve_socket_mc_release_group(struct geneve_socket *, int);
+
+static struct geneve_softc *
+ geneve_socket_lookup_softc_locked(struct geneve_socket *,
+ uint32_t);
+static struct geneve_softc *
+ geneve_socket_lookup_softc(struct geneve_socket *, uint32_t);
+static int geneve_socket_insert_softc(struct geneve_socket *,
+ struct geneve_softc *);
+static void geneve_socket_remove_softc(struct geneve_socket *,
+ struct geneve_softc *);
+
+static struct ifnet *
+ geneve_multicast_if_ref(struct geneve_softc *, uint32_t);
+static void geneve_free_multicast(struct geneve_softc *);
+static int geneve_setup_multicast_interface(struct geneve_softc *);
+
+static int geneve_setup_multicast(struct geneve_softc *);
+static int geneve_setup_socket(struct geneve_softc *);
+static void geneve_setup_interface_hdrlen(struct geneve_softc *);
+static int geneve_valid_init_config(struct geneve_softc *);
+static void geneve_init_complete(struct geneve_softc *);
+static void geneve_init(void *);
+static void geneve_release(struct geneve_softc *);
+static void geneve_teardown_wait(struct geneve_softc *);
+static void geneve_teardown_locked(struct geneve_softc *);
+static void geneve_teardown(struct geneve_softc *);
+static void geneve_timer(void *);
+
+static int geneve_flush_ftable(struct geneve_softc *, bool);
+static uint16_t geneve_get_local_port(struct geneve_softc *);
+static uint16_t geneve_get_remote_port(struct geneve_softc *);
+
+static int geneve_set_vni_nl(struct geneve_softc *, struct nl_pstate *,
+ uint32_t);
+static int geneve_set_local_addr_nl(struct geneve_softc *, struct nl_pstate *,
+ struct sockaddr *);
+static int geneve_set_remote_addr_nl(struct geneve_softc *, struct nl_pstate *,
+ struct sockaddr *);
+static int geneve_set_local_port_nl(struct geneve_softc *, struct nl_pstate *,
+ uint16_t);
+static int geneve_set_remote_port_nl(struct geneve_softc *, struct nl_pstate *,
+ uint16_t);
+static int geneve_set_port_range_nl(struct geneve_softc *, struct nl_pstate *,
+ struct ifla_geneve_port_range);
+static int geneve_set_df_nl(struct geneve_softc *, struct nl_pstate *,
+ enum ifla_geneve_df);
+static int geneve_set_ttl_nl(struct geneve_softc *, struct nl_pstate *,
+ uint8_t);
+static int geneve_set_ttl_inherit_nl(struct geneve_softc *, struct nl_pstate *,
+ bool);
+static int geneve_set_dscp_inherit_nl(struct geneve_softc *, struct nl_pstate *,
+ bool);
+static int geneve_set_collect_metadata_nl(struct geneve_softc *,
+ struct nl_pstate *, bool);
+static int geneve_set_learn_nl(struct geneve_softc *, struct nl_pstate *,
+ bool);
+static int geneve_set_ftable_max_nl(struct geneve_softc *, struct nl_pstate *,
+ uint32_t);
+static int geneve_set_ftable_timeout_nl(struct geneve_softc *,
+ struct nl_pstate *, uint32_t);
+static int geneve_set_mc_if_nl(struct geneve_softc *, struct nl_pstate *,
+ char *);
+static int geneve_flush_ftable_nl(struct geneve_softc *, struct nl_pstate *,
+ bool);
+static void geneve_get_local_addr_nl(struct geneve_softc *, struct nl_writer *);
+static void geneve_get_remote_addr_nl(struct geneve_softc *, struct nl_writer *);
+
+static int geneve_ioctl_ifflags(struct geneve_softc *);
+static int geneve_ioctl(struct ifnet *, u_long, caddr_t);
+
+static uint16_t geneve_pick_source_port(struct geneve_softc *, struct mbuf *);
+static void geneve_encap_header(struct geneve_softc *, struct mbuf *,
+ int, uint16_t, uint16_t, uint16_t);
+static uint16_t geneve_get_ethertype(struct mbuf *);
+static int geneve_inherit_l3_hdr(struct mbuf *, struct geneve_softc *,
+ uint16_t, uint8_t *, uint8_t *, u_short *);
+static int geneve_encap4(struct geneve_softc *,
+ const union sockaddr_union *, struct mbuf *);
+static int geneve_encap6(struct geneve_softc *,
+ const union sockaddr_union *, struct mbuf *);
+static int geneve_transmit(struct ifnet *, struct mbuf *);
+static void geneve_qflush(struct ifnet *);
+static int geneve_output(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
+static uint32_t geneve_map_etype_to_af(uint32_t);
+static bool geneve_udp_input(struct mbuf *, int, struct inpcb *,
+ const struct sockaddr *, void *);
+static int geneve_input_ether(struct geneve_softc *, struct mbuf **,
+ const struct sockaddr *, struct geneve_pkt_info *);
+static int geneve_input_inherit(struct geneve_softc *,
+ struct mbuf **, int, struct geneve_pkt_info *);
+static int geneve_next_option(struct geneve_socket *, struct genevehdr *,
+ struct mbuf **);
+static void geneve_input_csum(struct mbuf *m, struct ifnet *ifp,
+ counter_u64_t rxcsum);
+
+static void geneve_stats_alloc(struct geneve_softc *);
+static void geneve_stats_free(struct geneve_softc *);
+static void geneve_set_default_config(struct geneve_softc *);
+static int geneve_set_reqcap(struct geneve_softc *, struct ifnet *, int,
+ int);
+static void geneve_set_hwcaps(struct geneve_softc *);
+static int geneve_clone_create(struct if_clone *, char *, size_t,
+ struct ifc_data *, struct ifnet **);
+static int geneve_clone_destroy(struct if_clone *, struct ifnet *,
+ uint32_t);
+static int geneve_clone_create_nl(struct if_clone *, char *, size_t,
+ struct ifc_data_nl *);
+static int geneve_clone_modify_nl(struct ifnet *, struct ifc_data_nl *);
+static void geneve_clone_dump_nl(struct ifnet *, struct nl_writer *);
+
+static uint32_t geneve_mac_hash(struct geneve_softc *, const uint8_t *);
+static int geneve_media_change(struct ifnet *);
+static void geneve_media_status(struct ifnet *, struct ifmediareq *);
+
+static int geneve_sockaddr_cmp(const union sockaddr_union *,
+ const struct sockaddr *);
+static void geneve_sockaddr_copy(union sockaddr_union *,
+ const struct sockaddr *);
+static int geneve_sockaddr_in_equal(const union sockaddr_union *,
+ const struct sockaddr *);
+static void geneve_sockaddr_in_copy(union sockaddr_union *,
+ const struct sockaddr *);
+static int geneve_sockaddr_supported(const union sockaddr_union *, int);
+static int geneve_sockaddr_in_any(const union sockaddr_union *);
+
+static int geneve_can_change_config(struct geneve_softc *);
+static int geneve_check_proto(uint16_t);
+static int geneve_check_multicast_addr(const union sockaddr_union *);
+static int geneve_check_sockaddr(const union sockaddr_union *, const int);
+
+static int geneve_prison_remove(void *, void *);
+static void vnet_geneve_load(void);
+static void vnet_geneve_unload(void);
+static void geneve_module_init(void);
+static void geneve_module_deinit(void);
+static int geneve_modevent(module_t, int, void *);
+
+
+static const char geneve_name[] = "geneve";
+static MALLOC_DEFINE(M_GENEVE, geneve_name,
+ "Generic Network Virtualization Encapsulation Interface");
+#define MTAG_GENEVE_LOOP 0x93d66dc0 /* geneve mtag */
+
+VNET_DEFINE_STATIC(struct if_clone *, geneve_cloner);
+#define V_geneve_cloner VNET(geneve_cloner)
+
+static struct mtx geneve_list_mtx;
+#define GENEVE_LIST_LOCK() mtx_lock(&geneve_list_mtx)
+#define GENEVE_LIST_UNLOCK() mtx_unlock(&geneve_list_mtx)
+
+static LIST_HEAD(, geneve_socket) geneve_socket_list = LIST_HEAD_INITIALIZER(geneve_socket_list);
+
+/* Default maximum number of addresses in the forwarding table. */
+#define GENEVE_FTABLE_MAX 2000
+
+/* Timeout (in seconds) of addresses learned in the forwarding table. */
+#define GENEVE_FTABLE_TIMEOUT (20 * 60)
+
+/* Maximum timeout (in seconds) of addresses learned in the forwarding table. */
+#define GENEVE_FTABLE_MAX_TIMEOUT (60 * 60 * 24)
+
+/* Number of seconds between pruning attempts of the forwarding table. */
+#define GENEVE_FTABLE_PRUNE (5 * 60)
+
+static int geneve_ftable_prune_period = GENEVE_FTABLE_PRUNE;
+
+#define _OUT(_field) offsetof(struct nl_parsed_geneve, _field) /* destination offset of a parsed attribute */
+static const struct nlattr_parser nla_p_geneve_create[] = { /* only IFLA_GENEVE_PROTOCOL is listed in this table */
+ { .type = IFLA_GENEVE_PROTOCOL, .off = _OUT(ifla_proto), .cb = nlattr_get_uint16 },
+};
+#undef _OUT
+NL_DECLARE_ATTR_PARSER(geneve_create_parser, nla_p_geneve_create);
+
+#define _OUT(_field) offsetof(struct nl_parsed_geneve, _field) /* destination offset of a parsed attribute */
+static const struct nlattr_parser nla_p_geneve[] = { /* attribute type -> nl_parsed_geneve field and getter */
+ { .type = IFLA_GENEVE_ID, .off = _OUT(ifla_vni), .cb = nlattr_get_uint32 },
+ { .type = IFLA_GENEVE_PROTOCOL, .off = _OUT(ifla_proto), .cb = nlattr_get_uint16 },
+ { .type = IFLA_GENEVE_LOCAL, .off = _OUT(ifla_local), .cb = nlattr_get_ip },
+ { .type = IFLA_GENEVE_REMOTE, .off = _OUT(ifla_remote), .cb = nlattr_get_ip },
+ { .type = IFLA_GENEVE_LOCAL_PORT, .off = _OUT(ifla_local_port), .cb = nlattr_get_uint16 },
+ { .type = IFLA_GENEVE_PORT, .off = _OUT(ifla_remote_port), .cb = nlattr_get_uint16 },
+ { .type = IFLA_GENEVE_PORT_RANGE, .off = _OUT(ifla_port_range),
+ .arg = (void *)sizeof(struct ifla_geneve_port_range), .cb = nlattr_get_bytes },
+ { .type = IFLA_GENEVE_DF, .off = _OUT(ifla_df), .cb = nlattr_get_uint8 },
+ { .type = IFLA_GENEVE_TTL, .off = _OUT(ifla_ttl), .cb = nlattr_get_uint8 },
+ { .type = IFLA_GENEVE_TTL_INHERIT, .off = _OUT(ifla_ttl_inherit), .cb = nlattr_get_bool },
+ { .type = IFLA_GENEVE_DSCP_INHERIT, .off = _OUT(ifla_dscp_inherit), .cb = nlattr_get_bool },
+ { .type = IFLA_GENEVE_COLLECT_METADATA, .off = _OUT(ifla_external), .cb = nlattr_get_bool },
+ { .type = IFLA_GENEVE_FTABLE_LEARN, .off = _OUT(ifla_ftable_learn), .cb = nlattr_get_bool },
+ { .type = IFLA_GENEVE_FTABLE_FLUSH, .off = _OUT(ifla_ftable_flush), .cb = nlattr_get_bool },
+ { .type = IFLA_GENEVE_FTABLE_MAX, .off = _OUT(ifla_ftable_max), .cb = nlattr_get_uint32 },
+ { .type = IFLA_GENEVE_FTABLE_TIMEOUT, .off = _OUT(ifla_ftable_timeout), .cb = nlattr_get_uint32 },
+ { .type = IFLA_GENEVE_MC_IFNAME, .off = _OUT(ifla_mc_ifname), .cb = nlattr_get_string },
+};
+#undef _OUT
+NL_DECLARE_ATTR_PARSER(geneve_modify_parser, nla_p_geneve);
+
+static const struct nlhdr_parser *all_parsers[] = { /* both parsers declared above */
+ &geneve_create_parser, &geneve_modify_parser,
+};
+
+/*
+ * Compare two Ethernet addresses byte by byte.
+ *
+ * Returns 0 when all ETHER_ADDR_LEN bytes match, otherwise the signed
+ * difference a[i] - b[i] of the first pair of bytes that differ.
+ */
+static int
+geneve_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
+{
+	for (int idx = 0; idx < ETHER_ADDR_LEN; idx++) {
+		int delta = (int)a[idx] - (int)b[idx];
+
+		if (delta != 0)
+			return (delta);
+	}
+
+	return (0);
+}
+
+/*
+ * Allocate the forwarding table of a softc: GENEVE_SC_FTABLE_SIZE empty
+ * hash buckets, and pick a random value for gnv_ftable_hash_key.
+ * The allocation is made with M_WAITOK.
+ */
+static void
+geneve_ftable_init(struct geneve_softc *sc)
+{
+	struct geneve_ftable_head *buckets;
+
+	buckets = malloc(sizeof(*buckets) * GENEVE_SC_FTABLE_SIZE, M_GENEVE,
+	    M_ZERO | M_WAITOK);
+	for (int b = 0; b < GENEVE_SC_FTABLE_SIZE; b++)
+		LIST_INIT(&buckets[b]);
+
+	sc->gnv_ftable = buckets;
+	sc->gnv_ftable_hash_key = arc4random();
+}
+
+/*
+ * Free the forwarding table of a softc.  The table must already be
+ * empty: every bucket and the entry counter are asserted to be clear.
+ */
+static void
+geneve_ftable_fini(struct geneve_softc *sc)
+{
+	for (int b = 0; b < GENEVE_SC_FTABLE_SIZE; b++) {
+		KASSERT(LIST_EMPTY(&sc->gnv_ftable[b]),
+		    ("%s: geneve %p ftable[%d] not empty", __func__, sc, b));
+	}
+	MPASS(sc->gnv_ftable_cnt == 0);
+
+	free(sc->gnv_ftable, M_GENEVE);
+	sc->gnv_ftable = NULL;
+}
+
+/*
+ * Remove entries from the forwarding table.  With `all' non-zero every
+ * entry is destroyed; otherwise only dynamic entries are.
+ */
+static void
+geneve_ftable_flush(struct geneve_softc *sc, int all)
+{
+	struct geneve_ftable_head *bucket;
+	struct gnv_ftable_entry *cur, *next;
+
+	for (int b = 0; b < GENEVE_SC_FTABLE_SIZE; b++) {
+		bucket = &sc->gnv_ftable[b];
+		LIST_FOREACH_SAFE(cur, bucket, gnvfe_hash, next) {
+			/* Entries that are not dynamic survive a partial flush. */
+			if (!all && !GENEVE_FE_IS_DYNAMIC(cur))
+				continue;
+			geneve_ftable_entry_destroy(sc, cur);
+		}
+	}
+}
+
+/*
+ * Destroy every dynamic entry whose expiry time has been reached.
+ * The softc write lock must be held.
+ */
+static void
+geneve_ftable_expire(struct geneve_softc *sc)
+{
+	struct geneve_ftable_head *bucket;
+	struct gnv_ftable_entry *cur, *next;
+
+	GENEVE_LOCK_WASSERT(sc);
+
+	for (int b = 0; b < GENEVE_SC_FTABLE_SIZE; b++) {
+		bucket = &sc->gnv_ftable[b];
+		LIST_FOREACH_SAFE(cur, bucket, gnvfe_hash, next) {
+			if (!GENEVE_FE_IS_DYNAMIC(cur))
+				continue;
+			if (time_uptime < cur->gnvfe_expire)
+				continue;
+			geneve_ftable_entry_destroy(sc, cur);
+		}
+	}
+}
+
+/*
+ * Record that `mac' is reachable through the remote address `unsa'.
+ *
+ * Must be called with the softc lock held in either mode; `tracker' is
+ * the priotracker of the caller's read lock.  When an entry has to be
+ * modified or created and the write lock is not owned, the read lock is
+ * dropped, the write lock is taken and the lookup is redone.  The function
+ * can therefore return holding the write lock instead of the read lock,
+ * which is why the caller releases with GENEVE_UNLOCK().
+ *
+ * Returns 0 on success, ENOSPC when the table already holds
+ * gnv_ftable_max entries, ENOMEM when no entry could be allocated, or the
+ * error reported by geneve_ftable_entry_insert().
+ */
+static int
+geneve_ftable_update_locked(struct geneve_softc *sc,
+    const union sockaddr_union *unsa, const uint8_t *mac,
+    struct rm_priotracker *tracker)
+{
+	struct gnv_ftable_entry *fe;
+	int error;
+
+	GENEVE_LOCK_ASSERT(sc);
+
+again:
+	/*
+	 * A forwarding entry for this MAC address might already exist. If
+	 * so, update it, otherwise create a new one. We may have to upgrade
+	 * the lock if we have to change or create an entry.
+	 */
+	fe = geneve_ftable_entry_lookup(sc, mac);
+	if (fe != NULL) {
+		/* The expiry is refreshed whichever lock mode is held. */
+		fe->gnvfe_expire = time_uptime + sc->gnv_ftable_timeout;
+
+		/* Non-dynamic entries and unchanged remotes need no update. */
+		if (!GENEVE_FE_IS_DYNAMIC(fe) ||
+		    geneve_sockaddr_in_equal(&fe->gnvfe_raddr, &unsa->sa))
+			return (0);
+		if (!GENEVE_LOCK_WOWNED(sc)) {
+			GENEVE_RUNLOCK(sc, tracker);
+			GENEVE_WLOCK(sc);
+			sc->gnv_stats.ftable_lock_upgrade_failed++;
+			goto again;
+		}
+		geneve_sockaddr_in_copy(&fe->gnvfe_raddr, &unsa->sa);
+		return (0);
+	}
+
+	if (!GENEVE_LOCK_WOWNED(sc)) {
+		GENEVE_RUNLOCK(sc, tracker);
+		GENEVE_WLOCK(sc);
+		sc->gnv_stats.ftable_lock_upgrade_failed++;
+		goto again;
+	}
+
+	if (sc->gnv_ftable_cnt >= sc->gnv_ftable_max) {
+		sc->gnv_stats.ftable_nospace++;
+		return (ENOSPC);
+	}
+
+	fe = geneve_ftable_entry_alloc();
+	if (fe == NULL)
+		return (ENOMEM);
+
+	geneve_ftable_entry_init(sc, fe, mac, &unsa->sa, GENEVE_FE_FLAG_DYNAMIC);
+
+	/* The prior lookup failed, so the insert should not. */
+	error = geneve_ftable_entry_insert(sc, fe);
+	MPASS(error == 0);
+	/*
+	 * MPASS() compiles away without INVARIANTS; do not leak the entry
+	 * should the insert ever fail there.
+	 */
+	if (error != 0)
+		geneve_ftable_entry_free(fe);
+
+	return (error);
+}
+
+/*
+ * Learn that `mac' was seen from the remote address `sa'.
+ *
+ * Takes the softc read lock around geneve_ftable_update_locked() and
+ * releases whichever lock mode is held afterwards.  Returns 0 on success,
+ * or the error from sa6_embedscope() or from the table update.
+ */
+static int
+geneve_ftable_learn(struct geneve_softc *sc, const struct sockaddr *sa,
+    const uint8_t *mac)
+{
+	struct rm_priotracker pt;
+	union sockaddr_union raddr;
+	int rc;
+
+	/*
+	 * The remote host may pick its source port at random, so the entry
+	 * records the port of the default destination address instead.
+	 */
+	geneve_sockaddr_copy(&raddr, sa);
+	raddr.sin.sin_port = sc->gnv_dst_addr.sin.sin_port;
+
+	if (raddr.sa.sa_family == AF_INET6) {
+		rc = sa6_embedscope(&raddr.sin6, V_ip6_use_defzone);
+		if (rc != 0)
+			return (rc);
+	}
+
+	GENEVE_RLOCK(sc, &pt);
+	rc = geneve_ftable_update_locked(sc, &raddr, mac, &pt);
+	GENEVE_UNLOCK(sc, &pt);
+
+	return (rc);
+}
+
+/*
+ * Allocate a zeroed forwarding entry without sleeping (M_NOWAIT).
+ * Returns NULL when the allocation fails.
+ */
+static struct gnv_ftable_entry *
+geneve_ftable_entry_alloc(void)
+{
+	return (malloc(sizeof(struct gnv_ftable_entry), M_GENEVE,
+	    M_ZERO | M_NOWAIT));
+}
+
+/* Return a forwarding entry to the M_GENEVE malloc type. */
+static void
+geneve_ftable_entry_free(struct gnv_ftable_entry *fe)
+{
+	free(fe, M_GENEVE);
+}
+
+/*
+ * Fill in a forwarding entry: MAC address, remote address, flags and an
+ * expiry of gnv_ftable_timeout from now.  `flags' is stored in the 16-bit
+ * gnvfe_flags field.
+ */
+static void
+geneve_ftable_entry_init(struct geneve_softc *sc, struct gnv_ftable_entry *fe,
+    const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
+{
+	memcpy(fe->gnvfe_mac, mac, ETHER_ADDR_LEN);
+	geneve_sockaddr_copy(&fe->gnvfe_raddr, sa);
+	fe->gnvfe_flags = flags;
+	fe->gnvfe_expire = time_uptime + sc->gnv_ftable_timeout;
+}
+
+/*
+ * Unlink a forwarding entry from its hash bucket, account for it in
+ * gnv_ftable_cnt and free it.
+ */
+static void
+geneve_ftable_entry_destroy(struct geneve_softc *sc,
+    struct gnv_ftable_entry *fe)
+{
+	LIST_REMOVE(fe, gnvfe_hash);
+	sc->gnv_ftable_cnt--;
+	geneve_ftable_entry_free(fe);
+}
+
+/*
+ * Link a forwarding entry into the hash bucket of its MAC address.
+ *
+ * Each bucket is kept ordered from the largest MAC address to the
+ * smallest, as defined by geneve_ftable_addr_cmp().  The softc write lock
+ * must be held.  Returns EEXIST if the MAC address is already present,
+ * otherwise 0 after bumping gnv_ftable_cnt.
+ */
+static int
+geneve_ftable_entry_insert(struct geneve_softc *sc,
+    struct gnv_ftable_entry *fe)
+{
+	struct geneve_ftable_head *bucket;
+	struct gnv_ftable_entry *cur;
+	int cmp;
+
+	GENEVE_LOCK_WASSERT(sc);
+	bucket = &sc->gnv_ftable[GENEVE_SC_FTABLE_HASH(sc, fe->gnvfe_mac)];
+
+	if (LIST_EMPTY(bucket)) {
+		LIST_INSERT_HEAD(bucket, fe, gnvfe_hash);
+	} else {
+		for (cur = LIST_FIRST(bucket); ;
+		    cur = LIST_NEXT(cur, gnvfe_hash)) {
+			cmp = geneve_ftable_addr_cmp(fe->gnvfe_mac,
+			    cur->gnvfe_mac);
+			if (cmp == 0)
+				return (EEXIST);
+			if (cmp > 0) {
+				/* First smaller address: go in front of it. */
+				LIST_INSERT_BEFORE(cur, fe, gnvfe_hash);
+				break;
+			}
+			if (LIST_NEXT(cur, gnvfe_hash) == NULL) {
+				/* Smallest so far: append at the tail. */
+				LIST_INSERT_AFTER(cur, fe, gnvfe_hash);
+				break;
+			}
+		}
+	}
+
+	sc->gnv_ftable_cnt++;
+
+	return (0);
+}
+
+/*
+ * Find the forwarding entry for `mac'.  The softc lock must be held in
+ * either mode.  Returns the entry, or NULL if there is none.
+ */
+static struct gnv_ftable_entry *
+geneve_ftable_entry_lookup(struct geneve_softc *sc, const uint8_t *mac)
+{
+	struct geneve_ftable_head *bucket;
+	struct gnv_ftable_entry *cur;
+	int cmp;
+
+	GENEVE_LOCK_ASSERT(sc);
+
+	bucket = &sc->gnv_ftable[GENEVE_SC_FTABLE_HASH(sc, mac)];
+	for (cur = LIST_FIRST(bucket); cur != NULL;
+	    cur = LIST_NEXT(cur, gnvfe_hash)) {
+		cmp = geneve_ftable_addr_cmp(mac, cur->gnvfe_mac);
+		if (cmp == 0)
+			return (cur);
+		/* Stop at the first entry that compares below `mac'. */
+		if (cmp > 0)
+			return (NULL);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Allocate a zeroed geneve socket (M_WAITOK) bound to a copy of `laddr'.
+ * The rmlock is initialised, the reference count starts at 0 and every
+ * VNI hash bucket is empty.
+ */
+static struct geneve_socket *
+geneve_socket_alloc(union sockaddr_union *laddr)
+{
+	struct geneve_socket *gs;
+	int b;
+
+	gs = malloc(sizeof(struct geneve_socket), M_GENEVE, M_WAITOK | M_ZERO);
+	rm_init(&gs->gnvso_lock, "genevesorm");
+	refcount_init(&gs->gnvso_refcnt, 0);
+	b = 0;
+	while (b < GENEVE_SO_VNI_HASH_SIZE) {
+		LIST_INIT(&gs->gnvso_vni_hash[b]);
+		b++;
+	}
+	gs->gnvso_laddr = *laddr;
+
+	return (gs);
+}
+
+/*
+ * Tear down a geneve socket: close the underlying socket if there is
+ * one, destroy the rmlock and free the structure.
+ */
+static void
+geneve_socket_destroy(struct geneve_socket *gnvso)
+{
+	if (gnvso->gnvso_sock != NULL) {
+		struct socket *sock = gnvso->gnvso_sock;
+
+		/* Clear the pointer before closing the socket. */
+		gnvso->gnvso_sock = NULL;
+		soclose(sock);
+	}
+
+	rm_destroy(&gnvso->gnvso_lock);
+	free(gnvso, M_GENEVE);
+}
+
+static void
+geneve_socket_release(struct geneve_socket *gnvso)
+{
+ int destroy;
+
+ GENEVE_LIST_LOCK();
*** 3327 LINES SKIPPED ***