git: d10589584484 - main - LinuxKPI: import beginning of a new version of netdevice.h

From: Bjoern A. Zeeb <bz_at_FreeBSD.org>
Date: Mon, 27 Dec 2021 19:11:44 UTC
The branch main has been updated by bz:

URL: https://cgit.FreeBSD.org/src/commit/?id=d105895844848ddba85d686e03e0d14c03ad4eb4

commit d105895844848ddba85d686e03e0d14c03ad4eb4
Author:     Bjoern A. Zeeb <bz@FreeBSD.org>
AuthorDate: 2021-12-26 18:29:29 +0000
Commit:     Bjoern A. Zeeb <bz@FreeBSD.org>
CommitDate: 2021-12-27 18:47:25 +0000

    LinuxKPI: import beginning of a new version of netdevice.h
    
    Import a netdevice update complementing the last remaining bits of
    the old ifnet derived implementation.  Along with it, add a (for now)
    task based NAPI implementation.
    
    This is the minimal set of changes which are needed for the initial
    support of wireless drivers.  The NAPI implementation has an option to
    still switch to "direct dispatch" as it had been used by these drivers
    before not relying on a deferred context along with some printf tracing.
    This has been helpful in the last weeks for debugging and will be
    cleaned once we have had broader testing and are sure this is fine as-is.
    Should we need a more time-sensitive or load-sensitive response
    in the future we can always switch to something more sophisticated.
    
    Sponsored by:           The FreeBSD Foundation
    MFC after:              3 days
    X-Differential Revision: D33075 (abandoned without feedback a while ago)
---
 .../linuxkpi/common/include/linux/netdevice.h      | 240 ++++++++++-
 sys/compat/linuxkpi/common/src/linux_netdev.c      | 441 +++++++++++++++++++++
 sys/conf/files                                     |   2 +
 sys/modules/linuxkpi/Makefile                      |   1 +
 4 files changed, 678 insertions(+), 6 deletions(-)

diff --git a/sys/compat/linuxkpi/common/include/linux/netdevice.h b/sys/compat/linuxkpi/common/include/linux/netdevice.h
index 1de31b3fa156..f8c03f92b025 100644
--- a/sys/compat/linuxkpi/common/include/linux/netdevice.h
+++ b/sys/compat/linuxkpi/common/include/linux/netdevice.h
@@ -4,6 +4,11 @@
  * Copyright (c) 2010 Panasas, Inc.
  * Copyright (c) 2013-2019 Mellanox Technologies, Ltd.
  * All rights reserved.
+ * Copyright (c) 2020-2021 The FreeBSD Foundation
+ * Copyright (c) 2020-2021 Bjoern A. Zeeb
+ *
+ * Portions of this software were developed by Björn Zeeb
+ * under sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -28,20 +33,34 @@
  *
  * $FreeBSD$
  */
-#ifndef	_LINUX_NETDEVICE_H_
-#define	_LINUX_NETDEVICE_H_
+#ifndef	_LINUXKPI_LINUX_NETDEVICE_H
+#define	_LINUXKPI_LINUX_NETDEVICE_H
 
 #include <linux/types.h>
+#include <linux/netdev_features.h>
 
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/malloc.h>
+#include <sys/queue.h>
 #include <sys/socket.h>
+#include <sys/taskqueue.h>
 
 #include <net/if_types.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 
+#include <linux/bitops.h>
+#include <linux/list.h>
+#include <linux/device.h>
 #include <linux/net.h>
+#include <linux/if_ether.h>
 #include <linux/notifier.h>
+#include <linux/random.h>
+#include <linux/rcupdate.h>
 
 #ifdef VIMAGE
 #define	init_net *vnet0
@@ -49,13 +68,93 @@
 #define	init_net *((struct vnet *)0)
 #endif
 
+struct sk_buff;
+struct net_device;
+struct wireless_dev;		/* net/cfg80211.h */
+
 #define	MAX_ADDR_LEN		20
 
-#define	net_device	ifnet
+#define	NET_NAME_UNKNOWN	0
+
+enum netdev_tx {
+	NETDEV_TX_OK		= 0,
+};
+typedef	enum netdev_tx		netdev_tx_t;
+
+struct netdev_hw_addr {
+	struct list_head	addr_list;
+	uint8_t			addr[MAX_ADDR_LEN];
+};
+
+struct netdev_hw_addr_list {
+	struct list_head	addr_list;
+	int			count;
+};
+
+enum net_device_reg_state {
+	NETREG_DUMMY		= 1,
+	NETREG_REGISTERED,
+};
+
+struct net_device_ops {
+	int (*ndo_open)(struct net_device *);
+	int (*ndo_stop)(struct net_device *);
+	int (*ndo_set_mac_address)(struct net_device *,  void *);
+	netdev_tx_t (*ndo_start_xmit)(struct sk_buff *, struct net_device *);
+	void (*ndo_set_rx_mode)(struct net_device *);
+};
+
+struct net_device {
+	/* BSD specific for compat. */
+	struct ifnet			bsdifp;
+
+	/* net_device fields seen publicly. */
+	/* XXX can we later make some aliases to ifnet? */
+	char				name[IFNAMSIZ];
+	struct wireless_dev		*ieee80211_ptr;
+	uint8_t				dev_addr[ETH_ALEN];
+	struct netdev_hw_addr_list	mc;
+	netdev_features_t		features;
+	struct {
+		unsigned long		multicast;
+
+		unsigned long		rx_bytes;
+		unsigned long		rx_errors;
+		unsigned long		rx_packets;
+		unsigned long		tx_bytes;
+		unsigned long		tx_dropped;
+		unsigned long		tx_errors;
+		unsigned long		tx_packets;
+	} stats;
+	enum net_device_reg_state	reg_state;
+	const struct ethtool_ops	*ethtool_ops;
+	const struct net_device_ops	*netdev_ops;
 
-#define	rtnl_lock()
-#define	rtnl_unlock()
+	bool				needs_free_netdev;
+	/* Not properly typed as-of now. */
+	int	flags, type;
+	int	name_assign_type, needed_headroom;
 
+	void (*priv_destructor)(struct net_device *);
+
+	/* net_device internal. */
+	struct device			dev;
+
+	/*
+	 * In case we delete the net_device we need to be able to clear all
+	 * NAPI consumers.
+	 */
+	struct mtx			napi_mtx;
+	TAILQ_HEAD(, napi_struct)	napi_head;
+	struct taskqueue		*napi_tq;
+
+	/* Must stay last. */
+	uint8_t				drv_priv[0] __aligned(CACHE_LINE_SIZE);
+};
+
+#define	SET_NETDEV_DEV(_ndev, _dev)	((_ndev)->dev.parent = (_dev))
+
+/* -------------------------------------------------------------------------- */
 /* According to linux::ipoib_main.c. */
 struct netdev_notifier_info {
 	struct net_device	*dev;
@@ -79,4 +178,133 @@ int	register_inetaddr_notifier(struct notifier_block *);
 int	unregister_netdevice_notifier(struct notifier_block *);
 int	unregister_inetaddr_notifier(struct notifier_block *);
 
-#endif	/* _LINUX_NETDEVICE_H_ */
+/* -------------------------------------------------------------------------- */
+
+#define	NAPI_POLL_WEIGHT			64	/* budget */
+
+struct napi_struct {
+	TAILQ_ENTRY(napi_struct)	entry;
+
+	struct list_head	rx_list;
+	struct net_device	*dev;
+	int			(*poll)(struct napi_struct *, int);
+	int			budget;
+	int			rx_count;
+
+	/*
+	 * These flags mostly need to be checked/changed atomically
+	 * (multiple together in some cases).
+	 */
+	volatile unsigned long	_flags;
+
+	/* FreeBSD internal. */
+	/* Use task for now, so we can easily switch between direct and task. */
+	struct task		napi_task;
+};
+
+void linuxkpi_init_dummy_netdev(struct net_device *);
+void linuxkpi_netif_napi_add(struct net_device *, struct napi_struct *,
+    int(*napi_poll)(struct napi_struct *, int), int);
+void linuxkpi_netif_napi_del(struct napi_struct *);
+bool linuxkpi_napi_schedule_prep(struct napi_struct *);
+void linuxkpi___napi_schedule(struct napi_struct *);
+void linuxkpi_napi_schedule(struct napi_struct *);
+void linuxkpi_napi_reschedule(struct napi_struct *);
+bool linuxkpi_napi_complete_done(struct napi_struct *, int);
+bool linuxkpi_napi_complete(struct napi_struct *);
+void linuxkpi_napi_disable(struct napi_struct *);
+void linuxkpi_napi_enable(struct napi_struct *);
+void linuxkpi_napi_synchronize(struct napi_struct *);
+
+#define	init_dummy_netdev(_n)						\
+	linuxkpi_init_dummy_netdev(_n)
+#define	netif_napi_add(_nd, _ns, _p, _b)				\
+	linuxkpi_netif_napi_add(_nd, _ns, _p, _b)
+#define	netif_napi_del(_n)						\
+	linuxkpi_netif_napi_del(_n)
+#define	napi_schedule_prep(_n)						\
+	linuxkpi_napi_schedule_prep(_n)
+#define	__napi_schedule(_n)						\
+	linuxkpi___napi_schedule(_n)
+#define	napi_schedule(_n)						\
+	linuxkpi_napi_schedule(_n)
+#define	napi_reschedule(_n)						\
+	linuxkpi_napi_reschedule(_n)
+#define	napi_complete_done(_n, _r)					\
+	linuxkpi_napi_complete_done(_n, _r)
+#define	napi_complete(_n)						\
+	linuxkpi_napi_complete(_n)
+#define	napi_disable(_n)						\
+	linuxkpi_napi_disable(_n)
+#define	napi_enable(_n)							\
+	linuxkpi_napi_enable(_n)
+#define	napi_synchronize(_n)						\
+	linuxkpi_napi_synchronize(_n)
+
+/* -------------------------------------------------------------------------- */
+
+static inline void
+netdev_rss_key_fill(uint32_t *buf, size_t len)
+{
+
+	/*
+	 * Remembering from a previous life there were discussions on what is
+	 * a good RSS hash key.  See end of rss_init() in net/rss_config.c.
+	 * iwlwifi is looking for a 10 byte "secret" so stay with random for now.
+	 */
+	get_random_bytes(buf, len);
+}
+
+static inline int
+netdev_hw_addr_list_count(struct netdev_hw_addr_list *list)
+{
+
+	return (list->count);
+}
+
+static inline int
+netdev_mc_count(struct net_device *ndev)
+{
+
+	return (netdev_hw_addr_list_count(&ndev->mc));
+}
+
+#define	netdev_hw_addr_list_for_each(_addr, _list)			\
+	list_for_each_entry((_addr), &(_list)->addr_list, addr_list)
+
+#define	netdev_for_each_mc_addr(na, ndev)				\
+	netdev_hw_addr_list_for_each(na, &(ndev)->mc)
+
+static __inline void
+synchronize_net(void)
+{
+
+	/* We probably cannot do that unconditionally at some point anymore. */
+	synchronize_rcu();
+}
+
+/* -------------------------------------------------------------------------- */
+
+struct net_device *linuxkpi_alloc_netdev(size_t, const char *, uint32_t,
+    void(*)(struct net_device *));
+void linuxkpi_free_netdev(struct net_device *);
+
+#define	alloc_netdev(_l, _n, _f, _func)						\
+	linuxkpi_alloc_netdev(_l, _n, _f, _func)
+#define	free_netdev(_n)								\
+	linuxkpi_free_netdev(_n)
+
+static inline void *
+netdev_priv(const struct net_device *ndev)
+{
+
+	return (__DECONST(void *, ndev->drv_priv));
+}
+
+/* -------------------------------------------------------------------------- */
+/* This is really rtnetlink and probably belongs elsewhere. */
+
+#define	rtnl_lock()		do { } while(0)
+#define	rtnl_unlock()		do { } while(0)
+
+#endif	/* _LINUXKPI_LINUX_NETDEVICE_H */
diff --git a/sys/compat/linuxkpi/common/src/linux_netdev.c b/sys/compat/linuxkpi/common/src/linux_netdev.c
new file mode 100644
index 000000000000..27e29b40ea44
--- /dev/null
+++ b/sys/compat/linuxkpi/common/src/linux_netdev.c
@@ -0,0 +1,441 @@
+/*-
+ * Copyright (c) 2021 The FreeBSD Foundation
+ *
+ * This software was developed by Björn Zeeb under sponsorship from
+ * the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+
+#include <linux/bitops.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+
+MALLOC_DEFINE(M_NETDEV, "lkpindev", "Linux KPI netdevice compat");
+
+#define	NAPI_LOCK_INIT(_ndev)		\
+    mtx_init(&(_ndev)->napi_mtx, "napi_mtx", NULL, MTX_DEF)
+#define	NAPI_LOCK_DESTROY(_ndev)	mtx_destroy(&(_ndev)->napi_mtx)
+#define	NAPI_LOCK_ASSERT(_ndev)		mtx_assert(&(_ndev)->napi_mtx, MA_OWNED)
+#define	NAPI_LOCK(_ndev)		mtx_lock(&(_ndev)->napi_mtx)
+#define	NAPI_UNLOCK(_ndev)		mtx_unlock(&(_ndev)->napi_mtx)
+
+/* -------------------------------------------------------------------------- */
+/* Do not schedule new things while we are waiting to clear things. */
+#define	LKPI_NAPI_FLAG_DISABLE_PENDING				0
+/* To synchronise that only one poll is ever running. */
+#define	LKPI_NAPI_FLAG_IS_SCHEDULED				1
+/* If trying to schedule while poll is running. Need to re-schedule. */
+#define	LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN			2
+/* When shutting down forcefully prevent anything from running task/poll. */
+#define	LKPI_NAPI_FLAG_SHUTDOWN					3
+
+#define LKPI_NAPI_FLAGS \
+        "\20\1DISABLE_PENDING\2IS_SCHEDULED\3LOST_RACE_TRY_AGAIN\4SHUTDOWN"
+
+/* #define	NAPI_DEBUG */
+#ifdef NAPI_DEBUG
+static int debug_napi;
+SYSCTL_INT(_compat_linuxkpi, OID_AUTO, debug_napi, CTLFLAG_RWTUN,
+    &debug_napi, 0, "NAPI debug level");
+
+#define	DNAPI_TODO		0x01
+#define	DNAPI_IMPROVE		0x02
+#define	DNAPI_TRACE		0x10
+#define	DNAPI_TRACE_TASK	0x20
+#define	DNAPI_DIRECT_DISPATCH	0x1000
+
+#define	NAPI_TRACE(_n)		if (debug_napi & DNAPI_TRACE)		\
+    printf("NAPI_TRACE %s:%d %u %p (%#jx %b)\n", __func__, __LINE__,	\
+	(unsigned int)ticks, _n, (uintmax_t)(_n)->_flags,		\
+	(int)(_n)->_flags, LKPI_NAPI_FLAGS)
+#define	NAPI_TRACE2D(_n, _d)	if (debug_napi & DNAPI_TRACE)		\
+    printf("NAPI_TRACE %s:%d %u %p (%#jx %b) %d\n", __func__, __LINE__, \
+	(unsigned int)ticks, _n, (uintmax_t)(_n)->_flags,		\
+	(int)(_n)->_flags, LKPI_NAPI_FLAGS, _d)
+#define	NAPI_TRACE_TASK(_n, _p, _c) if (debug_napi & DNAPI_TRACE_TASK)	\
+    printf("NAPI_TRACE %s:%d %u %p (%#jx %b) pending %d count %d "	\
+	"rx_count %d\n", __func__, __LINE__,				\
+	(unsigned int)ticks, _n, (uintmax_t)(_n)->_flags,		\
+	(int)(_n)->_flags, LKPI_NAPI_FLAGS, _p, _c, (_n)->rx_count)
+#define	NAPI_TODO()		if (debug_napi & DNAPI_TODO)		\
+    printf("NAPI_TODO %s:%d %d\n", __func__, __LINE__, ticks)
+#define	NAPI_IMPROVE()		if (debug_napi & DNAPI_IMPROVE)		\
+    printf("NAPI_IMPROVE %s:%d %d\n", __func__, __LINE__, ticks)
+
+#define	NAPI_DIRECT_DISPATCH()	((debug_napi & DNAPI_DIRECT_DISPATCH) != 0)
+#else
+#define	NAPI_TRACE(_n)			do { } while(0)
+#define	NAPI_TRACE2D(_n, _d)		do { } while(0)
+#define	NAPI_TRACE_TASK(_n, _p, _c)	do { } while(0)
+#define	NAPI_TODO()			do { } while(0)
+#define	NAPI_IMPROVE()			do { } while(0)
+
+#define	NAPI_DIRECT_DISPATCH()		(0)
+#endif
+
+/* -------------------------------------------------------------------------- */
+
+/*
+ * Check if a poll is running or can run and, if the latter,
+ * mark us as scheduled.  That way we ensure that only one poll
+ * can ever run at the same time.  Returns true if no poll
+ * was scheduled yet; false if one was, or if a disable is pending.
+ */
+bool
+linuxkpi_napi_schedule_prep(struct napi_struct *napi)
+{
+	unsigned long old, new;
+
+	NAPI_TRACE(napi);
+
+	/* We can only update/return if all flags agree. */
+	do {
+		old = READ_ONCE(napi->_flags);
+
+		/* If we are stopping, cannot run again. */
+		if ((old & BIT(LKPI_NAPI_FLAG_DISABLE_PENDING)) != 0) {
+			NAPI_TRACE(napi);
+			return (false);
+		}
+
+		new = old;
+		/* We were already scheduled. Need to try again? */
+		if ((old & BIT(LKPI_NAPI_FLAG_IS_SCHEDULED)) != 0)
+			new |= BIT(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN);
+		new |= BIT(LKPI_NAPI_FLAG_IS_SCHEDULED);
+
+	} while (atomic_cmpset_acq_long(&napi->_flags, old, new) == 0);
+
+	NAPI_TRACE(napi);
+        return ((old & BIT(LKPI_NAPI_FLAG_IS_SCHEDULED)) == 0);
+}
+
+static void
+lkpi___napi_schedule_dd(struct napi_struct *napi)
+{
+	unsigned long old, new;
+	int rc;
+
+	rc = 0;
+again:
+	NAPI_TRACE2D(napi, rc);
+	if (napi->poll != NULL)
+		rc = napi->poll(napi, napi->budget);
+	napi->rx_count += rc;
+
+	/* Check if interrupts are still disabled, more work to do. */
+	/* Bandaid for now. */
+	if (rc >= napi->budget)
+		goto again;
+
+	/* Bandaid for now. */
+	if (test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &napi->_flags))
+		goto again;
+
+	do {
+		new = old = READ_ONCE(napi->_flags);
+		clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &new);
+		clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &new);
+	} while (atomic_cmpset_acq_long(&napi->_flags, old, new) == 0);
+
+	NAPI_TRACE2D(napi, rc);
+}
+
+void
+linuxkpi___napi_schedule(struct napi_struct *napi)
+{
+	int rc;
+
+	NAPI_TRACE(napi);
+	if (test_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->_flags)) {
+		clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &napi->_flags);
+		clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->_flags);
+		NAPI_TRACE(napi);
+		return;
+	}
+
+	if (NAPI_DIRECT_DISPATCH()) {
+		lkpi___napi_schedule_dd(napi);
+	} else {
+		rc = taskqueue_enqueue(napi->dev->napi_tq, &napi->napi_task);
+		NAPI_TRACE2D(napi, rc);
+		if (rc != 0) {
+			/* Should we assert EPIPE? */
+			return;
+		}
+	}
+}
+
+void
+linuxkpi_napi_schedule(struct napi_struct *napi)
+{
+
+	NAPI_TRACE(napi);
+
+	/*
+	 * iwlwifi calls this sequence instead of napi_schedule()
+	 * to be able to test the prep result.
+	 */
+	if (napi_schedule_prep(napi))
+		__napi_schedule(napi);
+}
+
+void
+linuxkpi_napi_reschedule(struct napi_struct *napi)
+{
+
+	NAPI_TRACE(napi);
+
+	/* Not sure what is different to napi_schedule yet. */
+	if (napi_schedule_prep(napi))
+		__napi_schedule(napi);
+}
+
+bool
+linuxkpi_napi_complete_done(struct napi_struct *napi, int ret)
+{
+	unsigned long old, new;
+
+	NAPI_TRACE(napi);
+	if (NAPI_DIRECT_DISPATCH())
+		return (true);
+
+	do {
+		new = old = READ_ONCE(napi->_flags);
+
+		/*
+		 * If we lost a race before, we need to re-schedule.
+		 * Leave IS_SCHEDULED set essentially doing "_prep".
+		 */
+		if (!test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &old))
+			clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &new);
+		clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &new);
+	} while (atomic_cmpset_acq_long(&napi->_flags, old, new) == 0);
+
+	NAPI_TRACE(napi);
+
+	/* Someone tried to schedule while poll was running. Re-sched. */
+	if (test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &old)) {
+		__napi_schedule(napi);
+		return (false);
+	}
+
+	return (true);
+}
+
+bool
+linuxkpi_napi_complete(struct napi_struct *napi)
+{
+
+	NAPI_TRACE(napi);
+	return (napi_complete_done(napi, 0));
+}
+
+void
+linuxkpi_napi_disable(struct napi_struct *napi)
+{
+	NAPI_TRACE(napi);
+	set_bit(LKPI_NAPI_FLAG_DISABLE_PENDING, &napi->_flags);
+	while (test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->_flags))
+		pause_sbt("napidslp", SBT_1MS, 0, C_HARDCLOCK);
+	clear_bit(LKPI_NAPI_FLAG_DISABLE_PENDING, &napi->_flags);
+}
+
+void
+linuxkpi_napi_enable(struct napi_struct *napi)
+{
+
+	NAPI_TRACE(napi);
+	KASSERT(!test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->_flags),
+	    ("%s: enabling napi %p already scheduled\n", __func__, napi));
+	mb();
+	/* Let us be scheduled. */
+	clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->_flags);
+}
+
+void
+linuxkpi_napi_synchronize(struct napi_struct *napi)
+{
+	NAPI_TRACE(napi);
+#if defined(SMP)
+	/* Check & sleep while a napi is scheduled. */
+	while (test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->_flags))
+		pause_sbt("napisslp", SBT_1MS, 0, C_HARDCLOCK);
+#else
+	mb();
+#endif
+}
+
+/* -------------------------------------------------------------------------- */
+
+static void
+lkpi_napi_task(void *ctx, int pending)
+{
+	struct napi_struct *napi;
+	int count;
+
+	KASSERT(ctx != NULL, ("%s: napi %p, pending %d\n",
+	    __func__, ctx, pending));
+	napi = ctx;
+	KASSERT(napi->poll != NULL, ("%s: napi %p poll is NULL\n",
+	    __func__, napi));
+
+	NAPI_TRACE_TASK(napi, pending, napi->budget);
+	count = napi->poll(napi, napi->budget);
+	napi->rx_count += count;
+	NAPI_TRACE_TASK(napi, pending, count);
+
+	/*
+	 * We must not check against count < pending here.  There are situations
+	 * when a driver may "poll" and we may not have any work to do and that
+	 * would make us re-schedule ourselves forever.
+	 */
+	if (count >= napi->budget) {
+		/*
+		 * Have to re-schedule ourselves.  napi_complete() was not run
+		 * in this case which means we are still SCHEDULED.
+		 * In order to queue another task we have to directly call
+		 * __napi_schedule() without _prep() in the way.
+		 */
+		__napi_schedule(napi);
+	}
+}
+
+/* -------------------------------------------------------------------------- */
+
+void
+linuxkpi_netif_napi_add(struct net_device *ndev, struct napi_struct *napi,
+    int(*napi_poll)(struct napi_struct *, int), int budget)
+{
+
+	napi->dev = ndev;
+	napi->poll = napi_poll;
+	napi->budget = budget;
+
+	INIT_LIST_HEAD(&napi->rx_list);
+	napi->rx_count = 0;
+
+	TASK_INIT(&napi->napi_task, 0, lkpi_napi_task, napi);
+
+	NAPI_LOCK(ndev);
+	TAILQ_INSERT_TAIL(&ndev->napi_head, napi, entry);
+	NAPI_UNLOCK(ndev);
+
+	/* Anything else to do on the ndev? */
+	clear_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->_flags);
+}
+
+static void
+lkpi_netif_napi_del_locked(struct napi_struct *napi)
+{
+	struct net_device *ndev;
+
+	ndev = napi->dev;
+	NAPI_LOCK_ASSERT(ndev);
+
+	set_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->_flags);
+	TAILQ_REMOVE(&ndev->napi_head, napi, entry);
+	while (taskqueue_cancel(ndev->napi_tq, &napi->napi_task, NULL) != 0)
+		taskqueue_drain(ndev->napi_tq, &napi->napi_task);
+}
+
+void
+linuxkpi_netif_napi_del(struct napi_struct *napi)
+{
+	struct net_device *ndev;
+
+	ndev = napi->dev;
+	NAPI_LOCK(ndev);
+	lkpi_netif_napi_del_locked(napi);
+	NAPI_UNLOCK(ndev);
+}
+
+/* -------------------------------------------------------------------------- */
+
+void
+linuxkpi_init_dummy_netdev(struct net_device *ndev)
+{
+
+	memset(ndev, 0, sizeof(*ndev));
+
+	ndev->reg_state = NETREG_DUMMY;
+	NAPI_LOCK_INIT(ndev);
+	TAILQ_INIT(&ndev->napi_head);
+	/* Anything else? */
+
+	ndev->napi_tq = taskqueue_create("tq_ndev_napi", M_WAITOK,
+	    taskqueue_thread_enqueue, &ndev->napi_tq);
+	/* One thread for now. */
+	(void) taskqueue_start_threads(&ndev->napi_tq, 1, PWAIT,
+	    "ndev napi taskq");
+}
+
+/* Allocate a net_device and len bytes of zeroed drv_priv (NULL if OOM). */
+struct net_device *
+linuxkpi_alloc_netdev(size_t len, const char *name, uint32_t flags,
+    void(*setup_func)(struct net_device *))
+{
+	struct net_device *ndev;
+
+	/* M_ZERO: the init below only zeros *ndev, not drv_priv. */
+	ndev = malloc(sizeof(*ndev) + len, M_NETDEV, M_NOWAIT | M_ZERO);
+	if (ndev == NULL)
+		return (ndev);
+
+	/* Always first as it zeros! */
+	linuxkpi_init_dummy_netdev(ndev);
+	/* Bound by the array size, not sizeof(char). */
+	strlcpy(ndev->name, name, sizeof(ndev->name));
+
+	/* This needs extending as we support more. */
+	setup_func(ndev);
+	return (ndev);
+}
+
+void
+linuxkpi_free_netdev(struct net_device *ndev)
+{
+	struct napi_struct *napi, *temp;
+
+	NAPI_LOCK(ndev);
+	TAILQ_FOREACH_SAFE(napi, &ndev->napi_head, entry, temp) {
+		lkpi_netif_napi_del_locked(napi);
+	}
+	NAPI_UNLOCK(ndev);
+
+	taskqueue_free(ndev->napi_tq);
+	ndev->napi_tq = NULL;
+	NAPI_LOCK_DESTROY(ndev);
+
+	/* This needs extending as we support more. */
+
+	free(ndev, M_NETDEV);
+}
diff --git a/sys/conf/files b/sys/conf/files
index 6a809efe75d1..a9525d8beab3 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4552,6 +4552,8 @@ compat/linuxkpi/common/src/linux_kthread.c	optional compat_linuxkpi \
 	compile-with "${LINUXKPI_C}"
 compat/linuxkpi/common/src/linux_lock.c		optional compat_linuxkpi \
 	compile-with "${LINUXKPI_C}"
+compat/linuxkpi/common/src/linux_netdev.c	optional compat_linuxkpi \
+	compile-with "${LINUXKPI_C}"
 compat/linuxkpi/common/src/linux_page.c		optional compat_linuxkpi \
 	compile-with "${LINUXKPI_C}"
 compat/linuxkpi/common/src/linux_pci.c		optional compat_linuxkpi pci \
diff --git a/sys/modules/linuxkpi/Makefile b/sys/modules/linuxkpi/Makefile
index 41778cf948fd..4d15ac9fa962 100644
--- a/sys/modules/linuxkpi/Makefile
+++ b/sys/modules/linuxkpi/Makefile
@@ -15,6 +15,7 @@ SRCS=	linux_compat.c \
 	linux_kmod.c \
 	linux_kthread.c \
 	linux_lock.c \
+	linux_netdev.c \
 	linux_page.c \
 	linux_pci.c \
 	linux_radix.c \