svn commit: r314843 - in head/sys: compat/linuxkpi/common/include/linux compat/linuxkpi/common/src conf modules/linuxkpi

Tue Mar 7 12:09:16 UTC 2017

Author: hselasky
Date: Tue Mar  7 12:09:14 2017
New Revision: 314843
URL: https://svnweb.freebsd.org/changeset/base/314843

Log:
  LinuxKPI workqueue cleanup.
  
  This change makes the workqueue implementation behave more like the
  one in Linux, both in functionality and in structure.
  
  All workqueue code has been moved to linux_work.c
  
  Add an atomic-based state machine to the work_struct to ensure proper
  operation. Prior to this change, work_struct was directly mapped to a
  FreeBSD task. When a taskqueue has multiple threads, the same task may
  end up being executed on more than one worker thread simultaneously.
  This might cause problems with code coming from Linux, which expects
  serial behaviour, similar to Linux tasklets.
  
  Move all global workqueue function names into the linux_xxx domain to
  avoid symbol name clashes in the future.
  
  Implement a few more workqueue related functions and macros.
  
  Create two multithreaded taskqueues for the LinuxKPI during module
  load, one for time-consuming callbacks and one for non-time-consuming
  callbacks.
  
  MFC after:		1 week
  Sponsored by:		Mellanox Technologies

Added:
  head/sys/compat/linuxkpi/common/src/linux_work.c   (contents, props changed)
Modified:
  head/sys/compat/linuxkpi/common/include/linux/workqueue.h
  head/sys/compat/linuxkpi/common/src/linux_compat.c
  head/sys/conf/files
  head/sys/modules/linuxkpi/Makefile

Modified: head/sys/compat/linuxkpi/common/include/linux/workqueue.h
==============================================================================

--- head/sys/compat/linuxkpi/common/include/linux/workqueue.h	Tue Mar  7 09:18:52 2017	(r314842)
+++ head/sys/compat/linuxkpi/common/include/linux/workqueue.h	Tue Mar  7 12:09:14 2017	(r314843)
@@ -2,7 +2,7 @@
  * Copyright (c) 2010 Isilon Systems, Inc.
  * Copyright (c) 2010 iX Systems, Inc.
  * Copyright (c) 2010 Panasas, Inc.
- * Copyright (c) 2013-2015 Mellanox Technologies, Ltd.
+ * Copyright (c) 2013-2017 Mellanox Technologies, Ltd.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -38,179 +38,189 @@
 
 #include <asm/atomic.h>
 
+#include <sys/param.h>
+#include <sys/kernel.h>
 #include <sys/taskqueue.h>
+#include <sys/mutex.h>
+
+#define	WORK_CPU_UNBOUND MAXCPU
+#define	WQ_UNBOUND (1 << 0)
+#define	WQ_HIGHPRI (1 << 1)
+
+struct work_struct;
+typedef void (*work_func_t)(struct work_struct *);
+
+struct work_exec {
+	TAILQ_ENTRY(work_exec) entry;
+	struct work_struct *target;
+};
 
 struct workqueue_struct {
-	struct taskqueue	*taskqueue;
-	atomic_t		draining;
+	struct taskqueue *taskqueue;
+	struct mtx exec_mtx;
+	TAILQ_HEAD(, work_exec) exec_head;
+	atomic_t draining;
 };
 
+#define	WQ_EXEC_LOCK(wq) mtx_lock(&(wq)->exec_mtx)
+#define	WQ_EXEC_UNLOCK(wq) mtx_unlock(&(wq)->exec_mtx)
+
 struct work_struct {
-	struct	task 		work_task;
-	struct	taskqueue	*taskqueue;
-	void			(*fn)(struct work_struct *);
+	struct task work_task;
+	struct workqueue_struct *work_queue;
+	work_func_t func;
+	atomic_t state;
 };
 
-typedef __typeof(((struct work_struct *)0)->fn) work_func_t;
+#define	DECLARE_WORK(name, fn) \
+	struct work_struct name = { .func = (fn) }
 
 struct delayed_work {
-	struct work_struct	work;
-	struct callout		timer;
+	struct work_struct work;
+	struct {
+		struct callout callout;
+		struct mtx mtx;
+		int	expires;
+	} timer;
 };
 
-extern void linux_work_fn(void *, int);
-extern void linux_flush_fn(void *, int);
-extern void linux_delayed_work_fn(void *);
-extern struct workqueue_struct *linux_create_workqueue_common(const char *, int);
-extern void destroy_workqueue(struct workqueue_struct *);
+#define	DECLARE_DELAYED_WORK(name, fn)					\
+	struct delayed_work name;					\
+	static void name##_init(void *arg)				\
+	{								\
+		linux_init_delayed_work(&name, fn);			\
+	}								\
+	SYSINIT(name, SI_SUB_LOCK, SI_ORDER_SECOND, name##_init, NULL)
 
 static inline struct delayed_work *
 to_delayed_work(struct work_struct *work)
 {
-
- 	return container_of(work, struct delayed_work, work);
+	return (container_of(work, struct delayed_work, work));
 }
 
-#define	INIT_WORK(work, func) 	 					\
+#define	INIT_WORK(work, fn) 	 					\
 do {									\
-	(work)->fn = (func);						\
-	(work)->taskqueue = NULL;					\
-	TASK_INIT(&(work)->work_task, 0, linux_work_fn, (work));		\
+	(work)->func = (fn);						\
+	(work)->work_queue = NULL;					\
+	atomic_set(&(work)->state, 0);					\
+	TASK_INIT(&(work)->work_task, 0, linux_work_fn, (work));	\
 } while (0)
 
-#define	INIT_DELAYED_WORK(_work, func)					\
-do {									\
-	INIT_WORK(&(_work)->work, func);				\
-	callout_init(&(_work)->timer, 1);				\
-} while (0)
+#define	INIT_WORK_ONSTACK(work, fn) \
+	INIT_WORK(work, fn)
 
-#define	INIT_DEFERRABLE_WORK(...) INIT_DELAYED_WORK(__VA_ARGS__)
+#define	INIT_DELAYED_WORK(dwork, fn) \
+	linux_init_delayed_work(dwork, fn)
 
-#define	schedule_work(work)						\
-do {									\
-	(work)->taskqueue = taskqueue_thread;				\
-	taskqueue_enqueue(taskqueue_thread, &(work)->work_task);	\
-} while (0)
+#define	INIT_DEFERRABLE_WORK(dwork, fn) \
+	INIT_DELAYED_WORK(dwork, fn)
 
-#define	flush_scheduled_work()	flush_taskqueue(taskqueue_thread)
+#define	flush_scheduled_work() \
+	taskqueue_drain_all(system_wq->taskqueue)
 
-static inline int
-queue_work(struct workqueue_struct *wq, struct work_struct *work)
-{
-	work->taskqueue = wq->taskqueue;
-	/* Check for draining */
-	if (atomic_read(&wq->draining) != 0)
-		return (!work->work_task.ta_pending);
-	/* Return opposite value to align with Linux logic */
-	return (!taskqueue_enqueue(wq->taskqueue, &work->work_task));
-}
+#define	queue_work(wq, work) \
+	linux_queue_work_on(WORK_CPU_UNBOUND, wq, work)
 
-static inline int
-queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *work,
-    unsigned long delay)
-{
-	int pending;
+#define	schedule_work(work) \
+	linux_queue_work_on(WORK_CPU_UNBOUND, system_wq, work)
 
-	work->work.taskqueue = wq->taskqueue;
-	if (atomic_read(&wq->draining) != 0) {
-	  	pending = work->work.work_task.ta_pending;
-	} else if (delay != 0) {
-		pending = work->work.work_task.ta_pending;
-		callout_reset(&work->timer, delay, linux_delayed_work_fn, work);
-	} else {
-		callout_stop(&work->timer);
-		pending = taskqueue_enqueue(work->work.taskqueue,
-		    &work->work.work_task);
-	}
-	return (!pending);
-}
+#define	queue_delayed_work(wq, dwork, delay) \
+	linux_queue_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay)
 
-static inline bool
-schedule_delayed_work(struct delayed_work *dwork,
-    unsigned long delay)
-{
-	struct workqueue_struct wq;
+#define	schedule_delayed_work_on(cpu, dwork, delay) \
+	linux_queue_delayed_work_on(cpu, system_wq, dwork, delay)
 
-	wq.taskqueue = taskqueue_thread;
-	atomic_set(&wq.draining, 0);
-	return (queue_delayed_work(&wq, dwork, delay));
-}
+#define	queue_work_on(cpu, wq, work) \
+	linux_queue_work_on(cpu, wq, work)
 
-#define	create_singlethread_workqueue(name)				\
+#define	schedule_delayed_work(dwork, delay) \
+	linux_queue_delayed_work_on(WORK_CPU_UNBOUND, system_wq, dwork, delay)
+
+#define	queue_delayed_work_on(cpu, wq, dwork, delay) \
+	linux_queue_delayed_work_on(cpu, wq, dwork, delay)
+
+#define	create_singlethread_workqueue(name) \
 	linux_create_workqueue_common(name, 1)
 
-#define	create_workqueue(name)						\
-	linux_create_workqueue_common(name, MAXCPU)
+#define	create_workqueue(name) \
+	linux_create_workqueue_common(name, mp_ncpus)
 
-#define	alloc_ordered_workqueue(name, flags)				\
+#define	alloc_ordered_workqueue(name, flags) \
 	linux_create_workqueue_common(name, 1)
 
-#define	alloc_workqueue(name, flags, max_active)			\
+#define	alloc_workqueue(name, flags, max_active) \
 	linux_create_workqueue_common(name, max_active)
 
-#define	flush_workqueue(wq)	flush_taskqueue((wq)->taskqueue)
+#define	flush_workqueue(wq) \
+	taskqueue_drain_all((wq)->taskqueue)
 
-static inline void
-flush_taskqueue(struct taskqueue *tq)
-{
-	struct task flushtask;
+#define	drain_workqueue(wq) do {		\
+	atomic_inc(&(wq)->draining);		\
+	taskqueue_drain_all((wq)->taskqueue);	\
+	atomic_dec(&(wq)->draining);		\
+} while (0)
 
-	PHOLD(curproc);
-	TASK_INIT(&flushtask, 0, linux_flush_fn, NULL);
-	taskqueue_enqueue(tq, &flushtask);
-	taskqueue_drain(tq, &flushtask);
-	PRELE(curproc);
-}
+#define	mod_delayed_work(wq, dwork, delay) ({		\
+	bool __retval;					\
+	__retval = linux_cancel_delayed_work(dwork);	\
+	linux_queue_delayed_work_on(WORK_CPU_UNBOUND,	\
+	    wq, dwork, delay);				\
+	__retval;					\
+})
 
-static inline void
-drain_workqueue(struct workqueue_struct *wq)
-{
-	atomic_inc(&wq->draining);
-	flush_taskqueue(wq->taskqueue);
-	atomic_dec(&wq->draining);
-}
+#define	delayed_work_pending(dwork) \
+	linux_work_pending(&(dwork)->work)
 
-static inline int
-cancel_work_sync(struct work_struct *work)
-{
-	if (work->taskqueue &&
-	    taskqueue_cancel(work->taskqueue, &work->work_task, NULL))
-		taskqueue_drain(work->taskqueue, &work->work_task);
-	return 0;
-}
+#define	cancel_delayed_work(dwork) \
+	linux_cancel_delayed_work(dwork)
 
-/*
- * This may leave work running on another CPU as it does on Linux.
- */
-static inline int
-cancel_delayed_work(struct delayed_work *work)
-{
+#define	cancel_work_sync(work) \
+	linux_cancel_work_sync(work)
 
-	callout_stop(&work->timer);
-	if (work->work.taskqueue)
-		return (taskqueue_cancel(work->work.taskqueue,
-		    &work->work.work_task, NULL) == 0);
-	return 0;
-}
+#define	cancel_delayed_work_sync(dwork) \
+	linux_cancel_delayed_work_sync(dwork)
 
-static inline int
-cancel_delayed_work_sync(struct delayed_work *work)
-{
+#define	flush_work(work) \
+	linux_flush_work(work)
 
-        callout_drain(&work->timer);
-        if (work->work.taskqueue &&
-            taskqueue_cancel(work->work.taskqueue, &work->work.work_task, NULL))
-                taskqueue_drain(work->work.taskqueue, &work->work.work_task);
-        return 0;
-}
+#define	flush_delayed_work(dwork) \
+	linux_flush_delayed_work(dwork)
 
-static inline bool
-mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork,
-    unsigned long delay)
-{
-	cancel_delayed_work(dwork);
-	queue_delayed_work(wq, dwork, delay);
-	return false;
-}
+#define	work_pending(work) \
+	linux_work_pending(work)
+
+#define	work_busy(work) \
+	linux_work_busy(work)
+
+#define	destroy_work_on_stack(work) \
+	do { } while (0)
+
+#define	destroy_delayed_work_on_stack(dwork) \
+	do { } while (0)
+
+#define	destroy_workqueue(wq) \
+	linux_destroy_workqueue(wq)
+
+/* prototypes */
+
+extern struct workqueue_struct *system_wq;
+extern struct workqueue_struct *system_long_wq;
+extern struct workqueue_struct *system_unbound_wq;
+extern struct workqueue_struct *system_power_efficient_wq;
+
+extern void linux_init_delayed_work(struct delayed_work *, work_func_t);
+extern void linux_work_fn(void *, int);
+extern struct workqueue_struct *linux_create_workqueue_common(const char *, int);
+extern void linux_destroy_workqueue(struct workqueue_struct *);
+extern bool linux_queue_work_on(int cpu, struct workqueue_struct *, struct work_struct *);
+extern bool linux_queue_delayed_work_on(int cpu, struct workqueue_struct *,
+    struct delayed_work *, unsigned delay);
+extern bool linux_cancel_delayed_work(struct delayed_work *);
+extern bool linux_cancel_work_sync(struct work_struct *);
+extern bool linux_cancel_delayed_work_sync(struct delayed_work *);
+extern bool linux_flush_work(struct work_struct *);
+extern bool linux_flush_delayed_work(struct delayed_work *);
+extern bool linux_work_pending(struct work_struct *);
+extern bool linux_work_busy(struct work_struct *);
 
-#endif	/* _LINUX_WORKQUEUE_H_ */
+#endif					/* _LINUX_WORKQUEUE_H_ */

Modified: head/sys/compat/linuxkpi/common/src/linux_compat.c
==============================================================================
--- head/sys/compat/linuxkpi/common/src/linux_compat.c	Tue Mar  7 09:18:52 2017	(r314842)
+++ head/sys/compat/linuxkpi/common/src/linux_compat.c	Tue Mar  7 12:09:14 2017	(r314843)
@@ -68,7 +68,6 @@ __FBSDID("$FreeBSD$");
 #include <linux/vmalloc.h>
 #include <linux/netdevice.h>
 #include <linux/timer.h>
-#include <linux/workqueue.h>
 #include <linux/interrupt.h>
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
@@ -1164,50 +1163,6 @@ linux_completion_done(struct completion 
 	return (isdone);
 }
 
-void
-linux_delayed_work_fn(void *arg)
-{
-	struct delayed_work *work;
-
-	work = arg;
-	taskqueue_enqueue(work->work.taskqueue, &work->work.work_task);
-}
-
-void
-linux_work_fn(void *context, int pending)
-{
-	struct work_struct *work;
-
-	work = context;
-	work->fn(work);
-}
-
-void
-linux_flush_fn(void *context, int pending)
-{
-}
-
-struct workqueue_struct *
-linux_create_workqueue_common(const char *name, int cpus)
-{
-	struct workqueue_struct *wq;
-
-	wq = kmalloc(sizeof(*wq), M_WAITOK);
-	wq->taskqueue = taskqueue_create(name, M_WAITOK,
-	    taskqueue_thread_enqueue,  &wq->taskqueue);
-	atomic_set(&wq->draining, 0);
-	taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, "%s", name);
-
-	return (wq);
-}
-
-void
-destroy_workqueue(struct workqueue_struct *wq)
-{
-	taskqueue_free(wq->taskqueue);
-	kfree(wq);
-}
-
 static void
 linux_cdev_release(struct kobject *kobj)
 {

Added: head/sys/compat/linuxkpi/common/src/linux_work.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/compat/linuxkpi/common/src/linux_work.c	Tue Mar  7 12:09:14 2017	(r314843)
@@ -0,0 +1,563 @@
+/*-
+ * Copyright (c) 2017 Hans Petter Selasky
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <linux/workqueue.h>
+#include <linux/wait.h>
+#include <linux/compat.h>
+#include <linux/spinlock.h>
+
+#include <sys/kernel.h>
+
+/*
+ * Define all work struct states
+ */
+enum {
+	WORK_ST_IDLE,			/* idle - not started */
+	WORK_ST_TIMER,			/* timer is being started */
+	WORK_ST_TASK,			/* taskqueue is being queued */
+	WORK_ST_EXEC,			/* callback is being called */
+	WORK_ST_CANCEL,			/* cancel is being requested */
+	WORK_ST_MAX,
+};
+
+/*
+ * Define global workqueues
+ */
+static struct workqueue_struct *linux_system_short_wq;
+static struct workqueue_struct *linux_system_long_wq;
+
+struct workqueue_struct *system_wq;
+struct workqueue_struct *system_long_wq;
+struct workqueue_struct *system_unbound_wq;
+struct workqueue_struct *system_power_efficient_wq;
+
+static void linux_delayed_work_timer_fn(void *);
+
+/*
+ * This function atomically updates the work state and returns the
+ * previous state at the time of update.
+ */
+static const uint8_t
+linux_update_state(atomic_t *v, const uint8_t *pstate)
+{
+	int c, old;
+
+	c = v->counter;
+
+	while ((old = atomic_cmpxchg(v, c, pstate[c])) != c)
+		c = old;
+
+	return (c);
+}
+
+/*
+ * A LinuxKPI task is allowed to free itself inside the callback function
+ * and cannot safely be referred to after the callback function has
+ * completed. This function gives the linux_work_fn() function a hint
+ * that the task is not going away and can have its state checked
+ * again. Without this extra hint, LinuxKPI tasks cannot be serialized
+ * across multiple worker threads.
+ */
+static const bool
+linux_work_exec_unblock(struct work_struct *work)
+{
+	struct workqueue_struct *wq;
+	struct work_exec *exec;
+	bool retval = 0;
+
+	wq = work->work_queue;
+	if (unlikely(wq == NULL))
+		goto done;
+
+	WQ_EXEC_LOCK(wq);
+	TAILQ_FOREACH(exec, &wq->exec_head, entry) {
+		if (exec->target == work) {
+			exec->target = NULL;
+			retval = 1;
+			break;
+		}
+	}
+	WQ_EXEC_UNLOCK(wq);
+done:
+	return (retval);
+}
+
+static void
+linux_delayed_work_enqueue(struct delayed_work *dwork)
+{
+	struct taskqueue *tq;
+
+	tq = dwork->work.work_queue->taskqueue;
+	taskqueue_enqueue(tq, &dwork->work.work_task);
+}
+
+/*
+ * This function queues the given work structure on the given
+ * workqueue. It returns non-zero if the work was successfully
+ * [re-]queued. Else the work is already pending for completion.
+ */
+bool
+linux_queue_work_on(int cpu __unused, struct workqueue_struct *wq,
+    struct work_struct *work)
+{
+	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
+		[WORK_ST_IDLE] = WORK_ST_TASK,		/* start queuing task */
+		[WORK_ST_TIMER] = WORK_ST_TIMER,	/* NOP */
+		[WORK_ST_TASK] = WORK_ST_TASK,		/* NOP */
+		[WORK_ST_EXEC] = WORK_ST_TASK,		/* queue task another time */
+		[WORK_ST_CANCEL] = WORK_ST_TASK,	/* start queuing task again */
+	};
+
+	if (atomic_read(&wq->draining) != 0)
+		return (!work_pending(work));
+
+	switch (linux_update_state(&work->state, states)) {
+	case WORK_ST_EXEC:
+	case WORK_ST_CANCEL:
+		if (linux_work_exec_unblock(work) != 0)
+			return (1);
+		/* FALLTHROUGH */
+	case WORK_ST_IDLE:
+		work->work_queue = wq;
+		taskqueue_enqueue(wq->taskqueue, &work->work_task);
+		return (1);
+	default:
+		return (0);		/* already on a queue */
+	}
+}
+
+/*
+ * This function queues the given work structure on the given
+ * workqueue after a given delay in ticks. It returns non-zero if the
+ * work was successfully [re-]queued. Else the work is already pending
+ * for completion.
+ */
+bool
+linux_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
+    struct delayed_work *dwork, unsigned delay)
+{
+	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
+		[WORK_ST_IDLE] = WORK_ST_TIMER,		/* start timeout */
+		[WORK_ST_TIMER] = WORK_ST_TIMER,	/* NOP */
+		[WORK_ST_TASK] = WORK_ST_TASK,		/* NOP */
+		[WORK_ST_EXEC] = WORK_ST_TIMER,		/* start timeout */
+		[WORK_ST_CANCEL] = WORK_ST_TIMER,	/* start timeout */
+	};
+
+	if (atomic_read(&wq->draining) != 0)
+		return (!work_pending(&dwork->work));
+
+	switch (linux_update_state(&dwork->work.state, states)) {
+	case WORK_ST_EXEC:
+	case WORK_ST_CANCEL:
+		if (delay == 0 && linux_work_exec_unblock(&dwork->work) != 0) {
+			dwork->timer.expires = jiffies;
+			return (1);
+		}
+		/* FALLTHROUGH */
+	case WORK_ST_IDLE:
+		dwork->work.work_queue = wq;
+		dwork->timer.expires = jiffies + delay;
+
+		if (delay == 0) {
+			linux_delayed_work_enqueue(dwork);
+		} else if (unlikely(cpu != WORK_CPU_UNBOUND)) {
+			mtx_lock(&dwork->timer.mtx);
+			callout_reset_on(&dwork->timer.callout, delay,
+			    &linux_delayed_work_timer_fn, dwork, cpu);
+			mtx_unlock(&dwork->timer.mtx);
+		} else {
+			mtx_lock(&dwork->timer.mtx);
+			callout_reset(&dwork->timer.callout, delay,
+			    &linux_delayed_work_timer_fn, dwork);
+			mtx_unlock(&dwork->timer.mtx);
+		}
+		return (1);
+	default:
+		return (0);		/* already on a queue */
+	}
+}
+
+void
+linux_work_fn(void *context, int pending)
+{
+	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
+		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
+		[WORK_ST_TIMER] = WORK_ST_EXEC,		/* delayed work w/o timeout */
+		[WORK_ST_TASK] = WORK_ST_EXEC,		/* call callback */
+		[WORK_ST_EXEC] = WORK_ST_IDLE,		/* complete callback */
+		[WORK_ST_CANCEL] = WORK_ST_IDLE,	/* complete cancel */
+	};
+	struct work_struct *work;
+	struct workqueue_struct *wq;
+	struct work_exec exec;
+
+	linux_set_current(curthread);
+
+	/* setup local variables */
+	work = context;
+	wq = work->work_queue;
+
+	/* store target pointer */
+	exec.target = work;
+
+	/* insert executor into list */
+	WQ_EXEC_LOCK(wq);
+	TAILQ_INSERT_TAIL(&wq->exec_head, &exec, entry);
+	while (1) {
+		switch (linux_update_state(&work->state, states)) {
+		case WORK_ST_TIMER:
+		case WORK_ST_TASK:
+			WQ_EXEC_UNLOCK(wq);
+
+			/* call work function */
+			work->func(work);
+
+			WQ_EXEC_LOCK(wq);
+			/* check if unblocked */
+			if (exec.target != work) {
+				/* reapply block */
+				exec.target = work;
+				break;
+			}
+			/* FALLTHROUGH */
+		default:
+			goto done;
+		}
+	}
+done:
+	/* remove executor from list */
+	TAILQ_REMOVE(&wq->exec_head, &exec, entry);
+	WQ_EXEC_UNLOCK(wq);
+}
+
+static void
+linux_delayed_work_timer_fn(void *arg)
+{
+	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
+		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
+		[WORK_ST_TIMER] = WORK_ST_TASK,		/* start queueing task */
+		[WORK_ST_TASK] = WORK_ST_TASK,		/* NOP */
+		[WORK_ST_EXEC] = WORK_ST_TASK,		/* queue task another time */
+		[WORK_ST_CANCEL] = WORK_ST_IDLE,	/* complete cancel */
+	};
+	struct delayed_work *dwork = arg;
+
+	switch (linux_update_state(&dwork->work.state, states)) {
+	case WORK_ST_TIMER:
+		linux_delayed_work_enqueue(dwork);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * This function cancels the given work structure in a synchronous
+ * fashion. It returns non-zero if the work was successfully
+ * cancelled. Else the work was already cancelled.
+ */
+bool
+linux_cancel_work_sync(struct work_struct *work)
+{
+	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
+		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
+		[WORK_ST_TIMER] = WORK_ST_IDLE,		/* idle */
+		[WORK_ST_TASK] = WORK_ST_IDLE,		/* idle */
+		[WORK_ST_EXEC] = WORK_ST_IDLE,		/* idle */
+		[WORK_ST_CANCEL] = WORK_ST_IDLE,	/* idle */
+	};
+	struct taskqueue *tq;
+
+	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
+	    "linux_cancel_work_sync() might sleep");
+
+	switch (linux_update_state(&work->state, states)) {
+	case WORK_ST_IDLE:
+		return (0);
+	default:
+		tq = work->work_queue->taskqueue;
+		if (taskqueue_cancel(tq, &work->work_task, NULL) != 0)
+			taskqueue_drain(tq, &work->work_task);
+		return (1);
+	}
+}
+
+/*
+ * This function atomically stops the timer and callback. The timer
+ * callback will not be called after this function returns. This
+ * function returns true when the timeout was cancelled. Else the
+ * timeout was not started or has already been called.
+ */
+static inline bool
+linux_cancel_timer(struct delayed_work *dwork, bool drain)
+{
+	bool cancelled;
+
+	mtx_lock(&dwork->timer.mtx);
+	cancelled = (callout_stop(&dwork->timer.callout) == 1);
+	mtx_unlock(&dwork->timer.mtx);
+
+	/* check if we should drain */
+	if (drain)
+		callout_drain(&dwork->timer.callout);
+	return (cancelled);
+}
+
+/*
+ * This function cancels the given delayed work structure in a
+ * non-blocking fashion. It returns non-zero if the work was
+ * successfully cancelled. Else the work may still be busy or already
+ * cancelled.
+ */
+bool
+linux_cancel_delayed_work(struct delayed_work *dwork)
+{
+	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
+		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
+		[WORK_ST_TIMER] = WORK_ST_CANCEL,	/* cancel */
+		[WORK_ST_TASK] = WORK_ST_CANCEL,	/* cancel */
+		[WORK_ST_EXEC] = WORK_ST_CANCEL,	/* cancel */
+		[WORK_ST_CANCEL] = WORK_ST_CANCEL,	/* cancel */
+	};
+	struct taskqueue *tq;
+
+	switch (linux_update_state(&dwork->work.state, states)) {
+	case WORK_ST_TIMER:
+		if (linux_cancel_timer(dwork, 0))
+			return (1);
+		/* FALLTHROUGH */
+	case WORK_ST_TASK:
+	case WORK_ST_EXEC:
+		tq = dwork->work.work_queue->taskqueue;
+		if (taskqueue_cancel(tq, &dwork->work.work_task, NULL) == 0)
+			return (1);
+		/* FALLTHROUGH */
+	default:
+		return (0);
+	}
+}
+
+/*
+ * This function cancels the given work structure in a synchronous
+ * fashion. It returns non-zero if the work was successfully
+ * cancelled. Else the work was already cancelled.
+ */
+bool
+linux_cancel_delayed_work_sync(struct delayed_work *dwork)
+{
+	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
+		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
+		[WORK_ST_TIMER] = WORK_ST_IDLE,		/* idle */
+		[WORK_ST_TASK] = WORK_ST_IDLE,		/* idle */
+		[WORK_ST_EXEC] = WORK_ST_IDLE,		/* idle */
+		[WORK_ST_CANCEL] = WORK_ST_IDLE,	/* idle */
+	};
+	struct taskqueue *tq;
+
+	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
+	    "linux_cancel_delayed_work_sync() might sleep");
+
+	switch (linux_update_state(&dwork->work.state, states)) {
+	case WORK_ST_IDLE:
+		return (0);
+	case WORK_ST_TIMER:
+		if (linux_cancel_timer(dwork, 1)) {
+			/*
+			 * Make sure taskqueue is also drained before
+			 * returning:
+			 */
+			tq = dwork->work.work_queue->taskqueue;
+			taskqueue_drain(tq, &dwork->work.work_task);
+			return (1);
+		}
+		/* FALLTHROUGH */
+	default:
+		tq = dwork->work.work_queue->taskqueue;
+		if (taskqueue_cancel(tq, &dwork->work.work_task, NULL) != 0)
+			taskqueue_drain(tq, &dwork->work.work_task);
+		return (1);
+	}
+}
+
+/*
+ * This function waits until the given work structure is completed.
+ * It returns non-zero if the work was successfully
+ * waited for. Else the work was not waited for.
+ */
+bool
+linux_flush_work(struct work_struct *work)
+{
+	struct taskqueue *tq;
+
+	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
+	    "linux_flush_work() might sleep");
+
+	switch (atomic_read(&work->state)) {
+	case WORK_ST_IDLE:
+		return (0);
+	default:
+		tq = work->work_queue->taskqueue;
+		taskqueue_drain(tq, &work->work_task);
+		return (1);
+	}
+}
+
+/*
+ * This function waits until the given delayed work structure is
+ * completed. It returns non-zero if the work was successfully waited
+ * for. Else the work was not waited for.
+ */
+bool
+linux_flush_delayed_work(struct delayed_work *dwork)
+{
+	struct taskqueue *tq;
+
+	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
+	    "linux_flush_delayed_work() might sleep");
+
+	switch (atomic_read(&dwork->work.state)) {
+	case WORK_ST_IDLE:
+		return (0);
+	case WORK_ST_TIMER:
+		if (linux_cancel_timer(dwork, 1))
+			linux_delayed_work_enqueue(dwork);
+		/* FALLTHROUGH */
+	default:
+		tq = dwork->work.work_queue->taskqueue;
+		taskqueue_drain(tq, &dwork->work.work_task);
+		return (1);
+	}
+}
+
+/*
+ * This function returns true if the given work is pending, and not
+ * yet executing:
+ */
+bool
+linux_work_pending(struct work_struct *work)
+{
+	switch (atomic_read(&work->state)) {
+	case WORK_ST_TIMER:
+	case WORK_ST_TASK:
+		return (1);
+	default:
+		return (0);
+	}
+}
+
+/*
+ * This function returns true if the given work is busy.
+ */
+bool
+linux_work_busy(struct work_struct *work)
+{
+	struct taskqueue *tq;
+
+	switch (atomic_read(&work->state)) {
+	case WORK_ST_IDLE:
+		return (0);
+	case WORK_ST_EXEC:
+	case WORK_ST_CANCEL:
+		tq = work->work_queue->taskqueue;
+		return (taskqueue_poll_is_busy(tq, &work->work_task));
+	default:
+		return (1);
+	}
+}
+
+struct workqueue_struct *
+linux_create_workqueue_common(const char *name, int cpus)
+{
+	struct workqueue_struct *wq;
+
+	wq = kmalloc(sizeof(*wq), M_WAITOK | M_ZERO);
+	wq->taskqueue = taskqueue_create(name, M_WAITOK,
+	    taskqueue_thread_enqueue, &wq->taskqueue);
+	atomic_set(&wq->draining, 0);
+	taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, "%s", name);
+	TAILQ_INIT(&wq->exec_head);
+	mtx_init(&wq->exec_mtx, "linux_wq_exec", NULL, MTX_DEF);
+
+	return (wq);
+}
+
+void
+linux_destroy_workqueue(struct workqueue_struct *wq)
+{
+	atomic_inc(&wq->draining);
+	drain_workqueue(wq);
+	taskqueue_free(wq->taskqueue);
+	mtx_destroy(&wq->exec_mtx);
+	kfree(wq);
+}
+
+void
+linux_init_delayed_work(struct delayed_work *dwork, work_func_t func)
+{
+	memset(dwork, 0, sizeof(*dwork));
+	INIT_WORK(&dwork->work, func);
+	mtx_init(&dwork->timer.mtx, spin_lock_name("lkpi-dwork"), NULL,
+	    MTX_DEF | MTX_NOWITNESS);
+	callout_init_mtx(&dwork->timer.callout, &dwork->timer.mtx, 0);
+}
+
+static void
+linux_work_init(void *arg)
+{
+	int max_wq_cpus = mp_ncpus + 1;
+
+	/* avoid deadlock when there are too few threads */
+	if (max_wq_cpus < 4)
+		max_wq_cpus = 4;
+
+	linux_system_short_wq = alloc_workqueue("linuxkpi_short_wq", 0, max_wq_cpus);
+	linux_system_long_wq = alloc_workqueue("linuxkpi_long_wq", 0, max_wq_cpus);
+
+	/* populate the workqueue pointers */
+	system_long_wq = linux_system_long_wq;
+	system_wq = linux_system_short_wq;
+	system_power_efficient_wq = linux_system_short_wq;
+	system_unbound_wq = linux_system_short_wq;
+}
+SYSINIT(linux_work_init, SI_SUB_LOCK, SI_ORDER_SECOND, linux_work_init, NULL);
+
+static void
+linux_work_uninit(void *arg)
+{
+	destroy_workqueue(linux_system_short_wq);
+	destroy_workqueue(linux_system_long_wq);
+
+	/* clear workqueue pointers */
+	system_long_wq = NULL;
+	system_wq = NULL;
+	system_power_efficient_wq = NULL;
+	system_unbound_wq = NULL;
+}
+SYSUNINIT(linux_work_uninit, SI_SUB_LOCK, SI_ORDER_SECOND, linux_work_uninit, NULL);

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files	Tue Mar  7 09:18:52 2017	(r314842)
+++ head/sys/conf/files	Tue Mar  7 12:09:14 2017	(r314843)
@@ -4298,6 +4298,8 @@ compat/linuxkpi/common/src/linux_rcu.c		
 	compile-with "${LINUXKPI_C} -I$S/contrib/ck/include"
 compat/linuxkpi/common/src/linux_usb.c		optional compat_linuxkpi usb \
 	compile-with "${LINUXKPI_C}"
+compat/linuxkpi/common/src/linux_work.c		optional compat_linuxkpi \
+	compile-with "${LINUXKPI_C}"
 
 # OpenFabrics Enterprise Distribution (Infiniband)
 ofed/drivers/infiniband/core/addr.c		optional ofed		\

Modified: head/sys/modules/linuxkpi/Makefile
==============================================================================
--- head/sys/modules/linuxkpi/Makefile	Tue Mar  7 09:18:52 2017	(r314842)
+++ head/sys/modules/linuxkpi/Makefile	Tue Mar  7 12:09:14 2017	(r314843)
@@ -12,7 +12,8 @@ SRCS=	linux_kmod.c \
 	linux_rcu.c \
 	linux_tasklet.c \
 	linux_idr.c \
-	linux_usb.c
+	linux_usb.c \
+	linux_work.c
 
 SRCS+=	bus_if.h \
 	device_if.h \