git: ec25b6fa5ffd - main - LinuxKPI: Reimplement irq_work queue on top of fast taskqueue
Emmanuel Vadot
manu at FreeBSD.org
Sun Jan 17 11:47:44 UTC 2021
The branch main has been updated by manu:
URL: https://cgit.FreeBSD.org/src/commit/?id=ec25b6fa5ffde89f202c2caa77268ed1eed12158
commit ec25b6fa5ffde89f202c2caa77268ed1eed12158
Author: Vladimir Kondratyev <wulf at FreeBSD.org>
AuthorDate: 2021-01-17 11:21:49 +0000
Commit: Emmanuel Vadot <manu at FreeBSD.org>
CommitDate: 2021-01-17 11:47:28 +0000
LinuxKPI: Reimplement irq_work queue on top of fast taskqueue
Summary:
Linux's irq_work queue was created for asynchronous execution of code from contexts where spin_lock's are not available, like "hardware interrupt context". FreeBSD's fast taskqueues were created for the same purposes.
Drm-kmod 5.4 uses irq_work_queue() in at least one place to schedule execution of a task/work from a critical section, which triggers the following INVARIANTS-induced panic:
```
panic: acquiring blockable sleep lock with spinlock or critical section held (sleep mutex) linuxkpi_short_wq @ /usr/src/sys/kern/subr_taskqueue.c:281
cpuid = 6
time = 1605048416
KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 0xfffffe006b538c90
vpanic() at vpanic+0x182/frame 0xfffffe006b538ce0
panic() at panic+0x43/frame 0xfffffe006b538d40
witness_checkorder() at witness_checkorder+0xf3e/frame 0xfffffe006b538f00
__mtx_lock_flags() at __mtx_lock_flags+0x94/frame 0xfffffe006b538f50
taskqueue_enqueue() at taskqueue_enqueue+0x42/frame 0xfffffe006b538f70
linux_queue_work_on() at linux_queue_work_on+0xe9/frame 0xfffffe006b538fb0
irq_work_queue() at irq_work_queue+0x21/frame 0xfffffe006b538fd0
semaphore_notify() at semaphore_notify+0xb2/frame 0xfffffe006b539020
__i915_sw_fence_notify() at __i915_sw_fence_notify+0x2e/frame 0xfffffe006b539050
__i915_sw_fence_complete() at __i915_sw_fence_complete+0x63/frame 0xfffffe006b539080
i915_sw_fence_complete() at i915_sw_fence_complete+0x8e/frame 0xfffffe006b5390c0
dma_i915_sw_fence_wake() at dma_i915_sw_fence_wake+0x4f/frame 0xfffffe006b539100
dma_fence_signal_locked() at dma_fence_signal_locked+0x105/frame 0xfffffe006b539180
dma_fence_signal() at dma_fence_signal+0x72/frame 0xfffffe006b5391c0
dma_fence_is_signaled() at dma_fence_is_signaled+0x80/frame 0xfffffe006b539200
dma_resv_add_shared_fence() at dma_resv_add_shared_fence+0xb3/frame 0xfffffe006b539270
i915_vma_move_to_active() at i915_vma_move_to_active+0x18a/frame 0xfffffe006b5392b0
eb_move_to_gpu() at eb_move_to_gpu+0x3ad/frame 0xfffffe006b539320
eb_submit() at eb_submit+0x15/frame 0xfffffe006b539350
i915_gem_do_execbuffer() at i915_gem_do_execbuffer+0x7d4/frame 0xfffffe006b539570
i915_gem_execbuffer2_ioctl() at i915_gem_execbuffer2_ioctl+0x1c1/frame 0xfffffe006b539600
drm_ioctl_kernel() at drm_ioctl_kernel+0xd9/frame 0xfffffe006b539670
drm_ioctl() at drm_ioctl+0x5cd/frame 0xfffffe006b539820
linux_file_ioctl() at linux_file_ioctl+0x323/frame 0xfffffe006b539880
kern_ioctl() at kern_ioctl+0x1f4/frame 0xfffffe006b5398f0
sys_ioctl() at sys_ioctl+0x12a/frame 0xfffffe006b5399c0
amd64_syscall() at amd64_syscall+0x121/frame 0xfffffe006b539af0
fast_syscall_common() at fast_syscall_common+0xf8/frame 0xfffffe006b539af0
--- syscall (54, FreeBSD ELF64, sys_ioctl), rip = 0x800a6f09a, rsp = 0x7fffffffe588, rbp = 0x7fffffffe640 ---
KDB: enter: panic
```
Here, dma_resv_add_shared_fence() performs a critical_enter(), and the following call to schedule_work() from semaphore_notify() triggers the 'acquiring blockable sleep lock with spinlock or critical section held' panic.
Switching irq_work implementation to fast taskqueue fixes the panic for me.
Another report with a similar bug: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=247166
Reviewed By: hselasky
Differential Revision: https://reviews.freebsd.org/D27171
---
.../linuxkpi/common/include/linux/irq_work.h | 25 ++++-
sys/compat/linuxkpi/common/include/linux/llist.h | 101 +++++++++++++++++++++
sys/compat/linuxkpi/common/include/linux/slab.h | 16 ++++
sys/compat/linuxkpi/common/src/linux_slab.c | 27 ++++++
sys/compat/linuxkpi/common/src/linux_work.c | 48 ++++++++++
5 files changed, 212 insertions(+), 5 deletions(-)
diff --git a/sys/compat/linuxkpi/common/include/linux/irq_work.h b/sys/compat/linuxkpi/common/include/linux/irq_work.h
index b44e78230b0d..eb1798a4e450 100644
--- a/sys/compat/linuxkpi/common/include/linux/irq_work.h
+++ b/sys/compat/linuxkpi/common/include/linux/irq_work.h
@@ -31,22 +31,37 @@
#ifndef __LINUX_IRQ_WORK_H__
#define __LINUX_IRQ_WORK_H__
-#include <linux/workqueue.h>
+#include <sys/param.h>
+#include <sys/taskqueue.h>
+
+struct irq_work;
+typedef void (*irq_work_func_t)(struct irq_work *);
struct irq_work {
- struct work_struct work;
+ struct task irq_task;
+ irq_work_func_t func;
};
+extern struct taskqueue *linux_irq_work_tq;
+
+#define DEFINE_IRQ_WORK(name, _func) struct irq_work name = { \
+ .irq_task = TASK_INITIALIZER(0, linux_irq_work_fn, &(name)), \
+ .func = (_func), \
+}
+
+void linux_irq_work_fn(void *, int);
+
static inline void
-init_irq_work(struct irq_work *irqw, void (*func)(struct irq_work *))
+init_irq_work(struct irq_work *irqw, irq_work_func_t func)
{
- INIT_WORK(&irqw->work, (work_func_t)func);
+ TASK_INIT(&irqw->irq_task, 0, linux_irq_work_fn, irqw);
+ irqw->func = func;
}
static inline void
irq_work_queue(struct irq_work *irqw)
{
- schedule_work(&irqw->work);
+ taskqueue_enqueue(linux_irq_work_tq, &irqw->irq_task);
}
#endif /* __LINUX_IRQ_WORK_H__ */
diff --git a/sys/compat/linuxkpi/common/include/linux/llist.h b/sys/compat/linuxkpi/common/include/linux/llist.h
new file mode 100644
index 000000000000..b3c89516e710
--- /dev/null
+++ b/sys/compat/linuxkpi/common/include/linux/llist.h
@@ -0,0 +1,101 @@
+/* Public domain. */
+
+#ifndef _LINUX_LLIST_H
+#define _LINUX_LLIST_H
+
+#include <sys/types.h>
+#include <machine/atomic.h>
+
+struct llist_node {
+ struct llist_node *next;
+};
+
+struct llist_head {
+ struct llist_node *first;
+};
+
+#define LLIST_HEAD_INIT(name) { NULL }
+#define LLIST_HEAD(name) struct llist_head name = LLIST_HEAD_INIT(name)
+
+#define llist_entry(ptr, type, member) \
+ ((ptr) ? container_of(ptr, type, member) : NULL)
+
+static inline struct llist_node *
+llist_del_all(struct llist_head *head)
+{
+ return ((void *)atomic_readandclear_ptr((uintptr_t *)&head->first));
+}
+
+static inline struct llist_node *
+llist_del_first(struct llist_head *head)
+{
+ struct llist_node *first, *next;
+
+ do {
+ first = head->first;
+ if (first == NULL)
+ return NULL;
+ next = first->next;
+ } while (atomic_cmpset_ptr((uintptr_t *)&head->first,
+ (uintptr_t)first, (uintptr_t)next) == 0);
+
+ return (first);
+}
+
+static inline bool
+llist_add(struct llist_node *new, struct llist_head *head)
+{
+ struct llist_node *first;
+
+ do {
+ new->next = first = head->first;
+ } while (atomic_cmpset_ptr((uintptr_t *)&head->first,
+ (uintptr_t)first, (uintptr_t)new) == 0);
+
+ return (first == NULL);
+}
+
+static inline bool
+llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
+ struct llist_head *head)
+{
+ struct llist_node *first;
+
+ do {
+ new_last->next = first = head->first;
+ } while (atomic_cmpset_ptr((uintptr_t *)&head->first,
+ (uintptr_t)first, (uintptr_t)new_first) == 0);
+
+ return (first == NULL);
+}
+
+static inline void
+init_llist_head(struct llist_head *head)
+{
+ head->first = NULL;
+}
+
+static inline bool
+llist_empty(struct llist_head *head)
+{
+ return (head->first == NULL);
+}
+
+#define llist_for_each_safe(pos, n, node) \
+ for ((pos) = (node); \
+ (pos) != NULL && \
+ ((n) = (pos)->next, pos); \
+ (pos) = (n))
+
+#define llist_for_each_entry_safe(pos, n, node, member) \
+ for (pos = llist_entry((node), __typeof(*pos), member); \
+ pos != NULL && \
+ (n = llist_entry(pos->member.next, __typeof(*pos), member), pos); \
+ pos = n)
+
+#define llist_for_each_entry(pos, node, member) \
+ for ((pos) = llist_entry((node), __typeof(*(pos)), member); \
+ (pos) != NULL; \
+ (pos) = llist_entry((pos)->member.next, __typeof(*(pos)), member))
+
+#endif
diff --git a/sys/compat/linuxkpi/common/include/linux/slab.h b/sys/compat/linuxkpi/common/include/linux/slab.h
index ae1c9d81843e..0cd748b7ecb9 100644
--- a/sys/compat/linuxkpi/common/include/linux/slab.h
+++ b/sys/compat/linuxkpi/common/include/linux/slab.h
@@ -35,10 +35,12 @@
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/limits.h>
+#include <sys/proc.h>
#include <vm/uma.h>
#include <linux/types.h>
#include <linux/gfp.h>
+#include <linux/llist.h>
MALLOC_DECLARE(M_KMALLOC);
@@ -90,6 +92,19 @@ struct linux_kmem_cache {
#define ARCH_KMALLOC_MINALIGN \
__alignof(unsigned long long)
+/*
+ * Critical section-friendly version of kfree().
+ * Requires knowledge of the allocation size at build time.
+ */
+#define kfree_async(ptr) do { \
+ _Static_assert(sizeof(*(ptr)) >= sizeof(struct llist_node), \
+ "Size of object to free is unknown or too small"); \
+ if (curthread->td_critnest != 0) \
+ linux_kfree_async(ptr); \
+ else \
+ kfree(ptr); \
+} while (0)
+
static inline gfp_t
linux_check_m_flags(gfp_t flags)
{
@@ -189,5 +204,6 @@ linux_kmem_cache_free(struct linux_kmem_cache *c, void *m)
}
extern void linux_kmem_cache_destroy(struct linux_kmem_cache *);
+void linux_kfree_async(void *);
#endif /* _LINUX_SLAB_H_ */
diff --git a/sys/compat/linuxkpi/common/src/linux_slab.c b/sys/compat/linuxkpi/common/src/linux_slab.c
index c8deab01731a..3304c34b1dee 100644
--- a/sys/compat/linuxkpi/common/src/linux_slab.c
+++ b/sys/compat/linuxkpi/common/src/linux_slab.c
@@ -30,6 +30,11 @@ __FBSDID("$FreeBSD$");
#include <linux/slab.h>
#include <linux/rcupdate.h>
#include <linux/kernel.h>
+#include <linux/irq_work.h>
+#include <linux/llist.h>
+
+#include <sys/param.h>
+#include <sys/taskqueue.h>
struct linux_kmem_rcu {
struct rcu_head rcu_head;
@@ -44,6 +49,8 @@ struct linux_kmem_rcu {
((void *)((char *)(r) + sizeof(struct linux_kmem_rcu) - \
(r)->cache->cache_size))
+static LLIST_HEAD(linux_kfree_async_list);
+
static int
linux_kmem_ctor(void *mem, int size, void *arg, int flags)
{
@@ -126,3 +133,23 @@ linux_kmem_cache_destroy(struct linux_kmem_cache *c)
uma_zdestroy(c->cache_zone);
free(c, M_KMALLOC);
}
+
+static void
+linux_kfree_async_fn(void *context, int pending)
+{
+ struct llist_node *freed;
+
+ while((freed = llist_del_first(&linux_kfree_async_list)) != NULL)
+ kfree(freed);
+}
+static struct task linux_kfree_async_task =
+ TASK_INITIALIZER(0, linux_kfree_async_fn, &linux_kfree_async_task);
+
+void
+linux_kfree_async(void *addr)
+{
+ if (addr == NULL)
+ return;
+ llist_add(addr, &linux_kfree_async_list);
+ taskqueue_enqueue(linux_irq_work_tq, &linux_kfree_async_task);
+}
diff --git a/sys/compat/linuxkpi/common/src/linux_work.c b/sys/compat/linuxkpi/common/src/linux_work.c
index 043c5b7d1aff..45025378baa9 100644
--- a/sys/compat/linuxkpi/common/src/linux_work.c
+++ b/sys/compat/linuxkpi/common/src/linux_work.c
@@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
#include <linux/compat.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
+#include <linux/irq_work.h>
#include <sys/kernel.h>
@@ -59,6 +60,8 @@ struct workqueue_struct *system_unbound_wq;
struct workqueue_struct *system_highpri_wq;
struct workqueue_struct *system_power_efficient_wq;
+struct taskqueue *linux_irq_work_tq;
+
static int linux_default_wq_cpus = 4;
static void linux_delayed_work_timer_fn(void *);
@@ -683,3 +686,48 @@ linux_work_uninit(void *arg)
system_highpri_wq = NULL;
}
SYSUNINIT(linux_work_uninit, SI_SUB_TASKQ, SI_ORDER_THIRD, linux_work_uninit, NULL);
+
+void
+linux_irq_work_fn(void *context, int pending)
+{
+ struct irq_work *irqw = context;
+
+ irqw->func(irqw);
+}
+
+static void
+linux_irq_work_init_fn(void *context, int pending)
+{
+ /*
+ * LinuxKPI performs lazy allocation of memory structures required by
+ * current on the first access to it. As some irq_work clients read
+ * it with spinlock taken, we have to preallocate td_lkpi_task before
+ * first call to irq_work_queue(). As irq_work uses a single thread,
+ * it is enough to read current once at SYSINIT stage.
+ */
+ if (current == NULL)
+ panic("irq_work taskqueue is not initialized");
+}
+static struct task linux_irq_work_init_task =
+ TASK_INITIALIZER(0, linux_irq_work_init_fn, &linux_irq_work_init_task);
+
+static void
+linux_irq_work_init(void *arg)
+{
+ linux_irq_work_tq = taskqueue_create_fast("linuxkpi_irq_wq",
+ M_WAITOK, taskqueue_thread_enqueue, &linux_irq_work_tq);
+ taskqueue_start_threads(&linux_irq_work_tq, 1, PWAIT,
+ "linuxkpi_irq_wq");
+ taskqueue_enqueue(linux_irq_work_tq, &linux_irq_work_init_task);
+}
+SYSINIT(linux_irq_work_init, SI_SUB_TASKQ, SI_ORDER_SECOND,
+ linux_irq_work_init, NULL);
+
+static void
+linux_irq_work_uninit(void *arg)
+{
+ taskqueue_drain_all(linux_irq_work_tq);
+ taskqueue_free(linux_irq_work_tq);
+}
+SYSUNINIT(linux_irq_work_uninit, SI_SUB_TASKQ, SI_ORDER_SECOND,
+ linux_irq_work_uninit, NULL);
More information about the dev-commits-src-all
mailing list