git: 05fe82455f26 - main - linuxkpi: races between linux_queue_delayed_work_on() and linux_cancel_delayed_work_sync()
Date: Tue, 07 Nov 2023 11:23:03 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=05fe82455f263ad107a860ce20dd89e1a5c1619c

commit 05fe82455f263ad107a860ce20dd89e1a5c1619c
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2023-11-04 07:45:48 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2023-11-07 10:58:04 +0000

    linuxkpi: races between linux_queue_delayed_work_on() and linux_cancel_delayed_work_sync()

    1. Suppose that linux_queue_delayed_work_on() is called with a non-zero
    delay and finds work.state equal to WORK_ST_IDLE.  It sets the state to
    WORK_ST_TIMER and locks timer.mtx.  Now, if linux_cancel_delayed_work_sync()
    was also called in the meantime, read the state as WORK_ST_TIMER, and had
    already taken the mutex, it executes callout_stop() on a non-armed callout.
    Then linux_queue_delayed_work_on() continues and schedules the callout.
    But the return value from cancel() is false, making it possible for the
    requeue from the callback to slip in.

    2. If linux_cancel_delayed_work_sync() returned true, we need to cancel
    again: the requeue from the callback could have revived the work.

    The end result is that we may schedule a callout that might be freed,
    since cancel_delayed_work_sync() claims that everything was stopped.
    This contradicts the way the KPI is used in Linux, where consumers
    expect that cancel_delayed_work_sync() is reliable on its own.

    Reviewed by:    markj
    Discussed with: bz
    Sponsored by:   NVidia networking
    MFC after:      1 week
    Differential revision: https://reviews.freebsd.org/D42468
---
 sys/compat/linuxkpi/common/src/linux_work.c | 36 ++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/sys/compat/linuxkpi/common/src/linux_work.c b/sys/compat/linuxkpi/common/src/linux_work.c
index 990ba5d20fd5..888ac97dbff6 100644
--- a/sys/compat/linuxkpi/common/src/linux_work.c
+++ b/sys/compat/linuxkpi/common/src/linux_work.c
@@ -221,16 +221,19 @@ linux_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 		[WORK_ST_EXEC] = WORK_ST_TIMER,		/* start timeout */
 		[WORK_ST_CANCEL] = WORK_ST_TIMER,	/* start timeout */
 	};
+	bool res;
 
 	if (atomic_read(&wq->draining) != 0)
 		return (!work_pending(&dwork->work));
 
+	mtx_lock(&dwork->timer.mtx);
 	switch (linux_update_state(&dwork->work.state, states)) {
 	case WORK_ST_EXEC:
 	case WORK_ST_CANCEL:
 		if (delay == 0 && linux_work_exec_unblock(&dwork->work) != 0) {
 			dwork->timer.expires = jiffies;
-			return (true);
+			res = true;
+			goto out;
 		}
 		/* FALLTHROUGH */
 	case WORK_ST_IDLE:
@@ -240,20 +243,21 @@ linux_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 		if (delay == 0) {
 			linux_delayed_work_enqueue(dwork);
 		} else if (unlikely(cpu != WORK_CPU_UNBOUND)) {
-			mtx_lock(&dwork->timer.mtx);
 			callout_reset_on(&dwork->timer.callout, delay,
 			    &linux_delayed_work_timer_fn, dwork, cpu);
-			mtx_unlock(&dwork->timer.mtx);
 		} else {
-			mtx_lock(&dwork->timer.mtx);
 			callout_reset(&dwork->timer.callout, delay,
 			    &linux_delayed_work_timer_fn, dwork);
-			mtx_unlock(&dwork->timer.mtx);
 		}
-		return (true);
+		res = true;
+		break;
 	default:
-		return (false);		/* already on a queue */
+		res = false;
+		break;
 	}
+out:
+	mtx_unlock(&dwork->timer.mtx);
+	return (res);
 }
 
 void
@@ -467,8 +471,8 @@ linux_cancel_delayed_work(struct delayed_work *dwork)
  * fashion. It returns non-zero if the work was successfully
  * cancelled. Else the work was already cancelled.
  */
-bool
-linux_cancel_delayed_work_sync(struct delayed_work *dwork)
+static bool
+linux_cancel_delayed_work_sync_int(struct delayed_work *dwork)
 {
 	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
 		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
@@ -478,7 +482,6 @@ linux_cancel_delayed_work_sync(struct delayed_work *dwork)
 		[WORK_ST_CANCEL] = WORK_ST_IDLE,	/* cancel and drain */
 	};
 	struct taskqueue *tq;
-	bool retval = false;
 	int ret, state;
 	bool cancelled;
 
@@ -490,7 +493,7 @@ linux_cancel_delayed_work_sync(struct delayed_work *dwork)
 	switch (state) {
 	case WORK_ST_IDLE:
 		mtx_unlock(&dwork->timer.mtx);
-		return (retval);
+		return (false);
 	case WORK_ST_TIMER:
 	case WORK_ST_CANCEL:
 		cancelled = (callout_stop(&dwork->timer.callout) == 1);
@@ -512,6 +515,17 @@ linux_cancel_delayed_work_sync(struct delayed_work *dwork)
 	}
 }
 
+bool
+linux_cancel_delayed_work_sync(struct delayed_work *dwork)
+{
+	bool res;
+
+	res = false;
+	while (linux_cancel_delayed_work_sync_int(dwork))
+		res = true;
+	return (res);
+}
+
 /*
  * This function waits until the given work structure is completed.
  * It returns non-zero if the work was successfully
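
To see why race 1 goes away, it may help to view the queue side in
isolation.  Below is a minimal userspace sketch of the ordering the commit
establishes, not the linuxkpi code itself: the struct dwork_model, the
states ST_IDLE/ST_TIMER, the armed flag, and queue_delayed() are all
hypothetical stand-ins.  The key point is that the mutex is now taken
before the state transition, so a concurrent cancel that observes the
TIMER state blocks until the timer is actually armed, and its stop of the
timer is then meaningful rather than a no-op on an unarmed callout.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical stand-ins for the linuxkpi work state and structures. */
enum work_state { ST_IDLE, ST_TIMER };

struct dwork_model {
	pthread_mutex_t		timer_mtx;	/* plays the role of timer.mtx */
	_Atomic enum work_state	state;		/* plays the role of work.state */
	bool			armed;		/* plays the role of the callout */
};

static bool
queue_delayed(struct dwork_model *dw)
{
	bool res = false;

	/*
	 * Lock before the IDLE -> TIMER transition: a concurrent cancel
	 * that reads state == ST_TIMER now waits here until the timer
	 * is armed, closing the window described in point 1.
	 */
	pthread_mutex_lock(&dw->timer_mtx);
	if (atomic_exchange(&dw->state, ST_TIMER) == ST_IDLE) {
		dw->armed = true;	/* callout_reset() in the real code */
		res = true;
	}
	pthread_mutex_unlock(&dw->timer_mtx);
	return (res);
}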
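
The second half of the fix is the wrapper loop.  A single internal cancel
pass can race with a requeue issued from the work callback, so
linux_cancel_delayed_work_sync() now repeats the pass until it finds the
work idle.  The same pattern, continuing the hypothetical model above
(again a sketch, not the kernel API):

/* Single cancel pass: true if this pass actually stopped anything. */
static bool
cancel_sync_int(struct dwork_model *dw)
{
	bool res = false;

	pthread_mutex_lock(&dw->timer_mtx);
	if (atomic_exchange(&dw->state, ST_IDLE) != ST_IDLE) {
		dw->armed = false;	/* callout_stop() and drain in the real code */
		res = true;
	}
	pthread_mutex_unlock(&dw->timer_mtx);
	return (res);
}

static bool
cancel_sync(struct dwork_model *dw)
{
	bool res = false;

	/*
	 * A pass that returns true may have raced with a requeue from
	 * the callback re-arming the work after the pass dropped the
	 * mutex.  Repeat until a pass finds the work already idle; only
	 * then is "everything was stopped" a claim the caller can trust.
	 */
	while (cancel_sync_int(dw))
		res = true;
	return (res);
}

The loop terminates because an extra iteration is needed only when a
requeue slipped in between passes; once the callback stops requeueing,
a pass finds the work idle and returns false.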