git: 245157fd8a38 - main - ktrcsw(): should not be called when the thread is owning interlock or on sleepq
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 30 Jan 2026 18:46:03 UTC
The branch main has been updated by kib:
URL: https://cgit.FreeBSD.org/src/commit/?id=245157fd8a382c3989075789ee98582665f3b31d
commit 245157fd8a382c3989075789ee98582665f3b31d
Author: Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2026-01-22 21:46:01 +0000
Commit: Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2026-01-30 18:45:48 +0000
ktrcsw(): should not be called when the thread is owning interlock or on sleepq
The issue is that for ktrcsw() we lock the ktrace_mtx mutex while owning
the interlock from a subsystem that called msleep(). In particular, the
VM subsystem might call msleep() if page allocation failed. This
establishes order VM locks (e.g. domain free queue lock) -> ktrace_mtx.
Calling free() while owning ktrace_mtx gives the reverse order.
Worse, msleep_spin_sbt() call s ktrcsw() while the thread is put on
sleep queue. Then, since the mutex might be contested, the thread needs
to be put on turnstil, which cannot work.
Move the ktrcsw() call for switch-out after the wakeup, when the thread
does not yet re-obtained any locks. From there, we call a special
version of ktrcsw(), which is passed the actual time when the context
switch occured.
The drawback is that the switch-out record is only written in the
ktrace.out file after the switch-in occurred, but this is probably not
too serious.
Reported and tested by: pho
Reviewed by: markj
Sponsored by: The FreeBSD Foundation
MFC after: 1 week
Differential revision: https://reviews.freebsd.org/D54831
---
sys/kern/kern_ktrace.c | 18 ++++++++++++++++--
sys/kern/kern_synch.c | 26 +++++++++++---------------
sys/sys/ktrace.h | 1 +
3 files changed, 28 insertions(+), 17 deletions(-)
diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c
index b58e69a3f38e..f3ee1c53fafd 100644
--- a/sys/kern/kern_ktrace.c
+++ b/sys/kern/kern_ktrace.c
@@ -838,8 +838,8 @@ ktrpsig(int sig, sig_t action, sigset_t *mask, int code)
ktrace_exit(td);
}
-void
-ktrcsw(int out, int user, const char *wmesg)
+static void
+ktrcsw_impl(int out, int user, const char *wmesg, const struct timespec *tv)
{
struct thread *td = curthread;
struct ktr_request *req;
@@ -854,6 +854,8 @@ ktrcsw(int out, int user, const char *wmesg)
kc = &req->ktr_data.ktr_csw;
kc->out = out;
kc->user = user;
+ if (tv != NULL)
+ req->ktr_header.ktr_time = *tv;
if (wmesg != NULL)
strlcpy(kc->wmesg, wmesg, sizeof(kc->wmesg));
else
@@ -862,6 +864,18 @@ ktrcsw(int out, int user, const char *wmesg)
ktrace_exit(td);
}
+void
+ktrcsw(int out, int user, const char *wmesg)
+{
+ ktrcsw_impl(out, user, wmesg, NULL);
+}
+
+void
+ktrcsw_out(const struct timespec *tv, const char *wmesg)
+{
+ ktrcsw_impl(1, 0, wmesg, tv);
+}
+
void
ktrstruct(const char *name, const void *data, size_t datalen)
{
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index fc6c9857463c..22628c78d8a5 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -133,6 +133,7 @@ _sleep(const void *ident, struct lock_object *lock, int priority,
{
struct thread *td __ktrace_used;
struct lock_class *class;
+ struct timespec sw_out_tv __ktrace_used;
uintptr_t lock_state;
int catch, pri, rval, sleepq_flags;
WITNESS_SAVE_DECL(lock_witness);
@@ -141,7 +142,7 @@ _sleep(const void *ident, struct lock_object *lock, int priority,
td = curthread;
#ifdef KTRACE
if (KTRPOINT(td, KTR_CSW))
- ktrcsw(1, 0, wmesg);
+ nanotime(&sw_out_tv);
#endif
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
"Sleeping on \"%s\"", wmesg);
@@ -222,8 +223,10 @@ _sleep(const void *ident, struct lock_object *lock, int priority,
rval = 0;
}
#ifdef KTRACE
- if (KTRPOINT(td, KTR_CSW))
+ if (KTRPOINT(td, KTR_CSW)) {
+ ktrcsw_out(&sw_out_tv, wmesg);
ktrcsw(0, 0, wmesg);
+ }
#endif
PICKUP_GIANT();
if (lock != NULL && lock != &Giant.lock_object && !(priority & PDROP)) {
@@ -239,6 +242,7 @@ msleep_spin_sbt(const void *ident, struct mtx *mtx, const char *wmesg,
sbintime_t sbt, sbintime_t pr, int flags)
{
struct thread *td __ktrace_used;
+ struct timespec sw_out_tv __ktrace_used;
int rval;
WITNESS_SAVE_DECL(mtx);
@@ -266,19 +270,9 @@ msleep_spin_sbt(const void *ident, struct mtx *mtx, const char *wmesg,
if (sbt != 0)
sleepq_set_timeout_sbt(ident, sbt, pr, flags);
- /*
- * Can't call ktrace with any spin locks held so it can lock the
- * ktrace_mtx lock, and WITNESS_WARN considers it an error to hold
- * any spin lock. Thus, we have to drop the sleepq spin lock while
- * we handle those requests. This is safe since we have placed our
- * thread on the sleep queue already.
- */
#ifdef KTRACE
- if (KTRPOINT(td, KTR_CSW)) {
- sleepq_release(ident);
- ktrcsw(1, 0, wmesg);
- sleepq_lock(ident);
- }
+ if (KTRPOINT(td, KTR_CSW))
+ nanotime(&sw_out_tv);
#endif
#ifdef WITNESS
sleepq_release(ident);
@@ -293,8 +287,10 @@ msleep_spin_sbt(const void *ident, struct mtx *mtx, const char *wmesg,
rval = 0;
}
#ifdef KTRACE
- if (KTRPOINT(td, KTR_CSW))
+ if (KTRPOINT(td, KTR_CSW)) {
+ ktrcsw_out(&sw_out_tv, wmesg);
ktrcsw(0, 0, wmesg);
+ }
#endif
PICKUP_GIANT();
mtx_lock_spin(mtx);
diff --git a/sys/sys/ktrace.h b/sys/sys/ktrace.h
index 822822a6ffe7..1ba5c84000b0 100644
--- a/sys/sys/ktrace.h
+++ b/sys/sys/ktrace.h
@@ -340,6 +340,7 @@ ktr_get_tracevp(struct proc *p, bool ref)
void ktr_io_params_free(struct ktr_io_params *);
void ktrnamei(const char *);
void ktrcsw(int, int, const char *);
+void ktrcsw_out(const struct timespec *, const char *);
void ktrpsig(int, sig_t, sigset_t *, int);
void ktrfault(vm_offset_t, int);
void ktrfaultend(int);