sched_pin() bug in SCHED_ULE
mdf at FreeBSD.org
mdf at FreeBSD.org
Tue Aug 31 17:16:01 UTC 2010
I recorded the stack any time ts->ts_cpu was set and when a thread was
migrated by sched_switch() I printed out the recorded info. Here's
what I found:
XXX bug 67957: moving 0xffffff003ff9b800 from 3 to 1
[1]: pin 0 state 4 move 3 -> 1 done by 0xffffff000cc44000:
#0 0xffffffff802b36b4 at bug67957+0x84
#1 0xffffffff802b5dd4 at sched_affinity+0xd4
#2 0xffffffff8024a707 at cpuset_setthread+0x137
#3 0xffffffff8024aeae at cpuset_setaffinity+0x21e
#4 0xffffffff804a82df at freebsd32_cpuset_setaffinity+0x4f
#5 0xffffffff80295f49 at isi_syscall+0x99
#6 0xffffffff804a630e at ia32_syscall+0x1ce
#7 0xffffffff8046dc60 at Xint0x80_syscall+0x60
[0]: pin 0 state 2 move 0 -> 3 done by 0xffffff000cc44000:
#0 0xffffffff802b36b4 at bug67957+0x84
#1 0xffffffff802b4ad8 at sched_add+0xe8
#2 0xffffffff8029b96a at create_thread+0x34a
#3 0xffffffff8029badc at kern_thr_new+0x8c
#4 0xffffffff804a8912 at freebsd32_thr_new+0x122
#5 0xffffffff80295f49 at isi_syscall+0x99
#6 0xffffffff804a630e at ia32_syscall+0x1ce
#7 0xffffffff8046dc60 at Xint0x80_syscall+0x60
So one thread in the process called cpuset_setaffinity(2), and another
thread in the same process was forcibly migrated by the IPI on its way
back from a syscall, even though it had td_pinned set.
Given this path, it seems reasonable to me to skip the migrate if we
notice THREAD_CAN_MIGRATE is false.
Opinions? My debug code is below. I'll try to write a short testcase
that exhibits this bug.
Thanks,
matthew
Index: kern/sched_ule.c
===================================================================
--- kern/sched_ule.c (revision 158580)
+++ kern/sched_ule.c (working copy)
@@ -697,6 +697,41 @@
return;
}
+/*
+ * XXX bug 67957 debugging aid: remember who changed td's target CPU.
+ *
+ * Meant to be called right after td->td_sched->ts_cpu has been assigned.
+ * Asserts that td's thread lock is owned, then stores the current stack,
+ * the source and destination CPUs, td's state and pin count, and curthread
+ * in the next slot of the five-entry td->xxx[] ring.  Once more than five
+ * records exist, the oldest slot is overwritten.
+ */
+static void
+bug67957(struct thread *td)
+{
+	int idx;
+
+	THREAD_LOCK_ASSERT(td, MA_OWNED);
+	idx = (td->xxx_idx++ % 5);
+	stack_save(&td->xxx[idx].td_preempt);
+	td->xxx[idx].td_moveto = td->td_sched->ts_cpu;
+	/* Fall back to td_lastcpu when td_oncpu is NOCPU. */
+	td->xxx[idx].td_movefrom =
+	    (td->td_oncpu == NOCPU) ? td->td_lastcpu : td->td_oncpu;
+	td->xxx[idx].td_statewas = td->td_state;
+	td->xxx[idx].td_pinned = td->td_pinned;
+	td->xxx[idx].td_by = curthread;
+}
+
+/*
+ * XXX bug 67957 debugging aid: print the ts_cpu changes recorded for td.
+ *
+ * Prints one summary line for the migration in progress, then the records
+ * saved by bug67957() from newest to oldest (at most five), each followed
+ * by its saved stack.  The idx argument is kept so the existing caller
+ * still matches, but its incoming value is ignored: it is reused as the
+ * record cursor.
+ */
+static void
+pr_bug67957(struct thread *td, int idx)
+{
+	int i;
+
+	printf("XXX bug 67957: moving %p from %d to %d\n",
+	    td, td->td_lastcpu, td->td_sched->ts_cpu);
+	/*
+	 * Do not declare a local idx here: that would redeclare the
+	 * parameter in the same scope, which does not compile.
+	 */
+	for (i = 0, idx = td->xxx_idx - 1;
+	    i < 5 && idx >= 0;
+	    i++, idx--) {
+		printf("[%d]: pin %d state %d move %d -> %d done by %p:\n",
+		    idx, td->xxx[idx % 5].td_pinned,
+		    td->xxx[idx % 5].td_statewas,
+		    td->xxx[idx % 5].td_movefrom,
+		    td->xxx[idx % 5].td_moveto,
+		    td->xxx[idx % 5].td_by);
+		stack_print_ddb(&td->xxx[idx % 5].td_preempt);
+	}
+}
+
/*
* Move a thread from one thread queue to another.
*/
@@ -739,6 +774,7 @@
TDQ_UNLOCK(from);
sched_rem(td);
ts->ts_cpu = cpu;
+ bug67957(td);
td->td_lock = TDQ_LOCKPTR(to);
tdq_add(to, td, SRQ_YIELDING);
}
@@ -971,6 +1007,7 @@
tdq = TDQ_CPU(cpu);
td = ts->ts_thread;
ts->ts_cpu = cpu;
+ bug67957(td);
/* If the lock matches just return the queue. */
if (td->td_lock == TDQ_LOCKPTR(tdq))
@@ -1890,8 +1964,15 @@
SRQ_OURSELF|SRQ_YIELDING;
if (ts->ts_cpu == cpuid)
tdq_add(tdq, td, srqflag);
- else
+ else {
+ if (!THREAD_CAN_MIGRATE(td) &&
+ (ts->ts_flags & TSF_BOUND) == 0) {
+ pr_bug67957(td, idx);
+ panic("XXX");
+ }
mtx = sched_switch_migrate(tdq, td, srqflag);
+ }
+ td->xxx_idx = 0;
} else {
/* This thread must be going to sleep. */
TDQ_LOCK(tdq);
@@ -2479,8 +2560,10 @@
* target cpu.
*/
if (td->td_priority <= PRI_MAX_ITHD && THREAD_CAN_MIGRATE(td) &&
- curthread->td_intr_nesting_level)
+ curthread->td_intr_nesting_level) {
ts->ts_cpu = cpuid;
+ bug67957(td);
+ }
if (!THREAD_CAN_MIGRATE(td))
cpu = ts->ts_cpu;
else
@@ -2590,6 +2673,7 @@
*/
cpu = ts->ts_cpu;
ts->ts_cpu = sched_pickcpu(td, 0);
+ bug67957(td);
if (cpu != PCPU_GET(cpuid))
ipi_selected(1 << cpu, IPI_PREEMPT);
#endif
@@ -2613,6 +2697,7 @@
if (PCPU_GET(cpuid) == cpu)
return;
ts->ts_cpu = cpu;
+ bug67957(td);
/* When we return from mi_switch we'll be on the correct cpu. */
mi_switch(SW_VOL, NULL);
#endif
Index: sys/proc.h
===================================================================
--- sys/proc.h (revision 158580)
+++ sys/proc.h (working copy)
@@ -68,6 +68,8 @@
#include <sys/isi_mountroot.h>
#include <sys/isi_oplock.h>
+#include <sys/stack.h> /* XXX bug 67957 */
+
/*
* One structure allocated per session.
*
@@ -356,6 +358,16 @@
int td_errno; /* Error returned by last syscall. */
uint64_t td_nfs_root_lin;/* (k) Root lin for vis .snapshot*/
struct osd td_osd; /* (k) Object specific data. */
+
+ struct {
+ struct stack td_preempt;/* XXX bug 67957: stack saved when ts_cpu was set */
+ u_char td_movefrom; /* XXX bug 67957: td_oncpu, or td_lastcpu if NOCPU */
+ u_char td_moveto;/* XXX bug 67957: ts_cpu value that was just set */
+ u_char td_statewas;/* XXX bug 67957: td_state when recorded */
+ u_char td_pinned; /* XXX bug 67957: td_pinned when recorded */
+ struct thread *td_by; /* XXX bug 67957: curthread when recorded */
+ } xxx[5];
+ int xxx_idx;/* XXX bug 67957: records taken so far; slot is xxx_idx % 5 */
};
struct mtx *thread_lock_block(struct thread *);
More information about the freebsd-current
mailing list