sched_pin() bug in SCHED_ULE

mdf at FreeBSD.org mdf at FreeBSD.org
Tue Aug 31 17:16:01 UTC 2010


I recorded the stack any time ts->ts_cpu was set, and when a thread was
migrated by sched_switch() I printed out the recorded info.  Here's
what I found:


XXX bug 67957: moving 0xffffff003ff9b800 from 3 to 1
[1]: pin 0 state 4 move 3 -> 1 done by 0xffffff000cc44000:
#0 0xffffffff802b36b4 at bug67957+0x84
#1 0xffffffff802b5dd4 at sched_affinity+0xd4
#2 0xffffffff8024a707 at cpuset_setthread+0x137
#3 0xffffffff8024aeae at cpuset_setaffinity+0x21e
#4 0xffffffff804a82df at freebsd32_cpuset_setaffinity+0x4f
#5 0xffffffff80295f49 at isi_syscall+0x99
#6 0xffffffff804a630e at ia32_syscall+0x1ce
#7 0xffffffff8046dc60 at Xint0x80_syscall+0x60
[0]: pin 0 state 2 move 0 -> 3 done by 0xffffff000cc44000:
#0 0xffffffff802b36b4 at bug67957+0x84
#1 0xffffffff802b4ad8 at sched_add+0xe8
#2 0xffffffff8029b96a at create_thread+0x34a
#3 0xffffffff8029badc at kern_thr_new+0x8c
#4 0xffffffff804a8912 at freebsd32_thr_new+0x122
#5 0xffffffff80295f49 at isi_syscall+0x99
#6 0xffffffff804a630e at ia32_syscall+0x1ce
#7 0xffffffff8046dc60 at Xint0x80_syscall+0x60

So one thread in the process called cpuset_setaffinity(2), and another
thread in the process was forcibly migrated by the IPI while returning
from a syscall, even though it had td_pinned set.

Given this path, it seems reasonable to me to skip the migration if we
notice that THREAD_CAN_MIGRATE() is false.

Opinions?  My debug code is below.  I'll try to write a short test case
that exhibits this bug.

Thanks,
matthew


Index: kern/sched_ule.c
===================================================================
--- kern/sched_ule.c	(revision 158580)
+++ kern/sched_ule.c	(working copy)
@@ -697,6 +697,41 @@
 	return;
 }

+static void
+bug67957(struct thread *td)
+{
+	int idx;
+
+	THREAD_LOCK_ASSERT(td, MA_OWNED);
+	idx = (td->xxx_idx++ % 5);
+	stack_save(&td->xxx[idx].td_preempt);
+	td->xxx[idx].td_moveto = td->td_sched->ts_cpu;
+	td->xxx[idx].td_movefrom = (td->td_oncpu == NOCPU) ? td->td_lastcpu : td->td_oncpu;
+	td->xxx[idx].td_statewas = td->td_state;
+	td->xxx[idx].td_pinned = td->td_pinned;
+	td->xxx[idx].td_by = curthread;
+}
+
+static void
+pr_bug67957(struct thread *td, int idx)
+{
+	int idx, i;
+
+	printf("XXX bug 67957: moving %p from %d to %d\n",
+	    td, td->td_lastcpu, td->td_sched->ts_cpu);
+	for (i = 0, idx = td->xxx_idx - 1;
+	    i < 5 && idx >= 0;
+	    i++, idx--) {
+		printf("[%d]: pin %d state %d move %d -> %d done by %p:\n",
+		    idx, td->xxx[idx % 5].td_pinned,
+		    td->xxx[idx % 5].td_statewas,
+		    td->xxx[idx % 5].td_movefrom,
+		    td->xxx[idx % 5].td_moveto,
+		    td->xxx[idx % 5].td_by);
+		stack_print_ddb(&td->xxx[idx % 5].td_preempt);
+	}
+}
+
 /*
  * Move a thread from one thread queue to another.
  */
@@ -739,6 +774,7 @@
 	TDQ_UNLOCK(from);
 	sched_rem(td);
 	ts->ts_cpu = cpu;
+	bug67957(td);
 	td->td_lock = TDQ_LOCKPTR(to);
 	tdq_add(to, td, SRQ_YIELDING);
 }
@@ -971,6 +1007,7 @@
 	tdq = TDQ_CPU(cpu);
 	td = ts->ts_thread;
 	ts->ts_cpu = cpu;
+	bug67957(td);

 	/* If the lock matches just return the queue. */
 	if (td->td_lock == TDQ_LOCKPTR(tdq))
@@ -1890,8 +1964,15 @@
 		    SRQ_OURSELF|SRQ_YIELDING;
 		if (ts->ts_cpu == cpuid)
 			tdq_add(tdq, td, srqflag);
-		else
+		else {
+			if (!THREAD_CAN_MIGRATE(td) &&
+			    (ts->ts_flags & TSF_BOUND) == 0) {
+				pr_bug67957(td, idx);
+				panic("XXX");
+			}
 			mtx = sched_switch_migrate(tdq, td, srqflag);
+		}
+		td->xxx_idx = 0;
 	} else {
 		/* This thread must be going to sleep. */
 		TDQ_LOCK(tdq);
@@ -2479,8 +2560,10 @@
 	 * target cpu.
 	 */
 	if (td->td_priority <= PRI_MAX_ITHD && THREAD_CAN_MIGRATE(td) &&
-	    curthread->td_intr_nesting_level)
+	    curthread->td_intr_nesting_level) {
 		ts->ts_cpu = cpuid;
+		bug67957(td);
+	}
 	if (!THREAD_CAN_MIGRATE(td))
 		cpu = ts->ts_cpu;
 	else
@@ -2590,6 +2673,7 @@
 	 */
 	cpu = ts->ts_cpu;
 	ts->ts_cpu = sched_pickcpu(td, 0);
+	bug67957(td);
 	if (cpu != PCPU_GET(cpuid))
 		ipi_selected(1 << cpu, IPI_PREEMPT);
 #endif
@@ -2613,6 +2697,7 @@
 	if (PCPU_GET(cpuid) == cpu)
 		return;
 	ts->ts_cpu = cpu;
+	bug67957(td);
 	/* When we return from mi_switch we'll be on the correct cpu. */
 	mi_switch(SW_VOL, NULL);
 #endif
Index: sys/proc.h
===================================================================
--- sys/proc.h	(revision 158580)
+++ sys/proc.h	(working copy)
@@ -68,6 +68,8 @@
 #include <sys/isi_mountroot.h>
 #include <sys/isi_oplock.h>

+#include <sys/stack.h> /* XXX bug 67957 */
+
 /*
  * One structure allocated per session.
  *
@@ -356,6 +358,16 @@
 	int		td_errno;	/* Error returned by last syscall. */
 	uint64_t	td_nfs_root_lin;/* (k) Root lin for vis .snapshot*/
 	struct osd	td_osd;		/* (k) Object specific data. */
+
+	struct {
+		struct stack td_preempt;/* XXX bug 67957 */
+		u_char	td_movefrom;
+		u_char	td_moveto;/* XXX bug 67957 */
+		u_char	td_statewas;/* XXX bug 67957 */
+		u_char	td_pinned;
+		struct thread *td_by;
+	} xxx[5];
+	int xxx_idx;/* XXX bug 67957 */
 };

 struct mtx *thread_lock_block(struct thread *);


More information about the freebsd-current mailing list