scheduler IPIs during shutdown (was: svn commit: r193170 -
projects/pnet/sys/kern (fwd))
Robert Watson
rwatson at FreeBSD.org
Sun May 31 16:24:05 UTC 2009
Hi Jeff--
I think the reason we're now seeing panics like the:
spin lock 0xc08a7e80 (sched lock 1) held by 0xc508b460 (tid 100005) too long
As well as the ones occasionally reported on the lists on SMP boxes
(kern/134584, etc) is that we're shutting down CPUs while holding their thread
locks and then trying to start work on them. Obviously, this is doomed to
failure for a number of reasons :-).
Without trying to solve the general problem of CPUs coming and going for 8.0,
I think we do need some short-term solution for the release. The basic
scenario, as I understand it so far, is:
- System shutdown kicks off, and CPU 0 starts IPIing CPUs to shut them down,
which apparently can occur while holding the thread lock:
db> trace 100005
Tracing pid 11 tid 100005 td 0xc4d546c0
cpustop_handler(2,c4a53b64,c0b70cad,c0d9c680,c4a53ae4,...) at
cpustop_handler+0x32
ipi_nmi_handler(c0d9c680,c4a53ae4,2,f,c4d52a90,...) at ipi_nmi_handler+0x2f
trap(c4a53b70) at trap+0x2d
calltrap() at calltrap+0x6
--- trap 0x13, eip = 0xc0887a61, esp = 0xc4a53bb0, ebp = 0xc4a53bdc ---
sched_switch(c4d546c0,0,60c,187,5dd9d2a1,...) at sched_switch+0x401
mi_switch(60c,0,c0c3d1ae,813,1,...) at mi_switch+0x200
sched_preempt(c4d546c0,1,c4d54d80,c4a53cb4,c0b5428e,...) at sched_preempt+0x9f
ipi_bitmap_handler(8,28,28,31,c0d8e500,...) at ipi_bitmap_handler+0x34
Xipi_intr_bitmap_handler() at Xipi_intr_bitmap_handler+0x2e
--- interrupt, eip = 0xc0856a17, esp = 0xc4a53c8c, ebp = 0xc4a53cb4 ---
_thread_lock_flags(c4d546c0,0,c0c3d1ae,a04,c4d546c0,...) at
_thread_lock_flags+0x177
sched_idletd(0,c4a53d38,c0c36ad3,335,c4d52a90,...) at sched_idletd+0x251
fork_exit(c0888510,0,c4a53d38) at fork_exit+0xb8
fork_trampoline() at fork_trampoline+0x8
--- trap 0, eip = 0, esp = 0xc4a53d70, ebp = 0 ---
- hardclock fires on CPU 0 and tries to schedule a SWI -- typically softclock
or netisr, which bumps into the "leaked" thread lock:
db> bt
Tracing pid 1 tid 100002 td 0xc4d54d80
kdb_enter(c0c3b5e5,c0c3b5e5,c0c39ccd,c4a49ad0,0,...) at kdb_enter+0x3a
panic(c0c39ccd,c4d546c0,c0d8eb44,c4d546c0,186a5,...) at panic+0x136
_mtx_lock_spin_failed(1,9,c0c36d52,320,0,...) at _mtx_lock_spin_failed+0x51
_thread_lock_flags(c4d54240,0,c0c36d52,320,dd313180,...) at
_thread_lock_flags+0x175
intr_event_schedule_thread(c4a49b58,c4a49b68,c0915dc4,c4d3a180,0,...) at
intr_event_schedule_thread+0xc7
swi_sched(c4d3a180,0,c4a49b74,c0859a2f,c0d893d4,...) at swi_sched+0x28
netisr_sched_poll(c0d893d4,c4a49b88,c0824064,0,2,...) at
netisr_sched_poll+0x24
hardclock_device_poll(0,2) at hardclock_device_poll+0xcf
hardclock(0,c0b74a49,0,27f,5dd9e8a2,...) at hardclock+0x44
lapic_handle_timer(c4a49bb0) at lapic_handle_timer+0x9f
Xtimerint() at Xtimerint+0x1f
--- interrupt, eip = 0xc0b74a49, esp = 0xc4a49bf0, ebp = 0xc4a49c0c ---
DELAY(f4240,0,c4d40980,c4a49c2c,c08653d3,...) at DELAY+0x89
cpu_reset(f4240,c4a49c68,c0865eaf,0,0,...) at cpu_reset+0xd5
shutdown_reset(0,0,c0c3b4a3,1a7,0,...) at shutdown_reset+0x23
boot(c0d89210,0,c0c3b4a3,ad,c4a49d2c,...) at boot+0x75f
reboot(c4d54d80,c4a49cf8,4,c0c428a4,c0d1dc28,...) at reboot+0x4b
syscall(c4a49d38) at syscall+0x2a3
Xint0x80_syscall() at Xint0x80_syscall+0x20
Possibly hardclock should simply know not to kick off softclock at that point
in the shutdown (just like I've taught it not to kick off netisr in the
device_polling code in my branch), but more generally, sched_ule probably
needs to not hold the per-cpu runqueue lock, and/or sched_ule needs to not try
to start work on CPUs that aren't running. If the SWI is pinned on or bound
to an already shut down CPU, then we should in fact panic, but otherwise it
seems it should preempt the current thread and run it on the CPU managing the
shutdown.
Or, we could go with the old-world order view and not allow interrupts to
fire, but I wonder if all our device drivers would be comfortable with that.
Robert N M Watson
Computer Laboratory
University of Cambridge
---------- Forwarded message ----------
Date: Sun, 31 May 2009 13:52:17 +0000 (UTC)
From: Robert Watson <rwatson at FreeBSD.org>
To: src-committers at freebsd.org, svn-src-projects at freebsd.org
Subject: svn commit: r193170 - projects/pnet/sys/kern
Author: rwatson
Date: Sun May 31 13:52:17 2009
New Revision: 193170
URL: http://svn.freebsd.org/changeset/base/193170
Log:
Add a shutdown handler for device polling -- don't issue wakeups to the
netisr once file systems are done syncing, otherwise the scheduler may
generate IPIs to CPUs that have already been shutdown, leading to a
panic.
As similar panics (spin lock 0xc0d8e500 (sched lock 1) held by
0xc4d546c0 (tid 100005) too long) have been reported on both 7.x and 8.x
in other code, we might want to think about whether there's some missing
scheduler shutdown logic to handle this case for unpinned/unbound
threads by migrating them to the CPU managing the shutdown and allowing
them to preempt.
Modified:
projects/pnet/sys/kern/kern_poll.c
Modified: projects/pnet/sys/kern/kern_poll.c
==============================================================================
--- projects/pnet/sys/kern/kern_poll.c Sun May 31 12:36:14 2009 (r193169)
+++ projects/pnet/sys/kern/kern_poll.c Sun May 31 13:52:17 2009 (r193170)
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/proc.h>
+#include <sys/eventhandler.h>
#include <sys/resourcevar.h>
#include <sys/socket.h> /* needed by net/if.h */
#include <sys/sockio.h>
@@ -110,6 +111,7 @@ SYSCTL_UINT(_kern_polling, OID_AUTO, bur
static int netisr_poll_scheduled;
static int netisr_pollmore_scheduled;
+static int poll_shutting_down;
static int poll_burst_max_sysctl(SYSCTL_HANDLER_ARGS)
{
@@ -261,10 +263,19 @@ struct pollrec {
static struct pollrec pr[POLL_LIST_LEN];
static void
+poll_shutdown(void *arg, int howto)
+{
+
+ poll_shutting_down = 1;
+}
+
+static void
init_device_poll(void)
{
mtx_init(&poll_mtx, "polling", NULL, MTX_DEF);
+ EVENTHANDLER_REGISTER(shutdown_post_sync, poll_shutdown, NULL,
+ SHUTDOWN_PRI_LAST);
}
SYSINIT(device_poll, SI_SUB_CLOCKS, SI_ORDER_MIDDLE, init_device_poll, NULL);
@@ -288,7 +299,7 @@ hardclock_device_poll(void)
static struct timeval prev_t, t;
int delta;
- if (poll_handlers == 0)
+ if (poll_handlers == 0 || poll_shutting_down)
return;
microuptime(&t);
More information about the freebsd-current
mailing list