lockups
Bruce Evans
bde at zeta.org.au
Fri May 14 15:32:09 PDT 2004
On Fri, 14 May 2004, John Baldwin wrote:
> On Thursday 13 May 2004 11:35 pm, Jason King wrote:
> > I'm having some problems with -CURRENT locking up, I'm hoping maybe
> > someone will have some suggestions.
> >
> > Symptoms:
> >
> > (This is both on 5.2.1-CURRENT as well as booting off the 5.2.1-RELEASE
> > cdrom)
> >
> > During normal boot, kernel freezes before it even starts init.
> > Ctrl-Alt-Delete does not work, power button must be used to reboot.
> >
> > boot -v reveals 'Interrupt storm on "dc0"; throttling interrupt
> > source'. Thinking it might be a bad network card (though it works fine
> > in XP and worked fine when I had 5.1-RELEASE installed), I removed the
> > card, and I got the same error, just on a different device (pcm0).
>
> Ok, this would explain the slow boot w/o ACPI as well if interrupts are not
> routed correctly. Does the machine boot ok if you do 'set
> hint.apic.0.disabled=1' from the boot loader?
Here are my current quick fixes for interrupt storm handling on another
nForce2 system (A7N8X-E), which has interrupt storms on all interrupts
numbered 16 and above. Interrupt storms are often not detected because the next
interrupt doesn't happen until a little after the loop exits, and when
they are detected you wish that they weren't because non-storming
interrupts (with the same interrupt number as storming ones) are
throttled to a very low rate too. The DELAY() in kern_intr.c may be
unnecessary now that there is a DELAY() in intr_machdep.c. A
fully-storming interrupt is supposed to be reduced to polling after
every clock tick, but without the DELAY() in intr_machdep.c only every
second fully-storming interrupt was detected as storming. Without either
DELAY(), only every Nth
(N large) fully-storming interrupt was detected as storming (probably
due to accidental delays for other interrupt handling).
%%%
Index: kern/kern_intr.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_intr.c,v
retrieving revision 1.108
diff -u -2 -r1.108 kern_intr.c
--- kern/kern_intr.c 17 Apr 2004 02:46:05 -0000 1.108
+++ kern/kern_intr.c 24 Apr 2004 14:57:39 -0000
@@ -39,4 +39,5 @@
#include <sys/kthread.h>
#include <sys/ktr.h>
+#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
@@ -495,5 +496,5 @@
struct thread *td;
struct proc *p;
- int count, warned;
+ int count, warming, warned;
td = curthread;
@@ -502,5 +503,5 @@
KASSERT(ithd->it_td == td && td->td_ithd == ithd,
("%s: ithread and proc linkage out of sync", __func__));
- count = 0;
+ warming = 10 * intr_storm_threshold;
warned = 0;
@@ -524,4 +525,5 @@
CTR4(KTR_INTR, "%s: pid %d: (%s) need=%d", __func__,
p->p_pid, p->p_comm, ithd->it_need);
+ count = 0;
while (ithd->it_need) {
/*
@@ -532,23 +534,4 @@
*/
atomic_store_rel_int(&ithd->it_need, 0);
-
- /*
- * If we detect an interrupt storm, pause with
- * the source masked for 1/10th of a second.
- */
- if (intr_storm_threshold != 0 && count >=
- intr_storm_threshold) {
- if (!warned) {
- printf(
- "Interrupt storm detected on \"%s\"; throttling interrupt source\n",
- p->p_comm);
- warned = 1;
- }
- tsleep(&count, td->td_priority, "istorm",
- hz / 10);
- count = 0;
- } else
- count++;
-
restart:
TAILQ_FOREACH(ih, &ithd->it_handlers, ih_next) {
@@ -576,6 +559,53 @@
mtx_unlock(&Giant);
}
- if (ithd->it_enable != NULL)
+ if (ithd->it_enable != NULL) {
ithd->it_enable(ithd->it_vector);
+
+ /*
+ * Storm detection needs a delay here
+ * to see slightly delayed interrupts
+ * on some machines, but we don't
+ * want to always delay, so only delay
+ * while warming up.
+ */
+ if (warming != 0) {
+ DELAY(1);
+ --warming;
+ }
+ }
+
+ /*
+ * If we detect an interrupt storm, sleep until
+ * the next hardclock tick. We sleep at the
+ * end of the loop instead of at the beginning
+ * to ensure that we see slightly delayed
+ * interrupts.
+ */
+ if (count >= intr_storm_threshold) {
+ if (!warned) {
+ printf(
+ "Interrupt storm detected on \"%s\"; throttling interrupt source\n",
+ p->p_comm);
+ warned = 1;
+ }
+ if (cold)
+ Debugger("istorm botch");
+ tsleep(&count, td->td_priority, "istorm", 1);
+
+ /*
+ * Fudge the count to re-throttle if the
+ * interrupt is still active. Our storm
+ * detection is too primitive to detect
+ * whether the storm has gone away
+ * reliably, even if we were to waste a
+ * lot of time spinning for the next
+ * intr_storm_threshold interrupts, so
+ * we assume that the storm hasn't gone
+ * away unless the interrupt repeats
+ * less often than the hardclock interrupt.
+ */
+ count = INT_MAX - 1;
+ }
+ count++;
}
WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread");
@@ -590,5 +620,4 @@
if (!ithd->it_need) {
TD_SET_IWAIT(td);
- count = 0;
CTR2(KTR_INTR, "%s: pid %d: done", __func__, p->p_pid);
mi_switch(SW_VOL);
Index: i386/i386/intr_machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/intr_machdep.c,v
retrieving revision 1.5
diff -u -2 -r1.5 intr_machdep.c
--- i386/i386/intr_machdep.c 4 May 2004 21:02:56 -0000 1.5
+++ i386/i386/intr_machdep.c 5 May 2004 19:55:30 -0000
@@ -38,4 +38,5 @@
*/
+#include "opt_apic.h"
#include "opt_ddb.h"
@@ -211,4 +212,7 @@
*/
isrc->is_pic->pic_disable_source(isrc);
+#ifdef DEV_APIC
+ DELAY(1); /* XXX */
+#endif
isrc->is_pic->pic_eoi_source(isrc);
if (ih == NULL)
%%%
More information about the freebsd-current
mailing list