Lost interrupts on SMP systems
John Baldwin
jhb at FreeBSD.org
Mon Jan 17 18:48:07 PST 2005
On Saturday 15 January 2005 03:03 am, Peter Trifonov wrote:
> Hello John,
> On Friday 14 January 2005 22:18, John Baldwin wrote:
>
> Among those bug reports the followup submitted by cguthrie at clubphoto.co
> (http://www.freebsd.org/cgi/query-pr.cgi?pr=i386/40274)
> looks like the closest one to my situation.
>
> > > > I've gone ahead and committed the fix for the MPTable global
> > > > entries btw. I don't think there is a routing or edge/level
> > > > problem though because the devices do work until you do a
> > > > ping flood. One thing we can try is that Linux has a
> > >
> > > IMPORTANT: I can do flood ping over either of them without any problems
> > > (at least, if the system is booted with -p -v, I don't know why).
> > > They break down ONLY if flood ping is SIMULTANEOUSLY performed over
> > > both of them.
>
> Another observation: doing simultaneous flood ping over xl0 AND xl1, or over
> xl0 AND xl2, also causes xl1 or xl2 respectively (but not both of them) to say
> "watchdog timeout". In both cases they can be fixed by doing
> ifconfig xl1 down
> ifconfig xl2 down
> ifconfig xl1 up
> ifconfig xl2 up
> i.e. even if flood ping has not been done over xl2, it still has to be
> brought down & up.
>
> xl0 works fine in all cases.
> flood ping over just one interface (either of them) always works fine.
>
> > More interrupt load that way, which would indicate maybe the bug Linux
> > tries to work around except that your intpins are edge triggered. :(
>
> Just a guess:
> Maybe also there is some kind of race condition in the interrupt handling
> system, so that if too many interrupts are coming from different sources,
> some of them are not properly processed? However, this should be somehow
> related to IRQ sharing.
>
> > I've included a little test program below that you can run as root to do
> > arbitrary port reads (inb). Please compile it and mail me the output of:
> >
> > inb 0x4d0
> > inb 0x4d1
>
> Here is what it says:
> # ./inb 0x4d0
> inb(0x4d0) = 0x0 = 0d = '^@'
> # ./inb 0x4d1
> inb(0x4d1) = 0xe = 14d = '^N'
Ok, this is good, it means you do have an ELCR. Let me give you a quick patch
to try. This will be relative to your existing mptable.c file since I've
committed the first mptable patch to current already.
--- //depot/vendor/freebsd/src/sys/i386/i386/io_apic.c 2004/08/02 15:35:28
+++ //depot/user/jhb/acpipci/i386/i386/io_apic.c 2005/01/18 02:26:39
@@ -423,7 +423,7 @@
* them to be set to active low.
*
* XXX: Should we write to the ELCR if the trigger mode changes for
- * an EISA IRQ?
+ * an EISA IRQ or an ISA IRQ with the ELCR present?
*/
if (intpin->io_bus == APIC_BUS_EISA)
pol = INTR_POLARITY_HIGH;
--- //depot/vendor/freebsd/src/sys/i386/i386/machdep.c 2004/11/27 06:55:50
+++ //depot/user/jhb/acpipci/i386/i386/machdep.c 2005/01/18 02:26:39
@@ -2098,6 +2098,7 @@
printf("WARNING: loader(8) metadata is missing!\n");
#ifdef DEV_ISA
+ elcr_probe();
atpic_startup();
#endif
--- //depot/vendor/freebsd/src/sys/i386/i386/mptable.c 2005/01/12 18:25:23
+++ //depot/user/jhb/acpipci/i386/i386/mptable.c 2005/01/18 02:26:39
@@ -580,12 +580,18 @@
KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus));
switch (busses[src_bus].bus_type) {
case ISA:
- return (INTR_TRIGGER_EDGE);
+#ifndef PC98
+ if (elcr_found)
+ return (elcr_read_trigger(src_bus_irq));
+ else
+#endif
+ return (INTR_TRIGGER_EDGE);
case PCI:
return (INTR_TRIGGER_LEVEL);
#ifndef PC98
case EISA:
KASSERT(src_bus_irq < 16, ("Invalid EISA IRQ %d", src_bus_irq));
+ KASSERT(elcr_found, ("Missing ELCR"));
return (elcr_read_trigger(src_bus_irq));
#endif
default:
--- //depot/vendor/freebsd/src/sys/i386/include/intr_machdep.h 2004/12/23 20:35:42
+++ //depot/user/jhb/acpipci/i386/include/intr_machdep.h 2005/01/18 02:26:39
@@ -84,6 +84,7 @@
struct intrframe;
extern struct mtx icu_lock;
+extern int elcr_found;
/* XXX: The elcr_* prototypes probably belong somewhere else. */
int elcr_probe(void);
--- //depot/vendor/freebsd/src/sys/i386/isa/atpic.c 2004/08/02 15:35:28
+++ //depot/user/jhb/acpipci/i386/isa/atpic.c 2005/01/18 02:26:39
@@ -112,9 +112,6 @@
static void atpic_init(void *dummy);
unsigned int imen; /* XXX */
-#ifndef PC98
-static int using_elcr;
-#endif
inthand_t
IDTVEC(atpic_intr0), IDTVEC(atpic_intr1), IDTVEC(atpic_intr2),
@@ -313,7 +310,7 @@
if (ai->at_irq == 0) {
i8259_init(ap, ap == &atpics[SLAVE]);
#ifndef PC98
- if (ap == &atpics[SLAVE] && using_elcr)
+ if (ap == &atpics[SLAVE] && elcr_found)
elcr_resume();
#endif
}
@@ -369,7 +366,7 @@
vector);
return (EINVAL);
}
- if (!using_elcr) {
+ if (!elcr_found) {
if (bootverbose)
printf("atpic: No ELCR to configure IRQ%u as %s\n",
vector, trig == INTR_TRIGGER_EDGE ? "edge/high" :
@@ -492,8 +489,7 @@
* assume level trigger for any interrupt that we aren't sure is
* edge triggered.
*/
- if (elcr_probe() == 0) {
- using_elcr = 1;
+ if (elcr_found) {
for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++)
ai->at_trigger = elcr_read_trigger(i);
} else {
--- //depot/vendor/freebsd/src/sys/i386/isa/elcr.c 2004/05/04 20:10:24
+++ //depot/user/jhb/acpipci/i386/isa/elcr.c 2005/01/18 02:26:39
@@ -57,9 +57,7 @@
#define ELCR_MASK(irq) (1 << (irq))
static int elcr_status;
-#ifdef INVARIANTS
-static int elcr_found;
-#endif
+int elcr_found;
/*
* Check to see if we have what looks like a valid ELCR. We do this by
@@ -88,9 +86,7 @@
}
if (resource_disabled("elcr", 0))
return (ENXIO);
-#ifdef INVARIANTS
elcr_found = 1;
-#endif
return (0);
}
--
John Baldwin <jhb at FreeBSD.org> <>< http://www.FreeBSD.org/~jhb/
"Power Users Use the Power to Serve" = http://www.FreeBSD.org
More information about the freebsd-smp
mailing list