Lost interrupts on SMP systems

John Baldwin jhb at FreeBSD.org
Mon Jan 17 18:48:07 PST 2005


On Saturday 15 January 2005 03:03 am, Peter Trifonov wrote:
> Hello John,
> On Friday 14 January 2005 22:18, John Baldwin wrote:
>
> Among those bug reports the followup submitted by cguthrie at clubphoto.co
> (http://www.freebsd.org/cgi/query-pr.cgi?pr=i386/40274)
> looks like the most close one to my situation.
>
> > > > I've gone ahead and committed the fix for the MPTable global
> > > > entries btw.  I don't think there is a routing or edge/level
> > > > problem though because the devices do work until you do a
> > > > ping flood.  One thing we can try is that Linux has a
> > >
> > > IMPORTANT: I can do flood ping over either of them without any problems
> > > (at least, if the system is booted with -p -v, I don't know why).
> > > They break down ONLY if flood ping is SIMULTANEOUSLY  performed over
> > > both of them.
>
> Another observation: doing simultaneous flood ping over xl0 AND xl1, xl0
> AND xl2 also causes xl1 or xl2 respectively (but not both of them) to say
> "watchdog timeout". In both cases they can be fixed by doing
> ifconfig xl1 down
> ifconfig xl2 down
> ifconfig xl1 up
> ifconfig xl2 up
> i.e. even if flood ping has not been done over xl2, it still has to be
> brought down& up.
>
> xl0 works fine in all cases.
> flood ping over just one interface (either of them) always works fine.
>
> > More interrupt load that way, which would indicate maybe the bug Linux
> > tries to work around except that your intpins are edge triggered. :(
>
> Just a guess:
> Maybe also there is some kind of race condition in the interrupt handling
> system, so that if too many interrupts are coming from different sources,
> some of them are not properly processed? However, this should be somehow
> related to IRQ sharing.
>
> > I've included a little test program below that you can run as root to do
> > arbitrary port reads (inb).  Please compile it and mail me the output of:
> >
> > inb 0x4d0
> > inb 0x4d1
>
> Here is what it says:
> # ./inb 0x4d0
> inb(0x4d0) = 0x0 = 0d = '^@'
> # ./inb 0x4d1
> inb(0x4d1) = 0xe = 14d = '^N'

OK, this is good: it means you do have an ELCR.  Let me give you a quick patch
to try.  This will be relative to your existing mptable.c file since I've
already committed the first mptable patch to current.

--- //depot/vendor/freebsd/src/sys/i386/i386/io_apic.c	2004/08/02 15:35:28
+++ //depot/user/jhb/acpipci/i386/i386/io_apic.c	2005/01/18 02:26:39
@@ -423,7 +423,7 @@
 	 * them to be set to active low.
 	 *
 	 * XXX: Should we write to the ELCR if the trigger mode changes for
-	 * an EISA IRQ?
+	 * an EISA IRQ or an ISA IRQ with the ELCR present?
 	 */
 	if (intpin->io_bus == APIC_BUS_EISA)
 		pol = INTR_POLARITY_HIGH;
--- //depot/vendor/freebsd/src/sys/i386/i386/machdep.c	2004/11/27 06:55:50
+++ //depot/user/jhb/acpipci/i386/i386/machdep.c	2005/01/18 02:26:39
@@ -2098,6 +2098,7 @@
 		printf("WARNING: loader(8) metadata is missing!\n");
 
 #ifdef DEV_ISA
+	elcr_probe();
 	atpic_startup();
 #endif
 
--- //depot/vendor/freebsd/src/sys/i386/i386/mptable.c	2005/01/12 18:25:23
+++ //depot/user/jhb/acpipci/i386/i386/mptable.c	2005/01/18 02:26:39
@@ -580,12 +580,18 @@
 	KASSERT(src_bus <= mptable_maxbusid, ("bus id %d too large", src_bus));
 	switch (busses[src_bus].bus_type) {
 	case ISA:
-		return (INTR_TRIGGER_EDGE);
+#ifndef PC98
+		if (elcr_found)
+			return (elcr_read_trigger(src_bus_irq));
+		else
+#endif
+			return (INTR_TRIGGER_EDGE);
 	case PCI:
 		return (INTR_TRIGGER_LEVEL);
 #ifndef PC98
 	case EISA:
 		KASSERT(src_bus_irq < 16, ("Invalid EISA IRQ %d", src_bus_irq));
+		KASSERT(elcr_found, ("Missing ELCR"));
 		return (elcr_read_trigger(src_bus_irq));
 #endif
 	default:
--- //depot/vendor/freebsd/src/sys/i386/include/intr_machdep.h	2004/12/23 20:35:42
+++ //depot/user/jhb/acpipci/i386/include/intr_machdep.h	2005/01/18 02:26:39
@@ -84,6 +84,7 @@
 struct intrframe;
 
 extern struct mtx icu_lock;
+extern int elcr_found;
 
 /* XXX: The elcr_* prototypes probably belong somewhere else. */
 int	elcr_probe(void);
--- //depot/vendor/freebsd/src/sys/i386/isa/atpic.c	2004/08/02 15:35:28
+++ //depot/user/jhb/acpipci/i386/isa/atpic.c	2005/01/18 02:26:39
@@ -112,9 +112,6 @@
 static void	atpic_init(void *dummy);
 
 unsigned int imen;	/* XXX */
-#ifndef PC98
-static int using_elcr;
-#endif
 
 inthand_t
 	IDTVEC(atpic_intr0), IDTVEC(atpic_intr1), IDTVEC(atpic_intr2),
@@ -313,7 +310,7 @@
 	if (ai->at_irq == 0) {
 		i8259_init(ap, ap == &atpics[SLAVE]);
 #ifndef PC98
-		if (ap == &atpics[SLAVE] && using_elcr)
+		if (ap == &atpics[SLAVE] && elcr_found)
 			elcr_resume();
 #endif
 	}
@@ -369,7 +366,7 @@
 			    vector);
 		return (EINVAL);
 	}
-	if (!using_elcr) {
+	if (!elcr_found) {
 		if (bootverbose)
 			printf("atpic: No ELCR to configure IRQ%u as %s\n",
 			    vector, trig == INTR_TRIGGER_EDGE ? "edge/high" :
@@ -492,8 +489,7 @@
 	 * assume level trigger for any interrupt that we aren't sure is
 	 * edge triggered.
 	 */
-	if (elcr_probe() == 0) {
-		using_elcr = 1;
+	if (elcr_found) {
 		for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++)
 			ai->at_trigger = elcr_read_trigger(i);
 	} else {
--- //depot/vendor/freebsd/src/sys/i386/isa/elcr.c	2004/05/04 20:10:24
+++ //depot/user/jhb/acpipci/i386/isa/elcr.c	2005/01/18 02:26:39
@@ -57,9 +57,7 @@
 #define	ELCR_MASK(irq)	(1 << (irq))
 
 static int elcr_status;
-#ifdef INVARIANTS
-static int elcr_found;
-#endif
+int elcr_found;
 
 /*
  * Check to see if we have what looks like a valid ELCR.  We do this by
@@ -88,9 +86,7 @@
 	}
 	if (resource_disabled("elcr", 0))
 		return (ENXIO);
-#ifdef INVARIANTS
 	elcr_found = 1;
-#endif
 	return (0);
 }
 

-- 
John Baldwin <jhb at FreeBSD.org>  <><  http://www.FreeBSD.org/~jhb/
"Power Users Use the Power to Serve"  =  http://www.FreeBSD.org


More information about the freebsd-smp mailing list