KDB entry on NMI

Konstantin Belousov kostikbel at gmail.com
Fri Jul 18 16:07:15 UTC 2014


It was mentioned somewhere recently, that typical BIOS today configures
NMI delivery on the hardware events as broadcast.  When I developerd
the dmar(4) busdma backend, I indeed met the problem, and wrote a
prototype which avoided startup of ddb on all cores.  Instead, the patch
implements custom spinlock, which allows only one core to win, other
cores ignore the NMI, by spinning on lock.

The issue which I see on at least two different machines with different
Intel chipsets, is that NMI is somehow sticky, i.e. it is re-delivered
after the handler executes iret.  I am not sure what the problem is,
whether it is due to hardware needing some ACK, or a bug in code.

Anyway, even on two-cores machine, having both cores simultaneously
enter NMI makes the use of ddb impossible, so I believe the patch is
improvement.  I make measures to ensure that reboot from ddb prompt
works.

Thought ?

diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 9b12449..76b992a 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -32,14 +32,18 @@ __FBSDID("$FreeBSD$");
 #include "opt_kstack_pages.h"
 #include "opt_sched.h"
 #include "opt_smp.h"
+#include "opt_isa.h"
+#include "opt_kdb.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
+#include <sys/cpu.h>
 #include <sys/cpuset.h>
 #ifdef GPROF 
 #include <sys/gmon.h>
 #endif
+#include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
@@ -60,6 +64,7 @@ __FBSDID("$FreeBSD$");
 
 #include <x86/apicreg.h>
 #include <machine/clock.h>
+#include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/cpufunc.h>
 #include <x86/mca.h>
@@ -164,6 +169,7 @@ static int cpu_logical;			/* logical cpus per core */
 static int cpu_cores;			/* cores per package */
 
 static void	assign_cpu_ids(void);
+static void	cpustop_handler_post(u_int cpu);
 static void	set_interrupt_apic_ids(void);
 static int	start_ap(int apic_id);
 static void	release_aps(void *dummy);
@@ -1415,26 +1421,44 @@ ipi_nmi_handler()
 	cpustop_handler();
 	return (0);
 }
-     
-/*
- * Handle an IPI_STOP by saving our current context and spinning until we
- * are resumed.
- */
-void
-cpustop_handler(void)
-{
-	u_int cpu;
 
-	cpu = PCPU_GET(cpuid);
+#ifdef DEV_ISA
+static int nmi_kdb_lock;
+#endif
 
-	savectx(&stoppcbs[cpu]);
+#ifdef DEV_ISA
+bool
+nmi_call_kdb_smp(u_int type, int code, struct trapframe *frame, bool do_panic)
+{
+	int cpu;
+	bool call_post, ret;
 
-	/* Indicate that we are stopped */
-	CPU_SET_ATOMIC(cpu, &stopped_cpus);
+	call_post = false;
+	cpu = PCPU_GET(cpuid);
+	if (atomic_cmpset_acq_int(&nmi_kdb_lock, 0, 1)) {
+		ret = nmi_call_kdb(cpu, type, code, frame, do_panic);
+	} else {
+		ret = true;
+		savectx(&stoppcbs[cpu]);
+		while (!atomic_cmpset_acq_int(&nmi_kdb_lock, 0, 1)) {
+			if (CPU_ISSET(cpu, &ipi_nmi_pending)) {
+				CPU_CLR_ATOMIC(cpu, &ipi_nmi_pending);
+				call_post = true;
+			}
+			cpustop_handler_post(cpu);
+			cpu_spinwait();
+		}
+	}
+	atomic_store_rel_int(&nmi_kdb_lock, 0);
+	if (call_post)
+		cpustop_handler_post(cpu);
+	return (ret);
+}
+#endif
 
-	/* Wait for restart */
-	while (!CPU_ISSET(cpu, &started_cpus))
-	    ia32_pause();
+static void
+cpustop_handler_post(u_int cpu)
+{
 
 	CPU_CLR_ATOMIC(cpu, &started_cpus);
 	CPU_CLR_ATOMIC(cpu, &stopped_cpus);
@@ -1450,6 +1474,25 @@ cpustop_handler(void)
 }
 
 /*
+ * Handle an IPI_STOP by saving our current context and spinning until we
+ * are resumed.
+ */
+void
+cpustop_handler(void)
+{
+	u_int cpu;
+
+	cpu = PCPU_GET(cpuid);
+	savectx(&stoppcbs[cpu]);
+	/* Indicate that we are stopped */
+	CPU_SET_ATOMIC(cpu, &stopped_cpus);
+	/* Wait for restart */
+	while (!CPU_ISSET(cpu, &started_cpus))
+	    ia32_pause();
+	cpustop_handler_post(cpu);
+}
+
+/*
  * Handle an IPI_SUSPEND by saving our current context and spinning until we
  * are resumed.
  */
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index d9203bc..6fa576e 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -74,6 +74,7 @@ PMC_SOFT_DEFINE( , , page_fault, all);
 PMC_SOFT_DEFINE( , , page_fault, read);
 PMC_SOFT_DEFINE( , , page_fault, write);
 #endif
+#include <dev/pci/pcivar.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
@@ -158,6 +159,44 @@ SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RWTUN,
     &uprintf_signal, 0,
     "Print debugging information on trap signal to ctty");
 
+#ifdef DEV_ISA
+bool
+nmi_call_kdb(u_int cpu, u_int type, int code, struct trapframe *frame,
+    bool do_panic)
+{
+
+	/* machine/parity/power fail/"kitchen sink" faults */
+	if (isa_nmi(code) == 0) {
+#ifdef KDB
+		/*
+		 * NMI can be hooked up to a pushbutton for debugging.
+		 */
+		if (kdb_on_nmi) {
+			printf ("NMI/cpu%d ... going to debugger\n", cpu);
+			kdb_trap(type, 0, frame);
+			return (true);
+		}
+	} else
+#endif /* KDB */
+	if (do_panic)
+		panic("NMI indicates hardware failure");
+	return (false);
+}
+#endif
+
+static int
+handle_nmi_intr(u_int type, int code, struct trapframe *frame, bool panic)
+{
+
+#ifdef DEV_ISA
+#ifdef SMP
+	return (nmi_call_kdb_smp(type, code, frame, panic));
+#else
+	return (nmi_call_kdb(0, type, code, frame, panic));
+#endif
+#endif
+}
+
 /*
  * Exception, fault, and trap interface to the FreeBSD kernel.
  * This common code is called from assembly language IDT gate entry
@@ -357,25 +396,9 @@ trap(struct trapframe *frame)
 			i = SIGFPE;
 			break;
 
-#ifdef DEV_ISA
 		case T_NMI:
-			/* machine/parity/power fail/"kitchen sink" faults */
-			if (isa_nmi(code) == 0) {
-#ifdef KDB
-				/*
-				 * NMI can be hooked up to a pushbutton
-				 * for debugging.
-				 */
-				if (kdb_on_nmi) {
-					printf ("NMI ... going to debugger\n");
-					kdb_trap(type, 0, frame);
-				}
-#endif /* KDB */
-				goto userout;
-			} else if (panic_on_nmi)
-				panic("NMI indicates hardware failure");
+			handle_nmi_intr(type, code, frame, true);
 			break;
-#endif /* DEV_ISA */
 
 		case T_OFLOW:		/* integer overflow fault */
 			ucode = FPE_INTOVF;
@@ -543,25 +566,11 @@ trap(struct trapframe *frame)
 #endif
 			break;
 
-#ifdef DEV_ISA
 		case T_NMI:
-			/* machine/parity/power fail/"kitchen sink" faults */
-			if (isa_nmi(code) == 0) {
-#ifdef KDB
-				/*
-				 * NMI can be hooked up to a pushbutton
-				 * for debugging.
-				 */
-				if (kdb_on_nmi) {
-					printf ("NMI ... going to debugger\n");
-					kdb_trap(type, 0, frame);
-				}
-#endif /* KDB */
-				goto out;
-			} else if (panic_on_nmi == 0)
+			if (handle_nmi_intr(type, code, frame, false) ||
+			    !panic_on_nmi)
 				goto out;
 			/* FALLTHROUGH */
-#endif /* DEV_ISA */
 		}
 
 		trap_fatal(frame, 0);
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index 5ddfbbd..da170f2 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -120,5 +120,9 @@ struct savefpu *get_pcb_user_save_td(struct thread *td);
 struct savefpu *get_pcb_user_save_pcb(struct pcb *pcb);
 struct pcb *get_pcb_td(struct thread *td);
 void	amd64_db_resume_dbreg(void);
+bool	nmi_call_kdb(u_int cpu, u_int type, int code, struct trapframe *frame,
+	    bool panic);
+bool	nmi_call_kdb_smp(u_int type, int code, struct trapframe *frame,
+	    bool panic);
 
 #endif /* !_MACHINE_MD_VAR_H_ */
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 819 bytes
Desc: not available
URL: <http://lists.freebsd.org/pipermail/freebsd-arch/attachments/20140718/a2681273/attachment.sig>


More information about the freebsd-arch mailing list