svn commit: r196198 - in stable/8: . sys/amd64/amd64 sys/amd64/conf sys/amd64/include sys/conf sys/i386/conf sys/i386/i386 sys/i386/include sys/i386/xen sys/ia64/ia64 sys/ia64/include sys/kern sys/...

Attilio Rao attilio at FreeBSD.org
Thu Aug 13 17:54:13 UTC 2009


Author: attilio
Date: Thu Aug 13 17:54:11 2009
New Revision: 196198
URL: http://svn.freebsd.org/changeset/base/196198

Log:
  MFC r196196:
  
  * Completely remove the STOP_NMI option from the kernel.  This option
    has proven useful when entering KDB via an NMI, but in other
    situations it completely violates the rules about keeping interrupts
    disabled while holding a spinlock.  This can cause deadlocks in
    cases where a normal IPI_STOP is expected.
  * Add a new IPI, IPI_STOP_HARD, on all the supported architectures.
    This IPI sends a stop message among CPUs over a privileged channel
    where one is available; in other cases it just matches a normal
    IPI_STOP.
    Right now IPI_STOP_HARD uses an NMI on the ia32 and amd64
    architectures, while on the others it has the effect of a normal
    IPI_STOP.  It is up to the maintainers to eventually implement a
    hard stop where necessary and possible.
  * Use the new IPI facility to implement a new user-facing SMP kernel
    function, stop_cpus_hard().  It mirrors stop_cpus() but uses the
    privileged channel for stopping the CPUs (see the usage sketch after
    this log message).
  * Let KDB use the newly introduced stop_cpus_hard() and leave
    stop_cpus() for all the other cases.
  * Disable interrupts on CPU0 when starting the suspension of the APs.
  * Style cleanups and comment additions.
  
  This patch should fix the reboot/shutdown deadlocks that many users
  have been reporting on the mailing lists.
  
  Please don't forget to remove the STOP_NMI option from your kernel
  configuration file.
  
  Reviewed by:  jhb
  Tested by:    pho, bz, rink
  Approved by:  re (kib)
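
As a reference for the new KPI described above, here is a minimal usage
sketch from a caller's point of view.  It mirrors the subr_kdb.c change
further down; the wrapping function and the did_stop variable are purely
illustrative, while stop_cpus_hard(), restart_cpus(), cpustop_handler()
and PCPU_GET(other_cpus) are the interfaces added or touched by this
commit.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pcpu.h>
#include <sys/smp.h>

/*
 * Illustrative only: stop every other CPU over the privileged channel
 * (an NMI on ia32/amd64), do some work while they spin in
 * cpustop_handler(), then release them again.
 */
static void
example_hard_stop(void)
{
	cpumask_t others;
	int did_stop;

	others = PCPU_GET(other_cpus);
	/* Returns 0 when SMP has not been started yet. */
	did_stop = stop_cpus_hard(others);

	/* ... inspect or modify per-CPU state here ... */

	if (did_stop)
		restart_cpus(others);
}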

Modified:
  stable/8/UPDATING   (contents, props changed)
  stable/8/sys/amd64/amd64/local_apic.c
  stable/8/sys/amd64/amd64/mp_machdep.c
  stable/8/sys/amd64/amd64/trap.c
  stable/8/sys/amd64/conf/GENERIC
  stable/8/sys/amd64/conf/NOTES
  stable/8/sys/amd64/conf/XENHVM
  stable/8/sys/amd64/include/apicvar.h
  stable/8/sys/amd64/include/smp.h
  stable/8/sys/conf/options.amd64
  stable/8/sys/conf/options.i386
  stable/8/sys/conf/options.pc98
  stable/8/sys/i386/conf/GENERIC
  stable/8/sys/i386/conf/NOTES
  stable/8/sys/i386/i386/local_apic.c
  stable/8/sys/i386/i386/mp_machdep.c
  stable/8/sys/i386/i386/trap.c
  stable/8/sys/i386/include/apicvar.h
  stable/8/sys/i386/include/smp.h
  stable/8/sys/i386/xen/mp_machdep.c
  stable/8/sys/ia64/ia64/interrupt.c
  stable/8/sys/ia64/include/smp.h
  stable/8/sys/kern/kern_shutdown.c
  stable/8/sys/kern/subr_kdb.c
  stable/8/sys/kern/subr_smp.c
  stable/8/sys/mips/include/smp.h
  stable/8/sys/mips/mips/mp_machdep.c
  stable/8/sys/pc98/conf/NOTES
  stable/8/sys/powerpc/include/smp.h
  stable/8/sys/powerpc/powerpc/mp_machdep.c
  stable/8/sys/sparc64/include/smp.h
  stable/8/sys/sun4v/include/smp.h
  stable/8/sys/sys/smp.h

Modified: stable/8/UPDATING
==============================================================================
--- stable/8/UPDATING	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/UPDATING	Thu Aug 13 17:54:11 2009	(r196198)
@@ -22,6 +22,12 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.
 	to maximize performance.  (To disable malloc debugging, run
 	ln -s aj /etc/malloc.conf.)
 
+20090813:
+	Remove the STOP_NMI option.  An NMI is now used only for stopping
+	CPUs on KDB entry, via the newly introduced stop_cpus_hard()
+	function, while stop_cpus() keeps using a normal IPI_STOP on ia32
+	and amd64.
+
 20090803:
 	RELENG_8 branched.
 

Modified: stable/8/sys/amd64/amd64/local_apic.c
==============================================================================
--- stable/8/sys/amd64/amd64/local_apic.c	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/amd64/amd64/local_apic.c	Thu Aug 13 17:54:11 2009	(r196198)
@@ -1238,8 +1238,17 @@ lapic_ipi_vectored(u_int vector, int des
 	KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
 	    ("%s: invalid vector %d", __func__, vector));
 
-	icrlo = vector | APIC_DELMODE_FIXED | APIC_DESTMODE_PHY |
-	    APIC_LEVEL_DEASSERT | APIC_TRIGMOD_EDGE;
+	icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE;
+
+	/*
+	 * IPI_STOP_HARD is just a "fake" vector used to send an NMI.
+	 * Use the special NMI delivery rules in that case; otherwise
+	 * program the requested vector.
+	 */
+	if (vector == IPI_STOP_HARD)
+		icrlo |= APIC_DELMODE_NMI | APIC_LEVEL_ASSERT;
+	else
+		icrlo |= vector | APIC_DELMODE_FIXED | APIC_LEVEL_DEASSERT;
 	destfield = 0;
 	switch (dest) {
 	case APIC_IPI_DEST_SELF:
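
For clarity, the two ICR low-word layouts the patched lapic_ipi_vectored()
can now assemble, collected into one illustrative helper (the helper name
is hypothetical; the macros are the ones used in the hunk above).  In NMI
delivery mode the local APIC ignores the vector field, which is why
IPI_STOP_HARD can serve as a purely symbolic "fake" vector:

/* Illustrative only: the ICR low word built by the hunk above. */
static __inline register_t
sketch_icrlo(u_int vector)
{
	register_t icrlo;

	icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE;
	if (vector == IPI_STOP_HARD)
		/* NMI delivery; the vector bits are ignored by the LAPIC. */
		icrlo |= APIC_DELMODE_NMI | APIC_LEVEL_ASSERT;
	else
		icrlo |= vector | APIC_DELMODE_FIXED | APIC_LEVEL_DEASSERT;
	return (icrlo);
}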

Modified: stable/8/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- stable/8/sys/amd64/amd64/mp_machdep.c	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/amd64/amd64/mp_machdep.c	Thu Aug 13 17:54:11 2009	(r196198)
@@ -114,31 +114,12 @@ volatile int smp_tlb_wait;
 
 extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
 
-#ifdef STOP_NMI
-static volatile cpumask_t ipi_nmi_pending;
-
-static void	ipi_nmi_selected(cpumask_t cpus);
-#endif 
-
 /*
  * Local data and functions.
  */
 
-#ifdef STOP_NMI
-/* 
- * Provide an alternate method of stopping other CPUs. If another CPU has
- * disabled interrupts the conventional STOP IPI will be blocked. This 
- * NMI-based stop should get through in that case.
- */
-static int stop_cpus_with_nmi = 1;
-SYSCTL_INT(_debug, OID_AUTO, stop_cpus_with_nmi, CTLTYPE_INT | CTLFLAG_RW,
-    &stop_cpus_with_nmi, 0, "");
-TUNABLE_INT("debug.stop_cpus_with_nmi", &stop_cpus_with_nmi);
-#else
-#define	stop_cpus_with_nmi	0
-#endif
-
 static u_int logical_cpus;
+static volatile cpumask_t ipi_nmi_pending;
 
 /* used to hold the AP's until we are ready to release them */
 static struct mtx ap_boot_mtx;
@@ -1158,12 +1139,14 @@ ipi_selected(cpumask_t cpus, u_int ipi)
 		ipi = IPI_BITMAP_VECTOR;
 	}
 
-#ifdef STOP_NMI
-	if (ipi == IPI_STOP && stop_cpus_with_nmi) {
-		ipi_nmi_selected(cpus);
-		return;
-	}
-#endif
+	/*
+	 * IPI_STOP_HARD maps to an NMI and the trap handler needs a
+	 * hint in order to identify its source.
+	 * Record the mask of receiving CPUs for this purpose.
+	 */
+	if (ipi == IPI_STOP_HARD)
+		atomic_set_int(&ipi_nmi_pending, cpus);
+
 	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
 	while ((cpu = ffs(cpus)) != 0) {
 		cpu--;
@@ -1194,64 +1177,43 @@ void
 ipi_all_but_self(u_int ipi)
 {
 
-	if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
+	if (IPI_IS_BITMAPED(ipi)) {
 		ipi_selected(PCPU_GET(other_cpus), ipi);
 		return;
 	}
-	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
-	lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
-}
 
-#ifdef STOP_NMI
-/*
- * send NMI IPI to selected CPUs
- */
-
-#define	BEFORE_SPIN	1000000
-
-static void
-ipi_nmi_selected(cpumask_t cpus)
-{
-	int cpu;
-	register_t icrlo;
-
-	icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT 
-		| APIC_TRIGMOD_EDGE; 
-	
-	CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus);
-
-	atomic_set_int(&ipi_nmi_pending, cpus);
-
-	while ((cpu = ffs(cpus)) != 0) {
-		cpu--;
-		cpus &= ~(1 << cpu);
-
-		KASSERT(cpu_apic_ids[cpu] != -1,
-		    ("IPI NMI to non-existent CPU %d", cpu));
-		
-		/* Wait for an earlier IPI to finish. */
-		if (!lapic_ipi_wait(BEFORE_SPIN))
-			panic("ipi_nmi_selected: previous IPI has not cleared");
+	/*
+	 * IPI_STOP_HARD maps to an NMI and the trap handler needs a
+	 * hint in order to identify its source.
+	 * Record the mask of receiving CPUs for this purpose.
+	 */
+	if (ipi == IPI_STOP_HARD)
+		atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus));
 
-		lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]);
-	}
+	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
+	lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
 }
 
 int
-ipi_nmi_handler(void)
+ipi_nmi_handler()
 {
-	int cpumask = PCPU_GET(cpumask);
+	cpumask_t cpumask;
 
-	if (!(ipi_nmi_pending & cpumask))
-		return 1;
+	/*
+	 * Since there is no simple way to determine an NMI's source,
+	 * treat the presence of this CPU's bit in the global pending
+	 * bitmask as an indication that an IPI_STOP_HARD has been
+	 * issued and should be handled here.
+	 */
+	cpumask = PCPU_GET(cpumask);
+	if ((ipi_nmi_pending & cpumask) == 0)
+		return (1);
 
 	atomic_clear_int(&ipi_nmi_pending, cpumask);
 	cpustop_handler();
-	return 0;
+	return (0);
 }
      
-#endif /* STOP_NMI */
-
 /*
  * Handle an IPI_STOP by saving our current context and spinning until we
  * are resumed.
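
Putting the mp_machdep.c hunks above together, the amd64 hard-stop
handshake now looks roughly like the sketch below.  The sketch_-prefixed
names and the duplicated ipi_nmi_pending declaration are illustrative
only; the calls and the logic mirror the patched ipi_all_but_self() and
ipi_nmi_handler():

static volatile cpumask_t ipi_nmi_pending;	/* mirrors the MD variable */

/*
 * Sender: what ipi_all_but_self(IPI_STOP_HARD) boils down to after this
 * change (minus the bitmapped-IPI shortcut and the KTR tracing).
 */
static void
sketch_send_hard_stop(void)
{
	/* Hint for the receivers' NMI handlers. */
	atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus));
	/* lapic_ipi_vectored() delivers IPI_STOP_HARD as an NMI. */
	lapic_ipi_vectored(IPI_STOP_HARD, APIC_IPI_DEST_OTHERS);
}

/*
 * Receiver: trap() calls ipi_nmi_handler() for T_NMI; a return value of
 * 0 means the NMI was an IPI_STOP_HARD and has been fully handled.
 */
static int
sketch_recv_hard_stop(void)
{
	cpumask_t self;

	self = PCPU_GET(cpumask);
	if ((ipi_nmi_pending & self) == 0)
		return (1);
	atomic_clear_int(&ipi_nmi_pending, self);
	cpustop_handler();	/* spin until restart_cpus() releases us */
	return (0);
}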

Modified: stable/8/sys/amd64/amd64/trap.c
==============================================================================
--- stable/8/sys/amd64/amd64/trap.c	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/amd64/amd64/trap.c	Thu Aug 13 17:54:11 2009	(r196198)
@@ -239,13 +239,11 @@ trap(struct trapframe *frame)
 	type = frame->tf_trapno;
 
 #ifdef SMP
-#ifdef STOP_NMI
 	/* Handler for NMI IPIs used for stopping CPUs. */
 	if (type == T_NMI) {
 	         if (ipi_nmi_handler() == 0)
 	                   goto out;
 	}
-#endif /* STOP_NMI */
 #endif /* SMP */
 
 #ifdef KDB

Modified: stable/8/sys/amd64/conf/GENERIC
==============================================================================
--- stable/8/sys/amd64/conf/GENERIC	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/amd64/conf/GENERIC	Thu Aug 13 17:54:11 2009	(r196198)
@@ -69,7 +69,6 @@ options 	P1003_1B_SEMAPHORES	# POSIX-sty
 options 	_KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
 options 	PRINTF_BUFR_SIZE=128	# Prevent printf output being interspersed.
 options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev
-options 	STOP_NMI		# Stop CPUS using NMI instead of IPI
 options 	HWPMC_HOOKS		# Necessary kernel hooks for hwpmc(4)
 options 	AUDIT			# Security event auditing
 options 	MAC			# TrustedBSD MAC Framework

Modified: stable/8/sys/amd64/conf/NOTES
==============================================================================
--- stable/8/sys/amd64/conf/NOTES	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/amd64/conf/NOTES	Thu Aug 13 17:54:11 2009	(r196198)
@@ -30,11 +30,6 @@ device		mptable			# Optional MPSPEC mpta
 #
 options 	MP_WATCHDOG
 
-# 
-# Debugging options.
-#
-options 	STOP_NMI		# Stop CPUS using NMI instead of IPI
-
 
 
 #####################################################################

Modified: stable/8/sys/amd64/conf/XENHVM
==============================================================================
--- stable/8/sys/amd64/conf/XENHVM	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/amd64/conf/XENHVM	Thu Aug 13 17:54:11 2009	(r196198)
@@ -68,7 +68,6 @@ options 	SYSVMSG			# SYSV-style message 
 options 	SYSVSEM			# SYSV-style semaphores
 options 	_KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
 options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev
-options 	STOP_NMI		# Stop CPUS using NMI instead of IPI
 options 	HWPMC_HOOKS		# Necessary kernel hooks for hwpmc(4)
 options 	AUDIT			# Security event auditing
 #options 	KDTRACE_FRAME		# Ensure frames are compiled in

Modified: stable/8/sys/amd64/include/apicvar.h
==============================================================================
--- stable/8/sys/amd64/include/apicvar.h	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/amd64/include/apicvar.h	Thu Aug 13 17:54:11 2009	(r196198)
@@ -102,11 +102,6 @@
  * smp_ipi_mtx and waits for the completion of the IPI (Only one IPI user 
  * at a time) The second group uses a single interrupt and a bitmap to avoid
  * redundant IPI interrupts.
- *
- * Right now IPI_STOP used by kdb shares the interrupt priority class with
- * the two IPI groups mentioned above. As such IPI_STOP may cause a deadlock.
- * Eventually IPI_STOP should use NMI IPIs - this would eliminate this and
- * other deadlocks caused by IPI_STOP.
  */ 
 
 /* Interrupts for local APIC LVT entries other than the timer. */
@@ -134,6 +129,7 @@
 
 #define	IPI_STOP	(APIC_IPI_INTS + 7)	/* Stop CPU until restarted. */
 #define	IPI_SUSPEND	(APIC_IPI_INTS + 8)	/* Suspend CPU until restarted. */
+#define	IPI_STOP_HARD	(APIC_IPI_INTS + 9)	/* Stop CPU with a NMI. */
 
 /*
  * The spurious interrupt can share the priority class with the IPIs since

Modified: stable/8/sys/amd64/include/smp.h
==============================================================================
--- stable/8/sys/amd64/include/smp.h	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/amd64/include/smp.h	Thu Aug 13 17:54:11 2009	(r196198)
@@ -52,6 +52,7 @@ void	cpu_add(u_int apic_id, char boot_cp
 void	cpustop_handler(void);
 void	cpususpend_handler(void);
 void	init_secondary(void);
+int	ipi_nmi_handler(void);
 void	ipi_selected(cpumask_t cpus, u_int ipi);
 void	ipi_all_but_self(u_int ipi);
 void 	ipi_bitmap_handler(struct trapframe frame);
@@ -66,10 +67,6 @@ void	smp_masked_invlpg_range(cpumask_t m
 void	smp_invltlb(void);
 void	smp_masked_invltlb(cpumask_t mask);
 
-#ifdef STOP_NMI
-int	ipi_nmi_handler(void);
-#endif
-
 #endif /* !LOCORE */
 #endif /* SMP */
 

Modified: stable/8/sys/conf/options.amd64
==============================================================================
--- stable/8/sys/conf/options.amd64	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/conf/options.amd64	Thu Aug 13 17:54:11 2009	(r196198)
@@ -52,7 +52,6 @@ PSM_DEBUG		opt_psm.h
 DEV_ATPIC		opt_atpic.h
 
 # Debugging
-STOP_NMI		opt_cpu.h
 KDTRACE_FRAME		opt_kdtrace.h
 
 # BPF just-in-time compiler

Modified: stable/8/sys/conf/options.i386
==============================================================================
--- stable/8/sys/conf/options.i386	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/conf/options.i386	Thu Aug 13 17:54:11 2009	(r196198)
@@ -110,7 +110,6 @@ ASR_COMPAT		opt_asr.h
 
 # Debugging
 NPX_DEBUG		opt_npx.h
-STOP_NMI		opt_cpu.h
 
 # BPF just-in-time compiler
 BPF_JITTER		opt_bpf.h

Modified: stable/8/sys/conf/options.pc98
==============================================================================
--- stable/8/sys/conf/options.pc98	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/conf/options.pc98	Thu Aug 13 17:54:11 2009	(r196198)
@@ -95,7 +95,6 @@ DEV_NPX			opt_npx.h
 
 # Debugging
 NPX_DEBUG		opt_npx.h
-STOP_NMI		opt_cpu.h
 AGP_DEBUG		opt_agp.h
 
 # BPF just-in-time compiler

Modified: stable/8/sys/i386/conf/GENERIC
==============================================================================
--- stable/8/sys/i386/conf/GENERIC	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/i386/conf/GENERIC	Thu Aug 13 17:54:11 2009	(r196198)
@@ -70,7 +70,6 @@ options 	P1003_1B_SEMAPHORES	# POSIX-sty
 options 	_KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
 options 	PRINTF_BUFR_SIZE=128	# Prevent printf output being interspersed.
 options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev
-options 	STOP_NMI		# Stop CPUS using NMI instead of IPI
 options 	HWPMC_HOOKS		# Necessary kernel hooks for hwpmc(4)
 options 	AUDIT			# Security event auditing
 options 	MAC			# TrustedBSD MAC Framework

Modified: stable/8/sys/i386/conf/NOTES
==============================================================================
--- stable/8/sys/i386/conf/NOTES	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/i386/conf/NOTES	Thu Aug 13 17:54:11 2009	(r196198)
@@ -49,7 +49,6 @@ options 	MP_WATCHDOG
 
 # Debugging options.
 #
-options 	STOP_NMI		# Stop CPUS using NMI instead of IPI
 options 	COUNT_XINVLTLB_HITS	# Counters for TLB events
 options 	COUNT_IPIS		# Per-CPU IPI interrupt counters
 

Modified: stable/8/sys/i386/i386/local_apic.c
==============================================================================
--- stable/8/sys/i386/i386/local_apic.c	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/i386/i386/local_apic.c	Thu Aug 13 17:54:11 2009	(r196198)
@@ -1248,8 +1248,17 @@ lapic_ipi_vectored(u_int vector, int des
 	KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
 	    ("%s: invalid vector %d", __func__, vector));
 
-	icrlo = vector | APIC_DELMODE_FIXED | APIC_DESTMODE_PHY |
-	    APIC_LEVEL_DEASSERT | APIC_TRIGMOD_EDGE;
+	icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE;
+
+	/*
+	 * IPI_STOP_HARD is just a "fake" vector used to send an NMI.
+	 * Use the special NMI delivery rules in that case; otherwise
+	 * program the requested vector.
+	 */
+	if (vector == IPI_STOP_HARD)
+		icrlo |= APIC_DELMODE_NMI | APIC_LEVEL_ASSERT;
+	else
+		icrlo |= vector | APIC_DELMODE_FIXED | APIC_LEVEL_DEASSERT;
 	destfield = 0;
 	switch (dest) {
 	case APIC_IPI_DEST_SELF:

Modified: stable/8/sys/i386/i386/mp_machdep.c
==============================================================================
--- stable/8/sys/i386/i386/mp_machdep.c	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/i386/i386/mp_machdep.c	Thu Aug 13 17:54:11 2009	(r196198)
@@ -155,12 +155,6 @@ vm_offset_t smp_tlb_addr1;
 vm_offset_t smp_tlb_addr2;
 volatile int smp_tlb_wait;
 
-#ifdef STOP_NMI
-static volatile cpumask_t ipi_nmi_pending;
-
-static void	ipi_nmi_selected(cpumask_t cpus);
-#endif 
-
 #ifdef COUNT_IPIS
 /* Interrupt counts. */
 static u_long *ipi_preempt_counts[MAXCPU];
@@ -177,21 +171,8 @@ u_long *ipi_lazypmap_counts[MAXCPU];
  * Local data and functions.
  */
 
-#ifdef STOP_NMI
-/* 
- * Provide an alternate method of stopping other CPUs. If another CPU has
- * disabled interrupts the conventional STOP IPI will be blocked. This 
- * NMI-based stop should get through in that case.
- */
-static int stop_cpus_with_nmi = 1;
-SYSCTL_INT(_debug, OID_AUTO, stop_cpus_with_nmi, CTLTYPE_INT | CTLFLAG_RW,
-    &stop_cpus_with_nmi, 0, "");
-TUNABLE_INT("debug.stop_cpus_with_nmi", &stop_cpus_with_nmi);
-#else
-#define	stop_cpus_with_nmi	0
-#endif
-
 static u_int logical_cpus;
+static volatile cpumask_t ipi_nmi_pending;
 
 /* used to hold the AP's until we are ready to release them */
 static struct mtx ap_boot_mtx;
@@ -1318,12 +1299,14 @@ ipi_selected(cpumask_t cpus, u_int ipi)
 		ipi = IPI_BITMAP_VECTOR;
 	}
 
-#ifdef STOP_NMI
-	if (ipi == IPI_STOP && stop_cpus_with_nmi) {
-		ipi_nmi_selected(cpus);
-		return;
-	}
-#endif
+	/*
+	 * IPI_STOP_HARD maps to an NMI and the trap handler needs a
+	 * hint in order to identify its source.
+	 * Record the mask of receiving CPUs for this purpose.
+	 */
+	if (ipi == IPI_STOP_HARD)
+		atomic_set_int(&ipi_nmi_pending, cpus);
+
 	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
 	while ((cpu = ffs(cpus)) != 0) {
 		cpu--;
@@ -1354,64 +1337,42 @@ void
 ipi_all_but_self(u_int ipi)
 {
 
-	if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
+	if (IPI_IS_BITMAPED(ipi)) {
 		ipi_selected(PCPU_GET(other_cpus), ipi);
 		return;
 	}
+
+	/*
+	 * IPI_STOP_HARD maps to an NMI and the trap handler needs a
+	 * hint in order to identify its source.
+	 * Record the mask of receiving CPUs for this purpose.
+	 */
+	if (ipi == IPI_STOP_HARD)
+		atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus));
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
 }
 
-#ifdef STOP_NMI
-/*
- * send NMI IPI to selected CPUs
- */
-
-#define	BEFORE_SPIN	1000000
-
-void
-ipi_nmi_selected(cpumask_t cpus)
-{
-	int cpu;
-	register_t icrlo;
-
-	icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT 
-		| APIC_TRIGMOD_EDGE; 
-	
-	CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus);
-
-	atomic_set_int(&ipi_nmi_pending, cpus);
-
-	while ((cpu = ffs(cpus)) != 0) {
-		cpu--;
-		cpus &= ~(1 << cpu);
-
-		KASSERT(cpu_apic_ids[cpu] != -1,
-		    ("IPI NMI to non-existent CPU %d", cpu));
-		
-		/* Wait for an earlier IPI to finish. */
-		if (!lapic_ipi_wait(BEFORE_SPIN))
-			panic("ipi_nmi_selected: previous IPI has not cleared");
-
-		lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]);
-	}
-}
-
 int
-ipi_nmi_handler(void)
+ipi_nmi_handler()
 {
-	int cpumask = PCPU_GET(cpumask);
+	cpumask_t cpumask;
 
-	if (!(ipi_nmi_pending & cpumask))
-		return 1;
+	/*
+	 * Since there is no simple way to determine an NMI's source,
+	 * treat the presence of this CPU's bit in the global pending
+	 * bitmask as an indication that an IPI_STOP_HARD has been
+	 * issued and should be handled here.
+	 */
+	cpumask = PCPU_GET(cpumask);
+	if ((ipi_nmi_pending & cpumask) == 0)
+		return (1);
 
 	atomic_clear_int(&ipi_nmi_pending, cpumask);
 	cpustop_handler();
-	return 0;
+	return (0);
 }
 
-#endif /* STOP_NMI */
-
 /*
  * Handle an IPI_STOP by saving our current context and spinning until we
  * are resumed.

Modified: stable/8/sys/i386/i386/trap.c
==============================================================================
--- stable/8/sys/i386/i386/trap.c	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/i386/i386/trap.c	Thu Aug 13 17:54:11 2009	(r196198)
@@ -211,13 +211,11 @@ trap(struct trapframe *frame)
 	type = frame->tf_trapno;
 
 #ifdef SMP
-#ifdef STOP_NMI
 	/* Handler for NMI IPIs used for stopping CPUs. */
 	if (type == T_NMI) {
 	         if (ipi_nmi_handler() == 0)
 	                   goto out;
 	}
-#endif /* STOP_NMI */
 #endif /* SMP */
 
 #ifdef KDB

Modified: stable/8/sys/i386/include/apicvar.h
==============================================================================
--- stable/8/sys/i386/include/apicvar.h	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/i386/include/apicvar.h	Thu Aug 13 17:54:11 2009	(r196198)
@@ -100,11 +100,6 @@
  * smp_ipi_mtx and waits for the completion of the IPI (Only one IPI user 
  * at a time) The second group uses a single interrupt and a bitmap to avoid
  * redundant IPI interrupts.
- *
- * Right now IPI_STOP used by kdb shares the interrupt priority class with
- * the two IPI groups mentioned above. As such IPI_STOP may cause a deadlock.
- * Eventually IPI_STOP should use NMI IPIs - this would eliminate this and
- * other deadlocks caused by IPI_STOP.
  */ 
 
 /* Interrupts for local APIC LVT entries other than the timer. */
@@ -134,6 +129,7 @@
 #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
 
 #define	IPI_STOP	(APIC_IPI_INTS + 7)	/* Stop CPU until restarted. */
+#define	IPI_STOP_HARD	(APIC_IPI_INTS + 8)	/* Stop CPU with a NMI. */
 
 #else /* XEN */
 /* These are the normal i386 APIC definitions */
@@ -161,6 +157,7 @@
 #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
 
 #define	IPI_STOP	(APIC_IPI_INTS + 7)	/* Stop CPU until restarted. */
+#define	IPI_STOP_HARD	(APIC_IPI_INTS + 8)	/* Stop CPU with a NMI. */
 #endif /* XEN */
 
 /*

Modified: stable/8/sys/i386/include/smp.h
==============================================================================
--- stable/8/sys/i386/include/smp.h	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/i386/include/smp.h	Thu Aug 13 17:54:11 2009	(r196198)
@@ -60,7 +60,8 @@ inthand_t
 void	cpu_add(u_int apic_id, char boot_cpu);
 void	cpustop_handler(void);
 void	init_secondary(void);
-void	ipi_selected(u_int cpus, u_int ipi);
+int	ipi_nmi_handler(void);
+void	ipi_selected(cpumask_t cpus, u_int ipi);
 void	ipi_all_but_self(u_int ipi);
 #ifndef XEN
 void 	ipi_bitmap_handler(struct trapframe frame);
@@ -76,9 +77,6 @@ void	smp_masked_invlpg_range(cpumask_t m
 void	smp_invltlb(void);
 void	smp_masked_invltlb(cpumask_t mask);
 
-#ifdef STOP_NMI
-int	ipi_nmi_handler(void);
-#endif
 #ifdef XEN
 void ipi_to_irq_init(void);
 

Modified: stable/8/sys/i386/xen/mp_machdep.c
==============================================================================
--- stable/8/sys/i386/xen/mp_machdep.c	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/i386/xen/mp_machdep.c	Thu Aug 13 17:54:11 2009	(r196198)
@@ -90,8 +90,6 @@ __FBSDID("$FreeBSD$");
 #include <xen/hypervisor.h>
 #include <xen/interface/vcpu.h>
 
-#define stop_cpus_with_nmi	0
-
 
 int	mp_naps;		/* # of Applications processors */
 int	boot_cpu_id = -1;	/* designated BSP */

Modified: stable/8/sys/ia64/ia64/interrupt.c
==============================================================================
--- stable/8/sys/ia64/ia64/interrupt.c	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/ia64/ia64/interrupt.c	Thu Aug 13 17:54:11 2009	(r196198)
@@ -145,6 +145,8 @@ interrupt(struct trapframe *tf)
 	/*
 	 * Handle ExtINT interrupts by generating an INTA cycle to
 	 * read the vector.
+	 * IPI_STOP_HARD is mapped to IPI_STOP so it is not necessary
+	 * to add it to this switch-like construct.
 	 */
 	if (vector == 0) {
 		inta = ib->ib_inta;

Modified: stable/8/sys/ia64/include/smp.h
==============================================================================
--- stable/8/sys/ia64/include/smp.h	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/ia64/include/smp.h	Thu Aug 13 17:54:11 2009	(r196198)
@@ -21,6 +21,7 @@
 #define	IPI_AST			4
 #define	IPI_RENDEZVOUS		5
 #define	IPI_STOP		6
+#define	IPI_STOP_HARD		6
 #define	IPI_PREEMPT		7
 
 #define	IPI_COUNT		8

Modified: stable/8/sys/kern/kern_shutdown.c
==============================================================================
--- stable/8/sys/kern/kern_shutdown.c	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/kern/kern_shutdown.c	Thu Aug 13 17:54:11 2009	(r196198)
@@ -412,9 +412,6 @@ boot(int howto)
 	 */
 	EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
 
-	/* XXX This doesn't disable interrupts any more.  Reconsider? */
-	splhigh();
-
 	if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) 
 		doadump();
 
@@ -488,6 +485,13 @@ static void
 shutdown_reset(void *junk, int howto)
 {
 
+	/*
+	 * Disable interrupts on CPU0 in order to prevent fast interrupt
+	 * handlers from preempting the stopping process and deadlocking
+	 * against other CPUs.
+	 */
+	spinlock_enter();
+
 	printf("Rebooting...\n");
 	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
 	/* cpu_boot(howto); */ /* doesn't do anything at the moment */

Modified: stable/8/sys/kern/subr_kdb.c
==============================================================================
--- stable/8/sys/kern/subr_kdb.c	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/kern/subr_kdb.c	Thu Aug 13 17:54:11 2009	(r196198)
@@ -88,7 +88,8 @@ SYSCTL_PROC(_debug_kdb, OID_AUTO, trap_c
  * Flag indicating whether or not to IPI the other CPUs to stop them on
  * entering the debugger.  Sometimes, this will result in a deadlock as
  * stop_cpus() waits for the other cpus to stop, so we allow it to be
- * disabled.
+ * disabled.  In order to maximize the chances of success, use a hard
+ * stop for that.
  */
 #ifdef SMP
 static int kdb_stop_cpus = 1;
@@ -226,7 +227,7 @@ kdb_panic(const char *msg)
 {
 	
 #ifdef SMP
-	stop_cpus(PCPU_GET(other_cpus));
+	stop_cpus_hard(PCPU_GET(other_cpus));
 #endif
 	printf("KDB: panic\n");
 	panic(msg);
@@ -518,7 +519,7 @@ kdb_trap(int type, int code, struct trap
 
 #ifdef SMP
 	if ((did_stop_cpus = kdb_stop_cpus) != 0)
-		stop_cpus(PCPU_GET(other_cpus));
+		stop_cpus_hard(PCPU_GET(other_cpus));
 #endif
 
 	kdb_active++;

Modified: stable/8/sys/kern/subr_smp.c
==============================================================================
--- stable/8/sys/kern/subr_smp.c	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/kern/subr_smp.c	Thu Aug 13 17:54:11 2009	(r196198)
@@ -233,18 +233,21 @@ forward_roundrobin(void)
  * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
  *            from executing at same time.
  */
-int
-stop_cpus(cpumask_t map)
+static int
+generic_stop_cpus(cpumask_t map, u_int type)
 {
 	int i;
 
+	KASSERT(type == IPI_STOP || type == IPI_STOP_HARD,
+	    ("%s: invalid stop type", __func__));
+
 	if (!smp_started)
 		return 0;
 
-	CTR1(KTR_SMP, "stop_cpus(%x)", map);
+	CTR2(KTR_SMP, "stop_cpus(%x) with %u type", map, type);
 
 	/* send the stop IPI to all CPUs in map */
-	ipi_selected(map, IPI_STOP);
+	ipi_selected(map, type);
 
 	i = 0;
 	while ((stopped_cpus & map) != map) {
@@ -262,6 +265,20 @@ stop_cpus(cpumask_t map)
 	return 1;
 }
 
+int
+stop_cpus(cpumask_t map)
+{
+
+	return (generic_stop_cpus(map, IPI_STOP));
+}
+
+int
+stop_cpus_hard(cpumask_t map)
+{
+
+	return (generic_stop_cpus(map, IPI_STOP_HARD));
+}
+
 #if defined(__amd64__)
 /*
  * When called the executing CPU will send an IPI to all other CPUs

Modified: stable/8/sys/mips/include/smp.h
==============================================================================
--- stable/8/sys/mips/include/smp.h	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/mips/include/smp.h	Thu Aug 13 17:54:11 2009	(r196198)
@@ -24,6 +24,7 @@
 #define	IPI_RENDEZVOUS		0x0002
 #define	IPI_AST			0x0004
 #define	IPI_STOP		0x0008
+#define	IPI_STOP_HARD		0x0008
 
 #ifndef LOCORE
 

Modified: stable/8/sys/mips/mips/mp_machdep.c
==============================================================================
--- stable/8/sys/mips/mips/mp_machdep.c	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/mips/mips/mp_machdep.c	Thu Aug 13 17:54:11 2009	(r196198)
@@ -129,7 +129,12 @@ smp_handle_ipi(struct trapframe *frame)
 			break;
 
 		case IPI_STOP:
-			CTR0(KTR_SMP, "IPI_STOP");
+
+			/*
+			 * IPI_STOP_HARD is mapped to IPI_STOP, so there is
+			 * no need to handle it separately in this switch.
+			 */
+			CTR0(KTR_SMP, "IPI_STOP or IPI_STOP_HARD");
 			atomic_set_int(&stopped_cpus, cpumask);
 
 			while ((started_cpus & cpumask) == 0)

Modified: stable/8/sys/pc98/conf/NOTES
==============================================================================
--- stable/8/sys/pc98/conf/NOTES	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/pc98/conf/NOTES	Thu Aug 13 17:54:11 2009	(r196198)
@@ -29,10 +29,6 @@ device		apic			# I/O apic
 #
 options 	MP_WATCHDOG
 
-# Debugging options.
-#
-options 	STOP_NMI		# Stop CPUS using NMI instead of IPI
-
 
 
 #####################################################################

Modified: stable/8/sys/powerpc/include/smp.h
==============================================================================
--- stable/8/sys/powerpc/include/smp.h	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/powerpc/include/smp.h	Thu Aug 13 17:54:11 2009	(r196198)
@@ -35,6 +35,7 @@
 #define	IPI_PREEMPT		1
 #define	IPI_RENDEZVOUS		2
 #define	IPI_STOP		3
+#define	IPI_STOP_HARD		3
 
 #ifndef LOCORE
 

Modified: stable/8/sys/powerpc/powerpc/mp_machdep.c
==============================================================================
--- stable/8/sys/powerpc/powerpc/mp_machdep.c	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/powerpc/powerpc/mp_machdep.c	Thu Aug 13 17:54:11 2009	(r196198)
@@ -281,7 +281,13 @@ powerpc_ipi_handler(void *arg)
 			smp_rendezvous_action();
 			break;
 		case IPI_STOP:
-			CTR1(KTR_SMP, "%s: IPI_STOP (stop)", __func__);
+
+			/*
+			 * IPI_STOP_HARD is mapped to IPI_STOP, so there is
+			 * no need to handle it separately in this switch.
+			 */
+			CTR1(KTR_SMP, "%s: IPI_STOP or IPI_STOP_HARD (stop)",
+			    __func__);
 			self = PCPU_GET(cpumask);
 			savectx(PCPU_GET(curpcb));
 			atomic_set_int(&stopped_cpus, self);

Modified: stable/8/sys/sparc64/include/smp.h
==============================================================================
--- stable/8/sys/sparc64/include/smp.h	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/sparc64/include/smp.h	Thu Aug 13 17:54:11 2009	(r196198)
@@ -56,6 +56,7 @@
 #define	IPI_RENDEZVOUS	PIL_RENDEZVOUS
 #define	IPI_PREEMPT	PIL_PREEMPT
 #define	IPI_STOP	PIL_STOP
+#define	IPI_STOP_HARD	PIL_STOP
 
 #define	IPI_RETRIES	5000
 

Modified: stable/8/sys/sun4v/include/smp.h
==============================================================================
--- stable/8/sys/sun4v/include/smp.h	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/sun4v/include/smp.h	Thu Aug 13 17:54:11 2009	(r196198)
@@ -44,6 +44,7 @@
 #define	IPI_AST		PIL_AST
 #define	IPI_RENDEZVOUS	PIL_RENDEZVOUS
 #define	IPI_STOP	PIL_STOP
+#define	IPI_STOP_HARD	PIL_STOP
 #define IPI_PREEMPT     PIL_PREEMPT
 
 

Modified: stable/8/sys/sys/smp.h
==============================================================================
--- stable/8/sys/sys/smp.h	Thu Aug 13 17:51:26 2009	(r196197)
+++ stable/8/sys/sys/smp.h	Thu Aug 13 17:54:11 2009	(r196198)
@@ -123,6 +123,7 @@ void	forward_signal(struct thread *);
 void	forward_roundrobin(void);
 int	restart_cpus(cpumask_t);
 int	stop_cpus(cpumask_t);
+int	stop_cpus_hard(cpumask_t);
 #if defined(__amd64__)
 int	suspend_cpus(cpumask_t);
 #endif

