svn commit: r331606 - in head/sys: amd64/include i386/include x86/x86 x86/xen

Jeff Roberson jroberson at jroberson.net
Tue Mar 27 21:08:33 UTC 2018


On Tue, 27 Mar 2018, Li-Wen Hsu wrote:

> On Mon, Mar 26, 2018 at 20:35:12 -1000, Jeff Roberson wrote:
>> The patch has been on my branch for weeks and has been tested by a half
>> dozen people.  I'm sorry it does not work for you.  If you reverted 331605
>> the change that followed should not have built properly.  Did you build
>> cleanly?  Can you share your kernel config?
>>
>> I tried with and without EARLY_AP_STARTUP and with and without NUMA.  I'm
>> not having any trouble booting.  Did you make cleandepend && make depend?
>
> It also hangs in our testing system:
>
> https://ci.freebsd.org/job/FreeBSD-head-amd64-test/6817/console
> https://ci.freebsd.org/job/FreeBSD-head-i386-test/1000/console
>
> It is built cleanly and uses unmodified GENERIC config.
>
> The artifacts used are available here:
>
> https://artifact.ci.freebsd.org/snapshot/head/r331606/amd64/amd64/
> https://artifact.ci.freebsd.org/snapshot/head/r331606/i386/i386/
>
> Hope these information help.

Could someone who was experiencing the hang try the enclosed patch?  I can 
only verify that it continues to boot for me but I believe this fixes the 
bug on other systems.

I believe the issue was that cpuset_domain[0] was initialized too late on 
some systems.  It depended on what kind of hardware was present and which 
sysinit with SI_ORDER_ANY ran first.

Thanks,
Jeff

>
> Li-Wen
>
> -- 
> Li-Wen Hsu <lwhsu at FreeBSD.org>
> https://lwhsu.org
>
-------------- next part --------------
Index: amd64/include/intr_machdep.h
===================================================================
--- amd64/include/intr_machdep.h	(revision 331610)
+++ amd64/include/intr_machdep.h	(working copy)
@@ -132,6 +132,7 @@ struct intsrc {
 	u_long *is_straycount;
 	u_int is_index;
 	u_int is_handlers;
+	u_int is_domain;
 	u_int is_cpu;
 };
 
@@ -168,7 +169,7 @@ void	intr_add_cpu(u_int cpu);
 #endif
 int	intr_add_handler(const char *name, int vector, driver_filter_t filter, 
 			 driver_intr_t handler, void *arg, enum intr_type flags, 
-			 void **cookiep);    
+			 void **cookiep, int domain);    
 #ifdef SMP
 int	intr_bind(u_int vector, u_char cpu);
 #endif
@@ -176,7 +177,7 @@ int	intr_config_intr(int vector, enum intr_trigger
     enum intr_polarity pol);
 int	intr_describe(u_int vector, void *ih, const char *descr);
 void	intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame);
-u_int	intr_next_cpu(void);
+u_int	intr_next_cpu(int domain);
 struct intsrc *intr_lookup_source(int vector);
 int	intr_register_pic(struct pic *pic);
 int	intr_register_source(struct intsrc *isrc);
Index: i386/include/intr_machdep.h
===================================================================
--- i386/include/intr_machdep.h	(revision 331610)
+++ i386/include/intr_machdep.h	(working copy)
@@ -132,6 +132,7 @@ struct intsrc {
 	u_long *is_straycount;
 	u_int is_index;
 	u_int is_handlers;
+	u_int is_domain;
 	u_int is_cpu;
 };
 
@@ -158,7 +159,8 @@ void	elcr_write_trigger(u_int irq, enum intr_trigg
 void	intr_add_cpu(u_int cpu);
 #endif
 int	intr_add_handler(const char *name, int vector, driver_filter_t filter,
-    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep);
+    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep,
+    int domain);
 #ifdef SMP
 int	intr_bind(u_int vector, u_char cpu);
 #endif
@@ -166,7 +168,7 @@ int	intr_config_intr(int vector, enum intr_trigger
     enum intr_polarity pol);
 int	intr_describe(u_int vector, void *ih, const char *descr);
 void	intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame);
-u_int	intr_next_cpu(void);
+u_int	intr_next_cpu(int domain);
 struct intsrc *intr_lookup_source(int vector);
 int	intr_register_pic(struct pic *pic);
 int	intr_register_source(struct intsrc *isrc);
Index: kern/kern_cpuset.c
===================================================================
--- kern/kern_cpuset.c	(revision 331604)
+++ kern/kern_cpuset.c	(working copy)
@@ -1363,6 +1363,7 @@ cpuset_thread0(void)
 {
 	struct cpuset *set;
 	int error;
+	int i;
 
 	cpuset_zone = uma_zcreate("cpuset", sizeof(struct cpuset), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, 0);
@@ -1374,11 +1375,11 @@ cpuset_thread0(void)
 	 * cpuset_create() due to NULL parent.
 	 */
 	set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO);
-	CPU_FILL(&set->cs_mask);
+	CPU_COPY(&all_cpus, &set->cs_mask);
 	LIST_INIT(&set->cs_children);
 	LIST_INSERT_HEAD(&cpuset_ids, set, cs_link);
 	set->cs_ref = 1;
-	set->cs_flags = CPU_SET_ROOT;
+	set->cs_flags = CPU_SET_ROOT | CPU_SET_RDONLY;
 	set->cs_domain = &domainset0;
 	cpuset_zero = set;
 	cpuset_root = &set->cs_mask;
@@ -1396,6 +1397,16 @@ cpuset_thread0(void)
 	 */
 	cpuset_unr = new_unrhdr(2, INT_MAX, NULL);
 
+	/*
+	 * If MD code has not initialized per-domain cpusets, place all
+	 * CPUs in domain 0.
+	 */
+	for (i = 0; i < MAXMEMDOM; i++)
+		if (!CPU_EMPTY(&cpuset_domain[i]))
+			goto domains_set;
+	CPU_COPY(&all_cpus, &cpuset_domain[0]);
+domains_set:
+
 	return (set);
 }
 
@@ -1447,34 +1458,6 @@ cpuset_setproc_update_set(struct proc *p, struct c
 	return (0);
 }
 
-/*
- * This is called once the final set of system cpus is known.  Modifies
- * the root set and all children and mark the root read-only.  
- */
-static void
-cpuset_init(void *arg)
-{
-	cpuset_t mask;
-	int i;
-
-	mask = all_cpus;
-	if (cpuset_modify(cpuset_zero, &mask))
-		panic("Can't set initial cpuset mask.\n");
-	cpuset_zero->cs_flags |= CPU_SET_RDONLY;
-
-	/*
-	 * If MD code has not initialized per-domain cpusets, place all
-	 * CPUs in domain 0.
-	 */
-	for (i = 0; i < MAXMEMDOM; i++)
-		if (!CPU_EMPTY(&cpuset_domain[i]))
-			goto domains_set;
-	CPU_COPY(&all_cpus, &cpuset_domain[0]);
-domains_set:
-	return;
-}
-SYSINIT(cpuset, SI_SUB_SMP, SI_ORDER_ANY, cpuset_init, NULL);
-
 #ifndef _SYS_SYSPROTO_H_
 struct cpuset_args {
 	cpusetid_t	*setid;
Index: x86/x86/intr_machdep.c
===================================================================
--- x86/x86/intr_machdep.c	(revision 331610)
+++ x86/x86/intr_machdep.c	(working copy)
@@ -71,6 +71,8 @@
 #include <isa/isareg.h>
 #endif
 
+#include <vm/vm.h>
+
 #define	MAX_STRAY_LOG	5
 
 typedef void (*mask_fn)(void *);
@@ -185,7 +187,8 @@ intr_lookup_source(int vector)
 
 int
 intr_add_handler(const char *name, int vector, driver_filter_t filter,
-    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep)
+    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep,
+    int domain)
 {
 	struct intsrc *isrc;
 	int error;
@@ -200,6 +203,7 @@ intr_add_handler(const char *name, int vector, dri
 		intrcnt_updatename(isrc);
 		isrc->is_handlers++;
 		if (isrc->is_handlers == 1) {
+			isrc->is_domain = domain;
 			isrc->is_pic->pic_enable_intr(isrc);
 			isrc->is_pic->pic_enable_source(isrc);
 		}
@@ -507,14 +511,27 @@ DB_SHOW_COMMAND(irqs, db_show_irqs)
  */
 
 cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1);
-static int current_cpu;
+static int current_cpu[MAXMEMDOM];
 
+static void
+intr_init_cpus(void)
+{
+	int i;
+
+	for (i = 0; i < vm_ndomains; i++) {
+		current_cpu[i] = 0;
+		if (!CPU_ISSET(current_cpu[i], &intr_cpus) ||
+		    !CPU_ISSET(current_cpu[i], &cpuset_domain[i]))
+			intr_next_cpu(i);
+	}
+}
+
 /*
  * Return the CPU that the next interrupt source should use.  For now
  * this just returns the next local APIC according to round-robin.
  */
 u_int
-intr_next_cpu(void)
+intr_next_cpu(int domain)
 {
 	u_int apic_id;
 
@@ -529,12 +546,13 @@ u_int
 #endif
 
 	mtx_lock_spin(&icu_lock);
-	apic_id = cpu_apic_ids[current_cpu];
+	apic_id = cpu_apic_ids[current_cpu[domain]];
 	do {
-		current_cpu++;
-		if (current_cpu > mp_maxid)
-			current_cpu = 0;
-	} while (!CPU_ISSET(current_cpu, &intr_cpus));
+		current_cpu[domain]++;
+		if (current_cpu[domain] > mp_maxid)
+			current_cpu[domain] = 0;
+	} while (!CPU_ISSET(current_cpu[domain], &intr_cpus) ||
+	    !CPU_ISSET(current_cpu[domain], &cpuset_domain[domain]));
 	mtx_unlock_spin(&icu_lock);
 	return (apic_id);
 }
@@ -568,7 +586,18 @@ intr_add_cpu(u_int cpu)
 	CPU_SET(cpu, &intr_cpus);
 }
 
-#ifndef EARLY_AP_STARTUP
+#ifdef EARLY_AP_STARTUP
+static void
+intr_smp_startup(void *arg __unused)
+{
+
+	intr_init_cpus();
+	return;
+}
+SYSINIT(intr_smp_startup, SI_SUB_SMP, SI_ORDER_SECOND, intr_smp_startup,
+    NULL);
+
+#else
 /*
  * Distribute all the interrupt sources among the available CPUs once the
  * AP's have been launched.
@@ -580,6 +609,7 @@ intr_shuffle_irqs(void *arg __unused)
 	u_int cpu;
 	int i;
 
+	intr_init_cpus();
 	/* Don't bother on UP. */
 	if (mp_ncpus == 1)
 		return;
@@ -599,12 +629,12 @@ intr_shuffle_irqs(void *arg __unused)
 			 */
 			cpu = isrc->is_event->ie_cpu;
 			if (cpu == NOCPU)
-				cpu = current_cpu;
+				cpu = current_cpu[isrc->is_domain];
 			if (isrc->is_pic->pic_assign_cpu(isrc,
 			    cpu_apic_ids[cpu]) == 0) {
 				isrc->is_cpu = cpu;
 				if (isrc->is_event->ie_cpu == NOCPU)
-					intr_next_cpu();
+					intr_next_cpu(isrc->is_domain);
 			}
 		}
 	}
@@ -635,10 +665,11 @@ sysctl_hw_intrs(SYSCTL_HANDLER_ARGS)
 		isrc = interrupt_sources[i];
 		if (isrc == NULL)
 			continue;
-		sbuf_printf(&sbuf, "%s:%d @%d: %ld\n",
+		sbuf_printf(&sbuf, "%s:%d @cpu%d(domain%d): %ld\n",
 		    isrc->is_event->ie_fullname,
 		    isrc->is_index,
 		    isrc->is_cpu,
+		    isrc->is_domain,
 		    *isrc->is_count);
 	}
 
@@ -697,7 +728,7 @@ intr_balance(void *dummy __unused, int pending __u
 	 * Restart the scan from the same location to avoid moving in the
 	 * common case.
 	 */
-	current_cpu = 0;
+	intr_init_cpus();
 
 	/*
 	 * Assign round-robin from most loaded to least.
@@ -706,8 +737,8 @@ intr_balance(void *dummy __unused, int pending __u
 		isrc = interrupt_sorted[i];
 		if (isrc == NULL  || isrc->is_event->ie_cpu != NOCPU)
 			continue;
-		cpu = current_cpu;
-		intr_next_cpu();
+		cpu = current_cpu[isrc->is_domain];
+		intr_next_cpu(isrc->is_domain);
 		if (isrc->is_cpu != cpu &&
 		    isrc->is_pic->pic_assign_cpu(isrc,
 		    cpu_apic_ids[cpu]) == 0)
@@ -735,7 +766,7 @@ SYSINIT(intr_balance_init, SI_SUB_SMP, SI_ORDER_AN
  * Always route interrupts to the current processor in the UP case.
  */
 u_int
-intr_next_cpu(void)
+intr_next_cpu(int domain)
 {
 
 	return (PCPU_GET(apic_id));
Index: x86/x86/io_apic.c
===================================================================
--- x86/x86/io_apic.c	(revision 331610)
+++ x86/x86/io_apic.c	(working copy)
@@ -499,7 +499,7 @@ ioapic_enable_intr(struct intsrc *isrc)
 	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
 
 	if (intpin->io_vector == 0)
-		if (ioapic_assign_cpu(isrc, intr_next_cpu()) != 0)
+		if (ioapic_assign_cpu(isrc, intr_next_cpu(isrc->is_domain)) != 0)
 			panic("Couldn't find an APIC vector for IRQ %d",
 			    intpin->io_irq);
 	apic_enable_vector(intpin->io_cpu, intpin->io_vector);
Index: x86/x86/msi.c
===================================================================
--- x86/x86/msi.c	(revision 331610)
+++ x86/x86/msi.c	(working copy)
@@ -363,7 +363,7 @@ int
 msi_alloc(device_t dev, int count, int maxcount, int *irqs)
 {
 	struct msi_intsrc *msi, *fsrc;
-	u_int cpu;
+	u_int cpu, domain;
 	int cnt, i, *mirqs, vector;
 #ifdef ACPI_DMAR
 	u_int cookies[count];
@@ -373,6 +373,9 @@ msi_alloc(device_t dev, int count, int maxcount, i
 	if (!msi_enabled)
 		return (ENXIO);
 
+	if (bus_get_domain(dev, &domain) != 0)
+		domain = 0;
+
 	if (count > 1)
 		mirqs = malloc(count * sizeof(*mirqs), M_MSI, M_WAITOK);
 	else
@@ -420,7 +423,7 @@ again:
 	KASSERT(cnt == count, ("count mismatch"));
 
 	/* Allocate 'count' IDT vectors. */
-	cpu = intr_next_cpu();
+	cpu = intr_next_cpu(domain);
 	vector = apic_alloc_vectors(cpu, irqs, count, maxcount);
 	if (vector == 0) {
 		mtx_unlock(&msi_lock);
@@ -610,7 +613,7 @@ int
 msix_alloc(device_t dev, int *irq)
 {
 	struct msi_intsrc *msi;
-	u_int cpu;
+	u_int cpu, domain;
 	int i, vector;
 #ifdef ACPI_DMAR
 	u_int cookie;
@@ -620,6 +623,9 @@ msix_alloc(device_t dev, int *irq)
 	if (!msi_enabled)
 		return (ENXIO);
 
+	if (bus_get_domain(dev, &domain) != 0)
+		domain = 0;
+
 again:
 	mtx_lock(&msi_lock);
 
@@ -651,7 +657,7 @@ again:
 	}
 
 	/* Allocate an IDT vector. */
-	cpu = intr_next_cpu();
+	cpu = intr_next_cpu(domain);
 	vector = apic_alloc_vector(cpu, i);
 	if (vector == 0) {
 		mtx_unlock(&msi_lock);
Index: x86/x86/nexus.c
===================================================================
--- x86/x86/nexus.c	(revision 331610)
+++ x86/x86/nexus.c	(working copy)
@@ -573,7 +573,7 @@ nexus_setup_intr(device_t bus, device_t child, str
 		 int flags, driver_filter_t filter, void (*ihand)(void *),
 		 void *arg, void **cookiep)
 {
-	int		error;
+	int		error, domain;
 
 	/* somebody tried to setup an irq that failed to allocate! */
 	if (irq == NULL)
@@ -589,9 +589,11 @@ nexus_setup_intr(device_t bus, device_t child, str
 	error = rman_activate_resource(irq);
 	if (error)
 		return (error);
+	if (bus_get_domain(child, &domain) != 0)
+		domain = 0;
 
 	error = intr_add_handler(device_get_nameunit(child),
-	    rman_get_start(irq), filter, ihand, arg, flags, cookiep);
+	    rman_get_start(irq), filter, ihand, arg, flags, cookiep, domain);
 
 	return (error);
 }
Index: x86/xen/xen_intr.c
===================================================================
--- x86/xen/xen_intr.c	(revision 331610)
+++ x86/xen/xen_intr.c	(working copy)
@@ -430,7 +430,7 @@ xen_intr_bind_isrc(struct xenisrc **isrcp, evtchn_
 		 * unless specified otherwise, so shuffle them to balance
 		 * the interrupt load.
 		 */
-		xen_intr_assign_cpu(&isrc->xi_intsrc, intr_next_cpu());
+		xen_intr_assign_cpu(&isrc->xi_intsrc, intr_next_cpu(0));
 	}
 #endif
 
@@ -1562,7 +1562,7 @@ xen_intr_add_handler(const char *name, driver_filt
 		return (EINVAL);
 
 	error = intr_add_handler(name, isrc->xi_vector,filter, handler, arg,
-	    flags|INTR_EXCL, &isrc->xi_cookie);
+	    flags|INTR_EXCL, &isrc->xi_cookie, 0);
 	if (error != 0) {
 		printf(
 		    "%s: xen_intr_add_handler: intr_add_handler failed: %d\n",


More information about the svn-src-all mailing list