svn commit: r187880 - in head/sys: amd64/amd64 amd64/include i386/i386 i386/include

Jeff Roberson jeff at FreeBSD.org
Thu Jan 29 01:22:57 PST 2009


Author: jeff
Date: Thu Jan 29 09:22:56 2009
New Revision: 187880
URL: http://svn.freebsd.org/changeset/base/187880

Log:
   - Allocate APIC vectors on a per-CPU basis.  This allows us to allocate
     more IRQs as the number of CPUs grows.  This is principally useful on
     systems with MSI devices, which may want many IRQs per CPU.
  
  Discussed with:	jhb
  Sponsored by:	Nokia

Modified:
  head/sys/amd64/amd64/io_apic.c
  head/sys/amd64/amd64/local_apic.c
  head/sys/amd64/amd64/mp_machdep.c
  head/sys/amd64/amd64/msi.c
  head/sys/amd64/include/apicvar.h
  head/sys/amd64/include/intr_machdep.h
  head/sys/i386/i386/io_apic.c
  head/sys/i386/i386/local_apic.c
  head/sys/i386/i386/mp_machdep.c
  head/sys/i386/i386/msi.c
  head/sys/i386/include/apicvar.h
  head/sys/i386/include/intr_machdep.h

Modified: head/sys/amd64/amd64/io_apic.c
==============================================================================
--- head/sys/amd64/amd64/io_apic.c	Thu Jan 29 06:43:29 2009	(r187879)
+++ head/sys/amd64/amd64/io_apic.c	Thu Jan 29 09:22:56 2009	(r187880)
@@ -327,39 +327,56 @@ ioapic_assign_cpu(struct intsrc *isrc, u
 {
 	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
 	struct ioapic *io = (struct ioapic *)isrc->is_pic;
+	u_int old_vector;
+	u_int old_id;
 
+	/*
+	 * keep 1st core as the destination for NMI
+	 */
+	if (intpin->io_irq == IRQ_NMI)
+		apic_id = 0;
+
+	/*
+	 * Set us up to free the old irq.
+	 */
+	old_vector = intpin->io_vector;
+	old_id = intpin->io_cpu;
+	if (old_vector && apic_id == old_id)
+		return;
+
+	/*
+	 * Allocate an APIC vector for this interrupt pin.  Once
+	 * we have a vector we program the interrupt pin.
+	 */
 	intpin->io_cpu = apic_id;
+	intpin->io_vector = apic_alloc_vector(apic_id, intpin->io_irq);
 	if (bootverbose) {
-		printf("ioapic%u: Assigning ", io->io_id);
+		printf("ioapic%u: routing intpin %u (", io->io_id,
+		    intpin->io_intpin);
 		ioapic_print_irq(intpin);
-		printf(" to local APIC %u\n", intpin->io_cpu);
+		printf(") to lapic %u vector %u\n", intpin->io_cpu,
+		    intpin->io_vector);
 	}
 	ioapic_program_intpin(intpin);
+	/*
+	 * Free the old vector after the new one is established.  This is done
+	 * to prevent races where we could miss an interrupt.
+	 */
+	if (old_vector)
+		apic_free_vector(old_id, old_vector, intpin->io_irq);
 }
 
 static void
 ioapic_enable_intr(struct intsrc *isrc)
 {
 	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
-	struct ioapic *io = (struct ioapic *)isrc->is_pic;
 
-	if (intpin->io_vector == 0) {
-		/*
-		 * Allocate an APIC vector for this interrupt pin.  Once
-		 * we have a vector we program the interrupt pin.
-		 */
-		intpin->io_vector = apic_alloc_vector(intpin->io_irq);
-		if (bootverbose) {
-			printf("ioapic%u: routing intpin %u (", io->io_id,
-			    intpin->io_intpin);
-			ioapic_print_irq(intpin);
-			printf(") to vector %u\n", intpin->io_vector);
-		}
-		ioapic_program_intpin(intpin);
-		apic_enable_vector(intpin->io_vector);
-	}
+	if (intpin->io_vector == 0)
+		ioapic_assign_cpu(isrc, pcpu_find(0)->pc_apic_id);
+	apic_enable_vector(intpin->io_cpu, intpin->io_vector);
 }
 
+
 static void
 ioapic_disable_intr(struct intsrc *isrc)
 {
@@ -369,11 +386,11 @@ ioapic_disable_intr(struct intsrc *isrc)
 	if (intpin->io_vector != 0) {
 		/* Mask this interrupt pin and free its APIC vector. */
 		vector = intpin->io_vector;
-		apic_disable_vector(vector);
+		apic_disable_vector(intpin->io_cpu, vector);
 		intpin->io_masked = 1;
 		intpin->io_vector = 0;
 		ioapic_program_intpin(intpin);
-		apic_free_vector(vector, intpin->io_irq);
+		apic_free_vector(intpin->io_cpu, vector, intpin->io_irq);
 	}
 }
 

Modified: head/sys/amd64/amd64/local_apic.c
==============================================================================
--- head/sys/amd64/amd64/local_apic.c	Thu Jan 29 06:43:29 2009	(r187879)
+++ head/sys/amd64/amd64/local_apic.c	Thu Jan 29 09:22:56 2009	(r187880)
@@ -46,6 +46,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
 #include <sys/smp.h>
 
 #include <vm/vm.h>
@@ -109,6 +111,8 @@ struct lapic {
 	u_long la_hard_ticks;
 	u_long la_stat_ticks;
 	u_long la_prof_ticks;
+	/* Include IDT_SYSCALL to make indexing easier. */
+	u_int la_ioint_irqs[APIC_NUM_IOINTS + 1];
 } static lapics[MAX_APIC_ID + 1];
 
 /* XXX: should thermal be an NMI? */
@@ -134,8 +138,6 @@ static inthand_t *ioint_handlers[] = {
 	IDTVEC(apic_isr7),	/* 224 - 255 */
 };
 
-/* Include IDT_SYSCALL to make indexing easier. */
-static u_int ioint_irqs[APIC_NUM_IOINTS + 1];
 
 static u_int32_t lapic_timer_divisors[] = { 
 	APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
@@ -215,14 +217,12 @@ lapic_init(vm_paddr_t addr)
 
 	/* Perform basic initialization of the BSP's local APIC. */
 	lapic_enable();
-	ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
 
 	/* Set BSP's per-CPU local APIC ID. */
 	PCPU_SET(apic_id, lapic_id());
 
 	/* Local APIC timer interrupt. */
 	setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_SYSIGT, SEL_KPL, 0);
-	ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = IRQ_TIMER;
 
 	/* XXX: error/thermal interrupts */
 }
@@ -254,6 +254,9 @@ lapic_create(u_int apic_id, int boot_cpu
 		lapics[apic_id].la_lvts[i] = lvts[i];
 		lapics[apic_id].la_lvts[i].lvt_active = 0;
 	}
+	lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
+	lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] =
+	    IRQ_TIMER;
 
 #ifdef SMP
 	cpu_add(apic_id, boot_cpu);
@@ -664,7 +667,8 @@ lapic_handle_intr(int vector, struct tra
 
 	if (vector == -1)
 		panic("Couldn't get vector from ISR!");
-	isrc = intr_lookup_source(apic_idt_to_irq(vector));
+	isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id),
+	    vector));
 	intr_execute_handlers(isrc, frame);
 }
 
@@ -779,9 +783,19 @@ lapic_timer_enable_intr(void)
 	lapic->lvt_timer = value;
 }
 
+u_int
+apic_cpuid(u_int apic_id)
+{
+#ifdef SMP
+	return apic_cpuids[apic_id];
+#else
+	return 0;
+#endif
+}
+
 /* Request a free IDT vector to be used by the specified IRQ. */
 u_int
-apic_alloc_vector(u_int irq)
+apic_alloc_vector(u_int apic_id, u_int irq)
 {
 	u_int vector;
 
@@ -793,9 +807,9 @@ apic_alloc_vector(u_int irq)
 	 */
 	mtx_lock_spin(&icu_lock);
 	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
-		if (ioint_irqs[vector] != 0)
+		if (lapics[apic_id].la_ioint_irqs[vector] != 0)
 			continue;
-		ioint_irqs[vector] = irq;
+		lapics[apic_id].la_ioint_irqs[vector] = irq;
 		mtx_unlock_spin(&icu_lock);
 		return (vector + APIC_IO_INTS);
 	}
@@ -810,7 +824,7 @@ apic_alloc_vector(u_int irq)
  * satisfied, 0 is returned.
  */
 u_int
-apic_alloc_vectors(u_int *irqs, u_int count, u_int align)
+apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align)
 {
 	u_int first, run, vector;
 
@@ -833,7 +847,7 @@ apic_alloc_vectors(u_int *irqs, u_int co
 	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
 
 		/* Vector is in use, end run. */
-		if (ioint_irqs[vector] != 0) {
+		if (lapics[apic_id].la_ioint_irqs[vector] != 0) {
 			run = 0;
 			first = 0;
 			continue;
@@ -853,7 +867,8 @@ apic_alloc_vectors(u_int *irqs, u_int co
 
 		/* Found a run, assign IRQs and return the first vector. */
 		for (vector = 0; vector < count; vector++)
-			ioint_irqs[first + vector] = irqs[vector];
+			lapics[apic_id].la_ioint_irqs[first + vector] =
+			    irqs[vector];
 		mtx_unlock_spin(&icu_lock);
 		return (first + APIC_IO_INTS);
 	}
@@ -862,8 +877,14 @@ apic_alloc_vectors(u_int *irqs, u_int co
 	return (0);
 }
 
+/*
+ * Enable a vector for a particular apic_id.  Since all lapics share idt
+ * entries and ioint_handlers this enables the vector on all lapics.  lapics
+ * which do not have the vector configured would report spurious interrupts
+ * should it fire.
+ */
 void
-apic_enable_vector(u_int vector)
+apic_enable_vector(u_int apic_id, u_int vector)
 {
 
 	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
@@ -873,7 +894,7 @@ apic_enable_vector(u_int vector)
 }
 
 void
-apic_disable_vector(u_int vector)
+apic_disable_vector(u_int apic_id, u_int vector)
 {
 
 	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
@@ -884,27 +905,42 @@ apic_disable_vector(u_int vector)
 
 /* Release an APIC vector when it's no longer in use. */
 void
-apic_free_vector(u_int vector, u_int irq)
+apic_free_vector(u_int apic_id, u_int vector, u_int irq)
 {
+	struct thread *td;
 	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
 	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
 	    ("Vector %u does not map to an IRQ line", vector));
 	KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq));
-	KASSERT(ioint_irqs[vector - APIC_IO_INTS] == irq, ("IRQ mismatch"));
+	KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] ==
+	    irq, ("IRQ mismatch"));
+
+	/*
+	 * Bind us to the cpu that owned the vector before freeing it so
+	 * we don't lose an interrupt delivery race.
+	 */
+	td = curthread;
+	thread_lock(td);
+	if (sched_is_bound(td))
+		panic("apic_free_vector: Thread already bound.\n");
+	sched_bind(td, apic_cpuid(apic_id));
 	mtx_lock_spin(&icu_lock);
-	ioint_irqs[vector - APIC_IO_INTS] = 0;
+	lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = 0;
 	mtx_unlock_spin(&icu_lock);
+	sched_unbind(td);
+	thread_unlock(td);
+
 }
 
 /* Map an IDT vector (APIC) to an IRQ (interrupt source). */
 u_int
-apic_idt_to_irq(u_int vector)
+apic_idt_to_irq(u_int apic_id, u_int vector)
 {
 
 	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
 	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
 	    ("Vector %u does not map to an IRQ line", vector));
-	return (ioint_irqs[vector - APIC_IO_INTS]);
+	return (lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]);
 }
 
 #ifdef DDB
@@ -915,6 +951,7 @@ DB_SHOW_COMMAND(apic, db_show_apic)
 {
 	struct intsrc *isrc;
 	int i, verbose;
+	u_int apic_id;
 	u_int irq;
 
 	if (strcmp(modif, "vv") == 0)
@@ -923,9 +960,14 @@ DB_SHOW_COMMAND(apic, db_show_apic)
 		verbose = 1;
 	else
 		verbose = 0;
-	for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
-		irq = ioint_irqs[i];
-		if (irq != 0 && irq != IRQ_SYSCALL) {
+	for (apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) {
+		if (lapics[apic_id].la_present == 0)
+			continue;
+		db_printf("Interrupts bound to lapic %u\n", apic_id);
+		for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
+			irq = lapics[apic_id].la_ioint_irqs[i];
+			if (irq == 0 || irq == IRQ_SYSCALL)
+				continue;
 			db_printf("vec 0x%2x -> ", i + APIC_IO_INTS);
 			if (irq == IRQ_TIMER)
 				db_printf("lapic timer\n");

Modified: head/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- head/sys/amd64/amd64/mp_machdep.c	Thu Jan 29 06:43:29 2009	(r187879)
+++ head/sys/amd64/amd64/mp_machdep.c	Thu Jan 29 09:22:56 2009	(r187880)
@@ -152,6 +152,7 @@ struct cpu_info {
 	int	cpu_disabled:1;
 } static cpu_info[MAX_APIC_ID + 1];
 int cpu_apic_ids[MAXCPU];
+int apic_cpuids[MAX_APIC_ID + 1];
 
 /* Holds pending bitmap based IPIs per CPU */
 static volatile u_int cpu_ipi_pending[MAXCPU];
@@ -349,6 +350,7 @@ cpu_mp_start(void)
 		KASSERT(boot_cpu_id == PCPU_GET(apic_id),
 		    ("BSP's APIC ID doesn't match boot_cpu_id"));
 	cpu_apic_ids[0] = boot_cpu_id;
+	apic_cpuids[boot_cpu_id] = 0;
 
 	assign_cpu_ids();
 
@@ -656,6 +658,7 @@ assign_cpu_ids(void)
 
 		if (mp_ncpus < MAXCPU) {
 			cpu_apic_ids[mp_ncpus] = i;
+			apic_cpuids[i] = mp_ncpus;
 			mp_ncpus++;
 		} else
 			cpu_info[i].cpu_disabled = 1;

Modified: head/sys/amd64/amd64/msi.c
==============================================================================
--- head/sys/amd64/amd64/msi.c	Thu Jan 29 06:43:29 2009	(r187879)
+++ head/sys/amd64/amd64/msi.c	Thu Jan 29 09:22:56 2009	(r187880)
@@ -161,7 +161,9 @@ msi_enable_intr(struct intsrc *isrc)
 {
 	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
 
-	apic_enable_vector(msi->msi_vector);
+	if (msi->msi_vector == 0)
+		msi_assign_cpu(isrc, 0);
+	apic_enable_vector(msi->msi_cpu, msi->msi_vector);
 }
 
 static void
@@ -169,7 +171,7 @@ msi_disable_intr(struct intsrc *isrc)
 {
 	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
 
-	apic_disable_vector(msi->msi_vector);
+	apic_disable_vector(msi->msi_cpu, msi->msi_vector);
 }
 
 static int
@@ -199,15 +201,35 @@ static void
 msi_assign_cpu(struct intsrc *isrc, u_int apic_id)
 {
 	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
-
+	int old_vector;
+	u_int old_id;
+	int vector;
+
+	/* Store information to free existing irq. */
+	old_vector = msi->msi_vector;
+	old_id = msi->msi_cpu;
+	if (old_vector && old_id == apic_id)
+		return;
+	/* Allocate IDT vector on this cpu. */
+	vector = apic_alloc_vector(apic_id, msi->msi_irq);
+	if (vector == 0)
+		return; /* XXX alloc_vector panics on failure. */
 	msi->msi_cpu = apic_id;
+	msi->msi_vector = vector;
 	if (bootverbose)
-		printf("msi: Assigning %s IRQ %d to local APIC %u\n",
+		printf("msi: Assigning %s IRQ %d to local APIC %u vector %u\n",
 		    msi->msi_msix ? "MSI-X" : "MSI", msi->msi_irq,
-		    msi->msi_cpu);	
+		    msi->msi_cpu, msi->msi_vector);
 	pci_remap_msi_irq(msi->msi_dev, msi->msi_irq);
+	/*
+	 * Free the old vector after the new one is established.  This is done
+	 * to prevent races where we could miss an interrupt.
+	 */
+	if (old_vector)
+		apic_free_vector(old_id, old_vector, msi->msi_irq);
 }
 
+
 void
 msi_init(void)
 {
@@ -263,7 +285,7 @@ int
 msi_alloc(device_t dev, int count, int maxcount, int *irqs)
 {
 	struct msi_intsrc *msi, *fsrc;
-	int cnt, i, vector;
+	int cnt, i;
 
 	if (!msi_enabled)
 		return (ENXIO);
@@ -309,22 +331,12 @@ again:
 	/* Ok, we now have the IRQs allocated. */
 	KASSERT(cnt == count, ("count mismatch"));
 
-	/* Allocate 'count' IDT vectors. */
-	vector = apic_alloc_vectors(irqs, count, maxcount);
-	if (vector == 0) {
-		mtx_unlock(&msi_lock);
-		return (ENOSPC);
-	}
-
 	/* Assign IDT vectors and make these messages owned by 'dev'. */
 	fsrc = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
 	for (i = 0; i < count; i++) {
 		msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
 		msi->msi_dev = dev;
-		msi->msi_vector = vector + i;
-		if (bootverbose)
-			printf("msi: routing MSI IRQ %d to vector %u\n",
-			    msi->msi_irq, msi->msi_vector);
+		msi->msi_vector = 0;
 		msi->msi_first = fsrc;
 		KASSERT(msi->msi_intsrc.is_handlers == 0,
 		    ("dead MSI has handlers"));
@@ -377,14 +389,18 @@ msi_release(int *irqs, int count)
 		KASSERT(msi->msi_dev == first->msi_dev, ("owner mismatch"));
 		msi->msi_first = NULL;
 		msi->msi_dev = NULL;
-		apic_free_vector(msi->msi_vector, msi->msi_irq);
+		if (msi->msi_vector)
+			apic_free_vector(msi->msi_cpu, msi->msi_vector,
+			    msi->msi_irq);
 		msi->msi_vector = 0;
 	}
 
 	/* Clear out the first message. */
 	first->msi_first = NULL;
 	first->msi_dev = NULL;
-	apic_free_vector(first->msi_vector, first->msi_irq);
+	if (first->msi_vector)
+		apic_free_vector(first->msi_cpu, first->msi_vector,
+		    first->msi_irq);
 	first->msi_vector = 0;
 	first->msi_count = 0;
 
@@ -433,7 +449,7 @@ int
 msix_alloc(device_t dev, int *irq)
 {
 	struct msi_intsrc *msi;
-	int i, vector;
+	int i;
 
 	if (!msi_enabled)
 		return (ENXIO);
@@ -468,15 +484,9 @@ again:
 		goto again;
 	}
 
-	/* Allocate an IDT vector. */
-	vector = apic_alloc_vector(i);
-	if (bootverbose)
-		printf("msi: routing MSI-X IRQ %d to vector %u\n", msi->msi_irq,
-		    vector);
-
 	/* Setup source. */
 	msi->msi_dev = dev;
-	msi->msi_vector = vector;
+	msi->msi_vector = 0;
 	msi->msi_msix = 1;
 
 	KASSERT(msi->msi_intsrc.is_handlers == 0, ("dead MSI-X has handlers"));
@@ -508,7 +518,8 @@ msix_release(int irq)
 
 	/* Clear out the message. */
 	msi->msi_dev = NULL;
-	apic_free_vector(msi->msi_vector, msi->msi_irq);
+	if (msi->msi_vector)
+		apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq);
 	msi->msi_vector = 0;
 	msi->msi_msix = 0;
 

Modified: head/sys/amd64/include/apicvar.h
==============================================================================
--- head/sys/amd64/include/apicvar.h	Thu Jan 29 06:43:29 2009	(r187879)
+++ head/sys/amd64/include/apicvar.h	Thu Jan 29 09:22:56 2009	(r187880)
@@ -176,14 +176,17 @@ inthand_t
 	IDTVEC(apic_isr7), IDTVEC(spuriousint), IDTVEC(timerint);
 
 extern vm_paddr_t lapic_paddr;
+extern int apic_cpuids[];
 
-u_int	apic_alloc_vector(u_int irq);
-u_int	apic_alloc_vectors(u_int *irqs, u_int count, u_int align);
-void	apic_disable_vector(u_int vector);
-void	apic_enable_vector(u_int vector);
-void	apic_free_vector(u_int vector, u_int irq);
-u_int	apic_idt_to_irq(u_int vector);
+u_int	apic_alloc_vector(u_int apic_id, u_int irq);
+u_int	apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count,
+	    u_int align);
+void	apic_disable_vector(u_int apic_id, u_int vector);
+void	apic_enable_vector(u_int apic_id, u_int vector);
+void	apic_free_vector(u_int apic_id, u_int vector, u_int irq);
+u_int	apic_idt_to_irq(u_int apic_id, u_int vector);
 void	apic_register_enumerator(struct apic_enumerator *enumerator);
+u_int	apic_cpuid(u_int apic_id);
 void	*ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase);
 int	ioapic_disable_pin(void *cookie, u_int pin);
 int	ioapic_get_vector(void *cookie, u_int pin);

Modified: head/sys/amd64/include/intr_machdep.h
==============================================================================
--- head/sys/amd64/include/intr_machdep.h	Thu Jan 29 06:43:29 2009	(r187879)
+++ head/sys/amd64/include/intr_machdep.h	Thu Jan 29 09:22:56 2009	(r187880)
@@ -47,7 +47,7 @@
  * IRQ values beyond 256 are used by MSI.  We leave 255 unused to avoid
  * confusion since 255 is used in PCI to indicate an invalid IRQ.
  */
-#define	NUM_MSI_INTS	128
+#define	NUM_MSI_INTS	512
 #define	FIRST_MSI_INT	256
 #define	NUM_IO_INTS	(FIRST_MSI_INT + NUM_MSI_INTS)
 

Modified: head/sys/i386/i386/io_apic.c
==============================================================================
--- head/sys/i386/i386/io_apic.c	Thu Jan 29 06:43:29 2009	(r187879)
+++ head/sys/i386/i386/io_apic.c	Thu Jan 29 09:22:56 2009	(r187880)
@@ -327,39 +327,56 @@ ioapic_assign_cpu(struct intsrc *isrc, u
 {
 	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
 	struct ioapic *io = (struct ioapic *)isrc->is_pic;
+	u_int old_vector;
+	u_int old_id;
 
+	/*
+	 * keep 1st core as the destination for NMI
+	 */
+	if (intpin->io_irq == IRQ_NMI)
+		apic_id = 0;
+
+	/*
+	 * Set us up to free the old irq.
+	 */
+	old_vector = intpin->io_vector;
+	old_id = intpin->io_cpu;
+	if (old_vector && apic_id == old_id)
+		return;
+
+	/*
+	 * Allocate an APIC vector for this interrupt pin.  Once
+	 * we have a vector we program the interrupt pin.
+	 */
 	intpin->io_cpu = apic_id;
+	intpin->io_vector = apic_alloc_vector(apic_id, intpin->io_irq);
 	if (bootverbose) {
-		printf("ioapic%u: Assigning ", io->io_id);
+		printf("ioapic%u: routing intpin %u (", io->io_id,
+		    intpin->io_intpin);
 		ioapic_print_irq(intpin);
-		printf(" to local APIC %u\n", intpin->io_cpu);
+		printf(") to lapic %u vector %u\n", intpin->io_cpu,
+		    intpin->io_vector);
 	}
 	ioapic_program_intpin(intpin);
+	/*
+	 * Free the old vector after the new one is established.  This is done
+	 * to prevent races where we could miss an interrupt.
+	 */
+	if (old_vector)
+		apic_free_vector(old_id, old_vector, intpin->io_irq);
 }
 
 static void
 ioapic_enable_intr(struct intsrc *isrc)
 {
 	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
-	struct ioapic *io = (struct ioapic *)isrc->is_pic;
 
-	if (intpin->io_vector == 0) {
-		/*
-		 * Allocate an APIC vector for this interrupt pin.  Once
-		 * we have a vector we program the interrupt pin.
-		 */
-		intpin->io_vector = apic_alloc_vector(intpin->io_irq);
-		if (bootverbose) {
-			printf("ioapic%u: routing intpin %u (", io->io_id,
-			    intpin->io_intpin);
-			ioapic_print_irq(intpin);
-			printf(") to vector %u\n", intpin->io_vector);
-		}
-		ioapic_program_intpin(intpin);
-		apic_enable_vector(intpin->io_vector);
-	}
+	if (intpin->io_vector == 0)
+		ioapic_assign_cpu(isrc, pcpu_find(0)->pc_apic_id);
+	apic_enable_vector(intpin->io_cpu, intpin->io_vector);
 }
 
+
 static void
 ioapic_disable_intr(struct intsrc *isrc)
 {
@@ -369,11 +386,11 @@ ioapic_disable_intr(struct intsrc *isrc)
 	if (intpin->io_vector != 0) {
 		/* Mask this interrupt pin and free its APIC vector. */
 		vector = intpin->io_vector;
-		apic_disable_vector(vector);
+		apic_disable_vector(intpin->io_cpu, vector);
 		intpin->io_masked = 1;
 		intpin->io_vector = 0;
 		ioapic_program_intpin(intpin);
-		apic_free_vector(vector, intpin->io_irq);
+		apic_free_vector(intpin->io_cpu, vector, intpin->io_irq);
 	}
 }
 

Modified: head/sys/i386/i386/local_apic.c
==============================================================================
--- head/sys/i386/i386/local_apic.c	Thu Jan 29 06:43:29 2009	(r187879)
+++ head/sys/i386/i386/local_apic.c	Thu Jan 29 09:22:56 2009	(r187880)
@@ -46,6 +46,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
 #include <sys/smp.h>
 
 #include <vm/vm.h>
@@ -109,6 +111,8 @@ struct lapic {
 	u_long la_hard_ticks;
 	u_long la_stat_ticks;
 	u_long la_prof_ticks;
+	/* Include IDT_SYSCALL to make indexing easier. */
+	u_int la_ioint_irqs[APIC_NUM_IOINTS + 1];
 } static lapics[MAX_APIC_ID + 1];
 
 /* XXX: should thermal be an NMI? */
@@ -134,8 +138,6 @@ static inthand_t *ioint_handlers[] = {
 	IDTVEC(apic_isr7),	/* 224 - 255 */
 };
 
-/* Include IDT_SYSCALL to make indexing easier. */
-static u_int ioint_irqs[APIC_NUM_IOINTS + 1];
 
 static u_int32_t lapic_timer_divisors[] = { 
 	APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
@@ -216,7 +218,6 @@ lapic_init(vm_paddr_t addr)
 
 	/* Perform basic initialization of the BSP's local APIC. */
 	lapic_enable();
-	ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
 
 	/* Set BSP's per-CPU local APIC ID. */
 	PCPU_SET(apic_id, lapic_id());
@@ -224,7 +225,6 @@ lapic_init(vm_paddr_t addr)
 	/* Local APIC timer interrupt. */
 	setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
-	ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = IRQ_TIMER;
 
 	/* XXX: error/thermal interrupts */
 }
@@ -256,6 +256,9 @@ lapic_create(u_int apic_id, int boot_cpu
 		lapics[apic_id].la_lvts[i] = lvts[i];
 		lapics[apic_id].la_lvts[i].lvt_active = 0;
 	}
+	lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
+	lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] =
+	    IRQ_TIMER;
 
 #ifdef SMP
 	cpu_add(apic_id, boot_cpu);
@@ -666,7 +669,8 @@ lapic_handle_intr(int vector, struct tra
 
 	if (vector == -1)
 		panic("Couldn't get vector from ISR!");
-	isrc = intr_lookup_source(apic_idt_to_irq(vector));
+	isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id),
+	    vector));
 	intr_execute_handlers(isrc, frame);
 }
 
@@ -781,9 +785,19 @@ lapic_timer_enable_intr(void)
 	lapic->lvt_timer = value;
 }
 
+u_int
+apic_cpuid(u_int apic_id)
+{
+#ifdef SMP
+	return apic_cpuids[apic_id];
+#else
+	return 0;
+#endif
+}
+
 /* Request a free IDT vector to be used by the specified IRQ. */
 u_int
-apic_alloc_vector(u_int irq)
+apic_alloc_vector(u_int apic_id, u_int irq)
 {
 	u_int vector;
 
@@ -795,9 +809,9 @@ apic_alloc_vector(u_int irq)
 	 */
 	mtx_lock_spin(&icu_lock);
 	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
-		if (ioint_irqs[vector] != 0)
+		if (lapics[apic_id].la_ioint_irqs[vector] != 0)
 			continue;
-		ioint_irqs[vector] = irq;
+		lapics[apic_id].la_ioint_irqs[vector] = irq;
 		mtx_unlock_spin(&icu_lock);
 		return (vector + APIC_IO_INTS);
 	}
@@ -812,7 +826,7 @@ apic_alloc_vector(u_int irq)
  * satisfied, 0 is returned.
  */
 u_int
-apic_alloc_vectors(u_int *irqs, u_int count, u_int align)
+apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align)
 {
 	u_int first, run, vector;
 
@@ -835,7 +849,7 @@ apic_alloc_vectors(u_int *irqs, u_int co
 	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
 
 		/* Vector is in use, end run. */
-		if (ioint_irqs[vector] != 0) {
+		if (lapics[apic_id].la_ioint_irqs[vector] != 0) {
 			run = 0;
 			first = 0;
 			continue;
@@ -855,7 +869,8 @@ apic_alloc_vectors(u_int *irqs, u_int co
 
 		/* Found a run, assign IRQs and return the first vector. */
 		for (vector = 0; vector < count; vector++)
-			ioint_irqs[first + vector] = irqs[vector];
+			lapics[apic_id].la_ioint_irqs[first + vector] =
+			    irqs[vector];
 		mtx_unlock_spin(&icu_lock);
 		return (first + APIC_IO_INTS);
 	}
@@ -864,8 +879,14 @@ apic_alloc_vectors(u_int *irqs, u_int co
 	return (0);
 }
 
+/*
+ * Enable a vector for a particular apic_id.  Since all lapics share idt
+ * entries and ioint_handlers this enables the vector on all lapics.  lapics
+ * which do not have the vector configured would report spurious interrupts
+ * should it fire.
+ */
 void
-apic_enable_vector(u_int vector)
+apic_enable_vector(u_int apic_id, u_int vector)
 {
 
 	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
@@ -876,7 +897,7 @@ apic_enable_vector(u_int vector)
 }
 
 void
-apic_disable_vector(u_int vector)
+apic_disable_vector(u_int apic_id, u_int vector)
 {
 
 	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
@@ -888,27 +909,42 @@ apic_disable_vector(u_int vector)
 
 /* Release an APIC vector when it's no longer in use. */
 void
-apic_free_vector(u_int vector, u_int irq)
+apic_free_vector(u_int apic_id, u_int vector, u_int irq)
 {
+	struct thread *td;
 	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
 	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
 	    ("Vector %u does not map to an IRQ line", vector));
 	KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq));
-	KASSERT(ioint_irqs[vector - APIC_IO_INTS] == irq, ("IRQ mismatch"));
+	KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] ==
+	    irq, ("IRQ mismatch"));
+
+	/*
+	 * Bind us to the cpu that owned the vector before freeing it so
+	 * we don't lose an interrupt delivery race.
+	 */
+	td = curthread;
+	thread_lock(td);
+	if (sched_is_bound(td))
+		panic("apic_free_vector: Thread already bound.\n");
+	sched_bind(td, apic_cpuid(apic_id));
 	mtx_lock_spin(&icu_lock);
-	ioint_irqs[vector - APIC_IO_INTS] = 0;
+	lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = 0;
 	mtx_unlock_spin(&icu_lock);
+	sched_unbind(td);
+	thread_unlock(td);
+
 }
 
 /* Map an IDT vector (APIC) to an IRQ (interrupt source). */
 u_int
-apic_idt_to_irq(u_int vector)
+apic_idt_to_irq(u_int apic_id, u_int vector)
 {
 
 	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
 	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
 	    ("Vector %u does not map to an IRQ line", vector));
-	return (ioint_irqs[vector - APIC_IO_INTS]);
+	return (lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]);
 }
 
 #ifdef DDB
@@ -919,6 +955,7 @@ DB_SHOW_COMMAND(apic, db_show_apic)
 {
 	struct intsrc *isrc;
 	int i, verbose;
+	u_int apic_id;
 	u_int irq;
 
 	if (strcmp(modif, "vv") == 0)
@@ -927,9 +964,14 @@ DB_SHOW_COMMAND(apic, db_show_apic)
 		verbose = 1;
 	else
 		verbose = 0;
-	for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
-		irq = ioint_irqs[i];
-		if (irq != 0 && irq != IRQ_SYSCALL) {
+	for (apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) {
+		if (lapics[apic_id].la_present == 0)
+			continue;
+		db_printf("Interrupts bound to lapic %u\n", apic_id);
+		for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
+			irq = lapics[apic_id].la_ioint_irqs[i];
+			if (irq == 0 || irq == IRQ_SYSCALL)
+				continue;
 			db_printf("vec 0x%2x -> ", i + APIC_IO_INTS);
 			if (irq == IRQ_TIMER)
 				db_printf("lapic timer\n");

Modified: head/sys/i386/i386/mp_machdep.c
==============================================================================
--- head/sys/i386/i386/mp_machdep.c	Thu Jan 29 06:43:29 2009	(r187879)
+++ head/sys/i386/i386/mp_machdep.c	Thu Jan 29 09:22:56 2009	(r187880)
@@ -206,6 +206,7 @@ struct cpu_info {
 	int	cpu_disabled:1;
 } static cpu_info[MAX_APIC_ID + 1];
 int cpu_apic_ids[MAXCPU];
+int apic_cpuids[MAX_APIC_ID + 1];
 
 /* Holds pending bitmap based IPIs per CPU */
 static volatile u_int cpu_ipi_pending[MAXCPU];
@@ -397,6 +398,7 @@ cpu_mp_start(void)
 		KASSERT(boot_cpu_id == PCPU_GET(apic_id),
 		    ("BSP's APIC ID doesn't match boot_cpu_id"));
 	cpu_apic_ids[0] = boot_cpu_id;
+	apic_cpuids[boot_cpu_id] = 0;
 
 	assign_cpu_ids();
 
@@ -705,6 +707,7 @@ assign_cpu_ids(void)
 
 		if (mp_ncpus < MAXCPU) {
 			cpu_apic_ids[mp_ncpus] = i;
+			apic_cpuids[i] = mp_ncpus;
 			mp_ncpus++;
 		} else
 			cpu_info[i].cpu_disabled = 1;

Modified: head/sys/i386/i386/msi.c
==============================================================================
--- head/sys/i386/i386/msi.c	Thu Jan 29 06:43:29 2009	(r187879)
+++ head/sys/i386/i386/msi.c	Thu Jan 29 09:22:56 2009	(r187880)
@@ -161,7 +161,9 @@ msi_enable_intr(struct intsrc *isrc)
 {
 	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
 
-	apic_enable_vector(msi->msi_vector);
+	if (msi->msi_vector == 0)
+		msi_assign_cpu(isrc, 0);
+	apic_enable_vector(msi->msi_cpu, msi->msi_vector);
 }
 
 static void
@@ -169,7 +171,7 @@ msi_disable_intr(struct intsrc *isrc)
 {
 	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
 
-	apic_disable_vector(msi->msi_vector);
+	apic_disable_vector(msi->msi_cpu, msi->msi_vector);
 }
 
 static int
@@ -199,15 +201,35 @@ static void
 msi_assign_cpu(struct intsrc *isrc, u_int apic_id)
 {
 	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
-
+	int old_vector;
+	u_int old_id;
+	int vector;
+
+	/* Store information to free existing irq. */
+	old_vector = msi->msi_vector;
+	old_id = msi->msi_cpu;
+	if (old_vector && old_id == apic_id)
+		return;
+	/* Allocate IDT vector on this cpu. */
+	vector = apic_alloc_vector(apic_id, msi->msi_irq);
+	if (vector == 0)
+		return; /* XXX alloc_vector panics on failure. */
 	msi->msi_cpu = apic_id;
+	msi->msi_vector = vector;
 	if (bootverbose)
-		printf("msi: Assigning %s IRQ %d to local APIC %u\n",
+		printf("msi: Assigning %s IRQ %d to local APIC %u vector %u\n",
 		    msi->msi_msix ? "MSI-X" : "MSI", msi->msi_irq,
-		    msi->msi_cpu);	
+		    msi->msi_cpu, msi->msi_vector);
 	pci_remap_msi_irq(msi->msi_dev, msi->msi_irq);
+	/*
+	 * Free the old vector after the new one is established.  This is done
+	 * to prevent races where we could miss an interrupt.
+	 */
+	if (old_vector)
+		apic_free_vector(old_id, old_vector, msi->msi_irq);
 }
 
+
 void
 msi_init(void)
 {
@@ -263,7 +285,7 @@ int
 msi_alloc(device_t dev, int count, int maxcount, int *irqs)
 {
 	struct msi_intsrc *msi, *fsrc;
-	int cnt, i, vector;
+	int cnt, i;
 
 	if (!msi_enabled)
 		return (ENXIO);
@@ -309,22 +331,12 @@ again:
 	/* Ok, we now have the IRQs allocated. */
 	KASSERT(cnt == count, ("count mismatch"));
 
-	/* Allocate 'count' IDT vectors. */
-	vector = apic_alloc_vectors(irqs, count, maxcount);
-	if (vector == 0) {
-		mtx_unlock(&msi_lock);
-		return (ENOSPC);
-	}
-
 	/* Assign IDT vectors and make these messages owned by 'dev'. */
 	fsrc = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
 	for (i = 0; i < count; i++) {
 		msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
 		msi->msi_dev = dev;
-		msi->msi_vector = vector + i;
-		if (bootverbose)
-			printf("msi: routing MSI IRQ %d to vector %u\n",
-			    msi->msi_irq, msi->msi_vector);
+		msi->msi_vector = 0;
 		msi->msi_first = fsrc;
 		KASSERT(msi->msi_intsrc.is_handlers == 0,
 		    ("dead MSI has handlers"));
@@ -377,14 +389,18 @@ msi_release(int *irqs, int count)
 		KASSERT(msi->msi_dev == first->msi_dev, ("owner mismatch"));
 		msi->msi_first = NULL;
 		msi->msi_dev = NULL;
-		apic_free_vector(msi->msi_vector, msi->msi_irq);
+		if (msi->msi_vector)
+			apic_free_vector(msi->msi_cpu, msi->msi_vector,
+			    msi->msi_irq);
 		msi->msi_vector = 0;
 	}
 
 	/* Clear out the first message. */
 	first->msi_first = NULL;
 	first->msi_dev = NULL;
-	apic_free_vector(first->msi_vector, first->msi_irq);
+	if (first->msi_vector)
+		apic_free_vector(first->msi_cpu, first->msi_vector,
+		    first->msi_irq);
 	first->msi_vector = 0;
 	first->msi_count = 0;
 
@@ -433,7 +449,7 @@ int
 msix_alloc(device_t dev, int *irq)
 {
 	struct msi_intsrc *msi;
-	int i, vector;
+	int i;
 
 	if (!msi_enabled)
 		return (ENXIO);
@@ -468,15 +484,9 @@ again:
 		goto again;
 	}
 
-	/* Allocate an IDT vector. */

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list