svn commit: r195249 - in head/sys: amd64/amd64 amd64/include amd64/isa i386/i386 i386/include i386/isa kern sys

John Baldwin jhb at FreeBSD.org
Wed Jul 1 17:20:08 UTC 2009


Author: jhb
Date: Wed Jul  1 17:20:07 2009
New Revision: 195249
URL: http://svn.freebsd.org/changeset/base/195249

Log:
  Improve the handling of cpuset with interrupts.
  - For x86, change the interrupt source method to assign an interrupt source
    to a specific CPU to return an error value instead of void, thus allowing
    it to fail.
  - If moving an interrupt to a CPU fails due to a lack of IDT vectors in the
    destination CPU, fail the request with ENOSPC rather than panicking.
  - For MSI interrupts on x86 (but not MSI-X), only allow cpuset to be used
    on the first interrupt in a group.  Moving the first interrupt in a group
    moves the entire group.
  - Use the icu_lock to protect intr_next_cpu() on x86 instead of the
    intr_table_lock to fix a LOR introduced in the last set of MSI changes.
  - Add a new privilege PRIV_SCHED_CPUSET_INTR for using cpuset with
    interrupts.  Previously, binding an interrupt to a CPU only performed a
    privilege check if the interrupt had an interrupt thread.  Interrupts
    without a thread could be bound by non-root users as a result.
  - If an interrupt event's assign_cpu method fails, then restore the original
    cpuset mask for the associated interrupt thread.
  
  Approved by:	re (kib)

Modified:
  head/sys/amd64/amd64/intr_machdep.c
  head/sys/amd64/amd64/io_apic.c
  head/sys/amd64/amd64/local_apic.c
  head/sys/amd64/amd64/msi.c
  head/sys/amd64/include/intr_machdep.h
  head/sys/amd64/isa/atpic.c
  head/sys/i386/i386/intr_machdep.c
  head/sys/i386/i386/io_apic.c
  head/sys/i386/i386/local_apic.c
  head/sys/i386/i386/msi.c
  head/sys/i386/include/intr_machdep.h
  head/sys/i386/isa/atpic.c
  head/sys/kern/kern_intr.c
  head/sys/sys/priv.h

Modified: head/sys/amd64/amd64/intr_machdep.c
==============================================================================
--- head/sys/amd64/amd64/intr_machdep.c	Wed Jul  1 17:06:56 2009	(r195248)
+++ head/sys/amd64/amd64/intr_machdep.c	Wed Jul  1 17:20:07 2009	(r195249)
@@ -163,8 +163,8 @@ intr_lookup_source(int vector)
 }
 
 int
-intr_add_handler(const char *name, int vector, driver_filter_t filter, 
-    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep)    
+intr_add_handler(const char *name, int vector, driver_filter_t filter,
+    driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep)
 {
 	struct intsrc *isrc;
 	int error;
@@ -302,7 +302,8 @@ static int
 intr_assign_cpu(void *arg, u_char cpu)
 {
 #ifdef SMP
-	struct intsrc *isrc;	
+	struct intsrc *isrc;
+	int error;
 
 	/*
 	 * Don't do anything during early boot.  We will pick up the
@@ -311,10 +312,11 @@ intr_assign_cpu(void *arg, u_char cpu)
 	if (assign_cpu && cpu != NOCPU) {
 		isrc = arg;
 		mtx_lock(&intr_table_lock);
-		isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]);
+		error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]);
 		mtx_unlock(&intr_table_lock);
-	}
-	return (0);
+	} else
+		error = 0;
+	return (error);
 #else
 	return (EOPNOTSUPP);
 #endif
@@ -371,7 +373,7 @@ intr_init(void *dummy __unused)
 	intrcnt_setname("???", 0);
 	intrcnt_index = 1;
 	STAILQ_INIT(&pics);
-	mtx_init(&intr_table_lock, "intr sources", NULL, MTX_DEF | MTX_RECURSE);
+	mtx_init(&intr_table_lock, "intr sources", NULL, MTX_DEF);
 	mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN);
 }
 SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL);
@@ -441,14 +443,14 @@ intr_next_cpu(void)
 	if (!assign_cpu)
 		return (cpu_apic_ids[0]);
 
-	mtx_lock(&intr_table_lock);
+	mtx_lock_spin(&icu_lock);
 	apic_id = cpu_apic_ids[current_cpu];
 	do {
 		current_cpu++;
 		if (current_cpu > mp_maxid)
 			current_cpu = 0;
 	} while (!(intr_cpus & (1 << current_cpu)));
-	mtx_unlock(&intr_table_lock);
+	mtx_unlock_spin(&icu_lock);
 	return (apic_id);
 }
 
@@ -489,7 +491,6 @@ static void
 intr_shuffle_irqs(void *arg __unused)
 {
 	struct intsrc *isrc;
-	u_int apic_id;
 	int i;
 
 	/* Don't bother on UP. */
@@ -505,13 +506,17 @@ intr_shuffle_irqs(void *arg __unused)
 			/*
 			 * If this event is already bound to a CPU,
 			 * then assign the source to that CPU instead
-			 * of picking one via round-robin.
+			 * of picking one via round-robin.  Note that
+			 * this is careful to only advance the
+			 * round-robin if the CPU assignment succeeds.
 			 */
 			if (isrc->is_event->ie_cpu != NOCPU)
-				apic_id = isrc->is_event->ie_cpu;
-			else
-				apic_id = intr_next_cpu();
-			isrc->is_pic->pic_assign_cpu(isrc, apic_id);
+				(void)isrc->is_pic->pic_assign_cpu(isrc,
+				    isrc->is_event->ie_cpu);
+			else if (isrc->is_pic->pic_assign_cpu(isrc,
+				cpu_apic_ids[current_cpu]) == 0)
+				(void)intr_next_cpu();
+
 		}
 	}
 	mtx_unlock(&intr_table_lock);

Modified: head/sys/amd64/amd64/io_apic.c
==============================================================================
--- head/sys/amd64/amd64/io_apic.c	Wed Jul  1 17:06:56 2009	(r195248)
+++ head/sys/amd64/amd64/io_apic.c	Wed Jul  1 17:20:07 2009	(r195249)
@@ -120,7 +120,7 @@ static int	ioapic_source_pending(struct 
 static int	ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
 		    enum intr_polarity pol);
 static void	ioapic_resume(struct pic *pic);
-static void	ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id);
+static int	ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id);
 static void	ioapic_program_intpin(struct ioapic_intsrc *intpin);
 
 static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list);
@@ -322,7 +322,7 @@ ioapic_program_intpin(struct ioapic_ints
 	mtx_unlock_spin(&icu_lock);
 }
 
-static void
+static int
 ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id)
 {
 	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
@@ -342,7 +342,7 @@ ioapic_assign_cpu(struct intsrc *isrc, u
 	old_vector = intpin->io_vector;
 	old_id = intpin->io_cpu;
 	if (old_vector && apic_id == old_id)
-		return;
+		return (0);
 
 	/*
 	 * Allocate an APIC vector for this interrupt pin.  Once
@@ -350,6 +350,9 @@ ioapic_assign_cpu(struct intsrc *isrc, u
 	 */
 	intpin->io_cpu = apic_id;
 	intpin->io_vector = apic_alloc_vector(apic_id, intpin->io_irq);
+	if (intpin->io_vector == 0)
+		return (ENOSPC);
+
 	if (bootverbose) {
 		printf("ioapic%u: routing intpin %u (", io->io_id,
 		    intpin->io_intpin);
@@ -364,6 +367,7 @@ ioapic_assign_cpu(struct intsrc *isrc, u
 	 */
 	if (old_vector)
 		apic_free_vector(old_id, old_vector, intpin->io_irq);
+	return (0);
 }
 
 static void
@@ -372,7 +376,9 @@ ioapic_enable_intr(struct intsrc *isrc)
 	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
 
 	if (intpin->io_vector == 0)
-		ioapic_assign_cpu(isrc, intr_next_cpu());
+		if (ioapic_assign_cpu(isrc, intr_next_cpu()) != 0)
+			panic("Couldn't find an APIC vector for IRQ %d",
+			    intpin->io_irq);
 	apic_enable_vector(intpin->io_cpu, intpin->io_vector);
 }
 
@@ -496,7 +502,7 @@ ioapic_create(vm_paddr_t addr, int32_t a
 	io->io_pic = ioapic_template;
 	mtx_lock_spin(&icu_lock);
 	io->io_id = next_id++;
-	io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT;	
+	io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT;
 	if (apic_id != -1 && io->io_apic_id != apic_id) {
 		ioapic_write(apic, IOAPIC_ID, apic_id << APIC_ID_SHIFT);
 		mtx_unlock_spin(&icu_lock);
@@ -730,7 +736,7 @@ ioapic_set_triggermode(void *cookie, u_i
 	if (pin >= io->io_numintr || trigger == INTR_TRIGGER_CONFORM)
 		return (EINVAL);
 	if (io->io_pins[pin].io_irq >= NUM_IO_INTS)
-		return (EINVAL);	
+		return (EINVAL);
 	edgetrigger = (trigger == INTR_TRIGGER_EDGE);
 	if (io->io_pins[pin].io_edgetrigger == edgetrigger)
 		return (0);

Modified: head/sys/amd64/amd64/local_apic.c
==============================================================================
--- head/sys/amd64/amd64/local_apic.c	Wed Jul  1 17:06:56 2009	(r195248)
+++ head/sys/amd64/amd64/local_apic.c	Wed Jul  1 17:20:07 2009	(r195249)
@@ -139,7 +139,7 @@ static inthand_t *ioint_handlers[] = {
 };
 
 
-static u_int32_t lapic_timer_divisors[] = { 
+static u_int32_t lapic_timer_divisors[] = {
 	APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
 	APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
 };
@@ -797,7 +797,7 @@ apic_alloc_vector(u_int apic_id, u_int i
 		return (vector + APIC_IO_INTS);
 	}
 	mtx_unlock_spin(&icu_lock);
-	panic("Couldn't find an APIC vector for IRQ %u", irq);
+	return (0);
 }
 
 /*
@@ -1058,7 +1058,7 @@ DB_SHOW_COMMAND(lapic, db_show_lapic)
 static SLIST_HEAD(, apic_enumerator) enumerators =
 	SLIST_HEAD_INITIALIZER(enumerators);
 static struct apic_enumerator *best_enum;
-	
+
 void
 apic_register_enumerator(struct apic_enumerator *enumerator)
 {

Modified: head/sys/amd64/amd64/msi.c
==============================================================================
--- head/sys/amd64/amd64/msi.c	Wed Jul  1 17:06:56 2009	(r195248)
+++ head/sys/amd64/amd64/msi.c	Wed Jul  1 17:20:07 2009	(r195249)
@@ -113,6 +113,8 @@ struct msi_intsrc {
 	u_int msi_vector:8;		/* IDT vector. */
 	u_int msi_cpu:8;		/* Local APIC ID. (g) */
 	u_int msi_count:8;		/* Messages in this group. (g) */
+	u_int msi_maxcount:8;		/* Alignment for this group. (g) */
+	int *msi_irqs;			/* Group's IRQ list. (g) */
 };
 
 static void	msi_create_source(void);
@@ -125,7 +127,7 @@ static int	msi_vector(struct intsrc *isr
 static int	msi_source_pending(struct intsrc *isrc);
 static int	msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
 		    enum intr_polarity pol);
-static void	msi_assign_cpu(struct intsrc *isrc, u_int apic_id);
+static int	msi_assign_cpu(struct intsrc *isrc, u_int apic_id);
 
 struct pic msi_pic = { msi_enable_source, msi_disable_source, msi_eoi_source,
 		       msi_enable_intr, msi_disable_intr, msi_vector,
@@ -195,32 +197,52 @@ msi_config_intr(struct intsrc *isrc, enu
 	return (ENODEV);
 }
 
-static void
+static int
 msi_assign_cpu(struct intsrc *isrc, u_int apic_id)
 {
-	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
+	struct msi_intsrc *sib, *msi = (struct msi_intsrc *)isrc;
 	int old_vector;
 	u_int old_id;
-	int vector;
+	int i, vector;
+
+	/*
+	 * Only allow CPUs to be assigned to the first message for an
+	 * MSI group.
+	 */
+	if (msi->msi_first != msi)
+		return (EINVAL);
 
 	/* Store information to free existing irq. */
 	old_vector = msi->msi_vector;
 	old_id = msi->msi_cpu;
 	if (old_id == apic_id)
-		return;
-	if (!msi->msi_msix && msi->msi_first->msi_count > 1)
-		return;
+		return (0);
 
-	/* Allocate IDT vector on this cpu. */
-	vector = apic_alloc_vector(apic_id, msi->msi_irq);
+	/* Allocate IDT vectors on this cpu. */
+	if (msi->msi_count > 1) {
+		KASSERT(msi->msi_msix == 0, ("MSI-X message group"));
+		vector = apic_alloc_vectors(apic_id, msi->msi_irqs,
+		    msi->msi_count, msi->msi_maxcount);
+	} else
+		vector = apic_alloc_vector(apic_id, msi->msi_irq);
 	if (vector == 0)
-		return; /* XXX alloc_vector panics on failure. */
+		return (ENOSPC);
+
 	msi->msi_cpu = apic_id;
 	msi->msi_vector = vector;
 	if (bootverbose)
 		printf("msi: Assigning %s IRQ %d to local APIC %u vector %u\n",
 		    msi->msi_msix ? "MSI-X" : "MSI", msi->msi_irq,
 		    msi->msi_cpu, msi->msi_vector);
+	for (i = 1; i < msi->msi_count; i++) {
+		sib = (struct msi_intsrc *)intr_lookup_source(msi->msi_irqs[i]);
+		sib->msi_cpu = apic_id;
+		sib->msi_vector = vector + i;
+		if (bootverbose)
+			printf(
+		    "msi: Assigning MSI IRQ %d to local APIC %u vector %u\n",
+			    sib->msi_irq, sib->msi_cpu, sib->msi_vector);
+	}
 	pci_remap_msi_irq(msi->msi_dev, msi->msi_irq);
 
 	/*
@@ -228,6 +250,9 @@ msi_assign_cpu(struct intsrc *isrc, u_in
 	 * to prevent races where we could miss an interrupt.
 	 */
 	apic_free_vector(old_id, old_vector, msi->msi_irq);
+	for (i = 1; i < msi->msi_count; i++)
+		apic_free_vector(old_id, old_vector + i, msi->msi_irqs[i]);
+	return (0);
 }
 
 void
@@ -268,7 +293,7 @@ msi_create_source(void)
 	msi_last_irq++;
 	mtx_unlock(&msi_lock);
 
-	msi = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO);	
+	msi = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO);
 	msi->msi_intsrc.is_pic = &msi_pic;
 	msi->msi_irq = irq;
 	intr_register_source(&msi->msi_intsrc);
@@ -276,21 +301,22 @@ msi_create_source(void)
 }
 
 /*
- * Try to allocate 'count' interrupt sources with contiguous IDT values.  If
- * we allocate any new sources, then their IRQ values will be at the end of
- * the irqs[] array, with *newirq being the index of the first new IRQ value
- * and *newcount being the number of new IRQ values added.
+ * Try to allocate 'count' interrupt sources with contiguous IDT values.
  */
 int
 msi_alloc(device_t dev, int count, int maxcount, int *irqs)
 {
 	struct msi_intsrc *msi, *fsrc;
 	u_int cpu;
-	int cnt, i, vector;
+	int cnt, i, *mirqs, vector;
 
 	if (!msi_enabled)
 		return (ENXIO);
 
+	if (count > 1)
+		mirqs = malloc(count * sizeof(*mirqs), M_MSI, M_WAITOK);
+	else
+		mirqs = NULL;
 again:
 	mtx_lock(&msi_lock);
 
@@ -317,6 +343,7 @@ again:
 		/* If we would exceed the max, give up. */
 		if (i + (count - cnt) > FIRST_MSI_INT + NUM_MSI_INTS) {
 			mtx_unlock(&msi_lock);
+			free(mirqs, M_MSI);
 			return (ENXIO);
 		}
 		mtx_unlock(&msi_lock);
@@ -337,6 +364,7 @@ again:
 	vector = apic_alloc_vectors(cpu, irqs, count, maxcount);
 	if (vector == 0) {
 		mtx_unlock(&msi_lock);
+		free(mirqs, M_MSI);
 		return (ENOSPC);
 	}
 
@@ -356,6 +384,10 @@ again:
 		    ("dead MSI has handlers"));
 	}
 	fsrc->msi_count = count;
+	fsrc->msi_maxcount = maxcount;
+	if (count > 1)
+		bcopy(irqs, mirqs, count * sizeof(*mirqs));
+	fsrc->msi_irqs = mirqs;
 	mtx_unlock(&msi_lock);
 
 	return (0);
@@ -413,6 +445,9 @@ msi_release(int *irqs, int count)
 	apic_free_vector(first->msi_cpu, first->msi_vector, first->msi_irq);
 	first->msi_vector = 0;
 	first->msi_count = 0;
+	first->msi_maxcount = 0;
+	free(first->msi_irqs, M_MSI);
+	first->msi_irqs = NULL;
 
 	mtx_unlock(&msi_lock);
 	return (0);
@@ -498,15 +533,23 @@ again:
 	/* Allocate an IDT vector. */
 	cpu = intr_next_cpu();
 	vector = apic_alloc_vector(cpu, i);
+	if (vector == 0) {
+		mtx_unlock(&msi_lock);
+		return (ENOSPC);
+	}
 	if (bootverbose)
 		printf("msi: routing MSI-X IRQ %d to local APIC %u vector %u\n",
 		    msi->msi_irq, cpu, vector);
-	
+
 	/* Setup source. */
 	msi->msi_cpu = cpu;
 	msi->msi_dev = dev;
+	msi->msi_first = msi;
 	msi->msi_vector = vector;
 	msi->msi_msix = 1;
+	msi->msi_count = 1;
+	msi->msi_maxcount = 1;
+	msi->msi_irqs = NULL;
 
 	KASSERT(msi->msi_intsrc.is_handlers == 0, ("dead MSI-X has handlers"));
 	mtx_unlock(&msi_lock);
@@ -536,10 +579,13 @@ msix_release(int irq)
 	KASSERT(msi->msi_dev != NULL, ("unowned message"));
 
 	/* Clear out the message. */
+	msi->msi_first = NULL;
 	msi->msi_dev = NULL;
 	apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq);
 	msi->msi_vector = 0;
 	msi->msi_msix = 0;
+	msi->msi_count = 0;
+	msi->msi_maxcount = 0;
 
 	mtx_unlock(&msi_lock);
 	return (0);

Modified: head/sys/amd64/include/intr_machdep.h
==============================================================================
--- head/sys/amd64/include/intr_machdep.h	Wed Jul  1 17:06:56 2009	(r195248)
+++ head/sys/amd64/include/intr_machdep.h	Wed Jul  1 17:20:07 2009	(r195249)
@@ -93,7 +93,7 @@ struct pic {
 	void (*pic_resume)(struct pic *);
 	int (*pic_config_intr)(struct intsrc *, enum intr_trigger,
 	    enum intr_polarity);
-	void (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
+	int (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
 	STAILQ_ENTRY(pic) pics;
 };
 

Modified: head/sys/amd64/isa/atpic.c
==============================================================================
--- head/sys/amd64/isa/atpic.c	Wed Jul  1 17:06:56 2009	(r195248)
+++ head/sys/amd64/isa/atpic.c	Wed Jul  1 17:20:07 2009	(r195249)
@@ -143,7 +143,7 @@ static void atpic_resume(struct pic *pic
 static int atpic_source_pending(struct intsrc *isrc);
 static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
     enum intr_polarity pol);
-static void atpic_assign_cpu(struct intsrc *isrc, u_int apic_id);
+static int atpic_assign_cpu(struct intsrc *isrc, u_int apic_id);
 static void i8259_init(struct atpic *pic, int slave);
 
 static struct atpic atpics[] = {
@@ -357,7 +357,7 @@ atpic_config_intr(struct intsrc *isrc, e
 	return (0);
 }
 
-static void
+static int
 atpic_assign_cpu(struct intsrc *isrc, u_int apic_id)
 {
 

Modified: head/sys/i386/i386/intr_machdep.c
==============================================================================
--- head/sys/i386/i386/intr_machdep.c	Wed Jul  1 17:06:56 2009	(r195248)
+++ head/sys/i386/i386/intr_machdep.c	Wed Jul  1 17:20:07 2009	(r195249)
@@ -290,7 +290,8 @@ static int
 intr_assign_cpu(void *arg, u_char cpu)
 {
 #ifdef SMP
-	struct intsrc *isrc;	
+	struct intsrc *isrc;
+	int error;
 
 	/*
 	 * Don't do anything during early boot.  We will pick up the
@@ -299,10 +300,11 @@ intr_assign_cpu(void *arg, u_char cpu)
 	if (assign_cpu && cpu != NOCPU) {
 		isrc = arg;
 		mtx_lock(&intr_table_lock);
-		isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]);
+		error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]);
 		mtx_unlock(&intr_table_lock);
-	}
-	return (0);
+	} else
+		error = 0;
+	return (error);
 #else
 	return (EOPNOTSUPP);
 #endif
@@ -359,7 +361,7 @@ intr_init(void *dummy __unused)
 	intrcnt_setname("???", 0);
 	intrcnt_index = 1;
 	STAILQ_INIT(&pics);
-	mtx_init(&intr_table_lock, "intr sources", NULL, MTX_DEF | MTX_RECURSE);
+	mtx_init(&intr_table_lock, "intr sources", NULL, MTX_DEF);
 	mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN);
 }
 SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL);
@@ -407,14 +409,14 @@ intr_next_cpu(void)
 	if (!assign_cpu)
 		return (cpu_apic_ids[0]);
 
-	mtx_lock(&intr_table_lock);
+	mtx_lock_spin(&icu_lock);
 	apic_id = cpu_apic_ids[current_cpu];
 	do {
 		current_cpu++;
 		if (current_cpu > mp_maxid)
 			current_cpu = 0;
 	} while (!(intr_cpus & (1 << current_cpu)));
-	mtx_unlock(&intr_table_lock);
+	mtx_unlock_spin(&icu_lock);
 	return (apic_id);
 }
 
@@ -455,7 +457,6 @@ static void
 intr_shuffle_irqs(void *arg __unused)
 {
 	struct intsrc *isrc;
-	u_int apic_id;
 	int i;
 
 #ifdef XEN
@@ -463,8 +464,8 @@ intr_shuffle_irqs(void *arg __unused)
 	 * Doesn't work yet
 	 */
 	return;
-#endif	
-	
+#endif
+
 	/* Don't bother on UP. */
 	if (mp_ncpus == 1)
 		return;
@@ -478,13 +479,17 @@ intr_shuffle_irqs(void *arg __unused)
 			/*
 			 * If this event is already bound to a CPU,
 			 * then assign the source to that CPU instead
-			 * of picking one via round-robin.
+			 * of picking one via round-robin.  Note that
+			 * this is careful to only advance the
+			 * round-robin if the CPU assignment succeeds.
 			 */
 			if (isrc->is_event->ie_cpu != NOCPU)
-				apic_id = isrc->is_event->ie_cpu;
-			else
-				apic_id = intr_next_cpu();
-			isrc->is_pic->pic_assign_cpu(isrc, apic_id);
+				(void)isrc->is_pic->pic_assign_cpu(isrc,
+				    isrc->is_event->ie_cpu);
+			else if (isrc->is_pic->pic_assign_cpu(isrc,
+				cpu_apic_ids[current_cpu]) == 0)
+				(void)intr_next_cpu();
+
 		}
 	}
 	mtx_unlock(&intr_table_lock);

Modified: head/sys/i386/i386/io_apic.c
==============================================================================
--- head/sys/i386/i386/io_apic.c	Wed Jul  1 17:06:56 2009	(r195248)
+++ head/sys/i386/i386/io_apic.c	Wed Jul  1 17:20:07 2009	(r195249)
@@ -120,7 +120,7 @@ static int	ioapic_source_pending(struct 
 static int	ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
 		    enum intr_polarity pol);
 static void	ioapic_resume(struct pic *pic);
-static void	ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id);
+static int	ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id);
 static void	ioapic_program_intpin(struct ioapic_intsrc *intpin);
 
 static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list);
@@ -322,7 +322,7 @@ ioapic_program_intpin(struct ioapic_ints
 	mtx_unlock_spin(&icu_lock);
 }
 
-static void
+static int
 ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id)
 {
 	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
@@ -342,7 +342,7 @@ ioapic_assign_cpu(struct intsrc *isrc, u
 	old_vector = intpin->io_vector;
 	old_id = intpin->io_cpu;
 	if (old_vector && apic_id == old_id)
-		return;
+		return (0);
 
 	/*
 	 * Allocate an APIC vector for this interrupt pin.  Once
@@ -350,6 +350,9 @@ ioapic_assign_cpu(struct intsrc *isrc, u
 	 */
 	intpin->io_cpu = apic_id;
 	intpin->io_vector = apic_alloc_vector(apic_id, intpin->io_irq);
+	if (intpin->io_vector == 0)
+		return (ENOSPC);
+
 	if (bootverbose) {
 		printf("ioapic%u: routing intpin %u (", io->io_id,
 		    intpin->io_intpin);
@@ -364,6 +367,7 @@ ioapic_assign_cpu(struct intsrc *isrc, u
 	 */
 	if (old_vector)
 		apic_free_vector(old_id, old_vector, intpin->io_irq);
+	return (0);
 }
 
 static void
@@ -372,7 +376,9 @@ ioapic_enable_intr(struct intsrc *isrc)
 	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
 
 	if (intpin->io_vector == 0)
-		ioapic_assign_cpu(isrc, intr_next_cpu());
+		if (ioapic_assign_cpu(isrc, intr_next_cpu()) != 0)
+			panic("Couldn't find an APIC vector for IRQ %d",
+			    intpin->io_irq);
 	apic_enable_vector(intpin->io_cpu, intpin->io_vector);
 }
 
@@ -496,7 +502,7 @@ ioapic_create(vm_paddr_t addr, int32_t a
 	io->io_pic = ioapic_template;
 	mtx_lock_spin(&icu_lock);
 	io->io_id = next_id++;
-	io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT;	
+	io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT;
 	if (apic_id != -1 && io->io_apic_id != apic_id) {
 		ioapic_write(apic, IOAPIC_ID, apic_id << APIC_ID_SHIFT);
 		mtx_unlock_spin(&icu_lock);

Modified: head/sys/i386/i386/local_apic.c
==============================================================================
--- head/sys/i386/i386/local_apic.c	Wed Jul  1 17:06:56 2009	(r195248)
+++ head/sys/i386/i386/local_apic.c	Wed Jul  1 17:20:07 2009	(r195249)
@@ -139,7 +139,7 @@ static inthand_t *ioint_handlers[] = {
 };
 
 
-static u_int32_t lapic_timer_divisors[] = { 
+static u_int32_t lapic_timer_divisors[] = {
 	APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
 	APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
 };
@@ -799,7 +799,7 @@ apic_alloc_vector(u_int apic_id, u_int i
 		return (vector + APIC_IO_INTS);
 	}
 	mtx_unlock_spin(&icu_lock);
-	panic("Couldn't find an APIC vector for IRQ %u", irq);
+	return (0);
 }
 
 /*
@@ -1062,7 +1062,7 @@ DB_SHOW_COMMAND(lapic, db_show_lapic)
 static SLIST_HEAD(, apic_enumerator) enumerators =
 	SLIST_HEAD_INITIALIZER(enumerators);
 static struct apic_enumerator *best_enum;
-	
+
 void
 apic_register_enumerator(struct apic_enumerator *enumerator)
 {

Modified: head/sys/i386/i386/msi.c
==============================================================================
--- head/sys/i386/i386/msi.c	Wed Jul  1 17:06:56 2009	(r195248)
+++ head/sys/i386/i386/msi.c	Wed Jul  1 17:20:07 2009	(r195249)
@@ -113,6 +113,8 @@ struct msi_intsrc {
 	u_int msi_vector:8;		/* IDT vector. */
 	u_int msi_cpu:8;		/* Local APIC ID. (g) */
 	u_int msi_count:8;		/* Messages in this group. (g) */
+	u_int msi_maxcount:8;		/* Alignment for this group. (g) */
+	int *msi_irqs;			/* Group's IRQ list. (g) */
 };
 
 static void	msi_create_source(void);
@@ -125,7 +127,7 @@ static int	msi_vector(struct intsrc *isr
 static int	msi_source_pending(struct intsrc *isrc);
 static int	msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
 		    enum intr_polarity pol);
-static void	msi_assign_cpu(struct intsrc *isrc, u_int apic_id);
+static int	msi_assign_cpu(struct intsrc *isrc, u_int apic_id);
 
 struct pic msi_pic = { msi_enable_source, msi_disable_source, msi_eoi_source,
 		       msi_enable_intr, msi_disable_intr, msi_vector,
@@ -195,32 +197,52 @@ msi_config_intr(struct intsrc *isrc, enu
 	return (ENODEV);
 }
 
-static void
+static int
 msi_assign_cpu(struct intsrc *isrc, u_int apic_id)
 {
-	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
+	struct msi_intsrc *sib, *msi = (struct msi_intsrc *)isrc;
 	int old_vector;
 	u_int old_id;
-	int vector;
+	int i, vector;
+
+	/*
+	 * Only allow CPUs to be assigned to the first message for an
+	 * MSI group.
+	 */
+	if (msi->msi_first != msi)
+		return (EINVAL);
 
 	/* Store information to free existing irq. */
 	old_vector = msi->msi_vector;
 	old_id = msi->msi_cpu;
 	if (old_id == apic_id)
-		return;
-	if (!msi->msi_msix && msi->msi_first->msi_count > 1)
-		return;
+		return (0);
 
-	/* Allocate IDT vector on this cpu. */
-	vector = apic_alloc_vector(apic_id, msi->msi_irq);
+	/* Allocate IDT vectors on this cpu. */
+	if (msi->msi_count > 1) {
+		KASSERT(msi->msi_msix == 0, ("MSI-X message group"));
+		vector = apic_alloc_vectors(apic_id, msi->msi_irqs,
+		    msi->msi_count, msi->msi_maxcount);
+	} else
+		vector = apic_alloc_vector(apic_id, msi->msi_irq);
 	if (vector == 0)
-		return; /* XXX alloc_vector panics on failure. */
+		return (ENOSPC);
+
 	msi->msi_cpu = apic_id;
 	msi->msi_vector = vector;
 	if (bootverbose)
 		printf("msi: Assigning %s IRQ %d to local APIC %u vector %u\n",
 		    msi->msi_msix ? "MSI-X" : "MSI", msi->msi_irq,
 		    msi->msi_cpu, msi->msi_vector);
+	for (i = 1; i < msi->msi_count; i++) {
+		sib = (struct msi_intsrc *)intr_lookup_source(msi->msi_irqs[i]);
+		sib->msi_cpu = apic_id;
+		sib->msi_vector = vector + i;
+		if (bootverbose)
+			printf(
+		    "msi: Assigning MSI IRQ %d to local APIC %u vector %u\n",
+			    sib->msi_irq, sib->msi_cpu, sib->msi_vector);
+	}
 	pci_remap_msi_irq(msi->msi_dev, msi->msi_irq);
 
 	/*
@@ -228,6 +250,9 @@ msi_assign_cpu(struct intsrc *isrc, u_in
 	 * to prevent races where we could miss an interrupt.
 	 */
 	apic_free_vector(old_id, old_vector, msi->msi_irq);
+	for (i = 1; i < msi->msi_count; i++)
+		apic_free_vector(old_id, old_vector + i, msi->msi_irqs[i]);
+	return (0);
 }
 
 void
@@ -268,7 +293,7 @@ msi_create_source(void)
 	msi_last_irq++;
 	mtx_unlock(&msi_lock);
 
-	msi = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO);	
+	msi = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO);
 	msi->msi_intsrc.is_pic = &msi_pic;
 	msi->msi_irq = irq;
 	intr_register_source(&msi->msi_intsrc);
@@ -276,21 +301,22 @@ msi_create_source(void)
 }
 
 /*
- * Try to allocate 'count' interrupt sources with contiguous IDT values.  If
- * we allocate any new sources, then their IRQ values will be at the end of
- * the irqs[] array, with *newirq being the index of the first new IRQ value
- * and *newcount being the number of new IRQ values added.
+ * Try to allocate 'count' interrupt sources with contiguous IDT values.
  */
 int
 msi_alloc(device_t dev, int count, int maxcount, int *irqs)
 {
 	struct msi_intsrc *msi, *fsrc;
 	u_int cpu;
-	int cnt, i, vector;
+	int cnt, i, *mirqs, vector;
 
 	if (!msi_enabled)
 		return (ENXIO);
 
+	if (count > 1)
+		mirqs = malloc(count * sizeof(*mirqs), M_MSI, M_WAITOK);
+	else
+		mirqs = NULL;
 again:
 	mtx_lock(&msi_lock);
 
@@ -317,6 +343,7 @@ again:
 		/* If we would exceed the max, give up. */
 		if (i + (count - cnt) > FIRST_MSI_INT + NUM_MSI_INTS) {
 			mtx_unlock(&msi_lock);
+			free(mirqs, M_MSI);
 			return (ENXIO);
 		}
 		mtx_unlock(&msi_lock);
@@ -337,6 +364,7 @@ again:
 	vector = apic_alloc_vectors(cpu, irqs, count, maxcount);
 	if (vector == 0) {
 		mtx_unlock(&msi_lock);
+		free(mirqs, M_MSI);
 		return (ENOSPC);
 	}
 
@@ -356,6 +384,10 @@ again:
 		    ("dead MSI has handlers"));
 	}
 	fsrc->msi_count = count;
+	fsrc->msi_maxcount = maxcount;
+	if (count > 1)
+		bcopy(irqs, mirqs, count * sizeof(*mirqs));
+	fsrc->msi_irqs = mirqs;
 	mtx_unlock(&msi_lock);
 
 	return (0);
@@ -413,6 +445,9 @@ msi_release(int *irqs, int count)
 	apic_free_vector(first->msi_cpu, first->msi_vector, first->msi_irq);
 	first->msi_vector = 0;
 	first->msi_count = 0;
+	first->msi_maxcount = 0;
+	free(first->msi_irqs, M_MSI);
+	first->msi_irqs = NULL;
 
 	mtx_unlock(&msi_lock);
 	return (0);
@@ -498,15 +533,23 @@ again:
 	/* Allocate an IDT vector. */
 	cpu = intr_next_cpu();
 	vector = apic_alloc_vector(cpu, i);
+	if (vector == 0) {
+		mtx_unlock(&msi_lock);
+		return (ENOSPC);
+	}
 	if (bootverbose)
 		printf("msi: routing MSI-X IRQ %d to local APIC %u vector %u\n",
 		    msi->msi_irq, cpu, vector);
-	
+
 	/* Setup source. */
 	msi->msi_cpu = cpu;
 	msi->msi_dev = dev;
+	msi->msi_first = msi;
 	msi->msi_vector = vector;
 	msi->msi_msix = 1;
+	msi->msi_count = 1;
+	msi->msi_maxcount = 1;
+	msi->msi_irqs = NULL;
 
 	KASSERT(msi->msi_intsrc.is_handlers == 0, ("dead MSI-X has handlers"));
 	mtx_unlock(&msi_lock);
@@ -536,10 +579,13 @@ msix_release(int irq)
 	KASSERT(msi->msi_dev != NULL, ("unowned message"));
 
 	/* Clear out the message. */
+	msi->msi_first = NULL;
 	msi->msi_dev = NULL;
 	apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq);
 	msi->msi_vector = 0;
 	msi->msi_msix = 0;
+	msi->msi_count = 0;
+	msi->msi_maxcount = 0;
 
 	mtx_unlock(&msi_lock);
 	return (0);

Modified: head/sys/i386/include/intr_machdep.h
==============================================================================
--- head/sys/i386/include/intr_machdep.h	Wed Jul  1 17:06:56 2009	(r195248)
+++ head/sys/i386/include/intr_machdep.h	Wed Jul  1 17:20:07 2009	(r195249)
@@ -93,7 +93,7 @@ struct pic {
 	void (*pic_resume)(struct pic *);
 	int (*pic_config_intr)(struct intsrc *, enum intr_trigger,
 	    enum intr_polarity);
-	void (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
+	int (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
 	STAILQ_ENTRY(pic) pics;
 };
 

Modified: head/sys/i386/isa/atpic.c
==============================================================================
--- head/sys/i386/isa/atpic.c	Wed Jul  1 17:06:56 2009	(r195248)
+++ head/sys/i386/isa/atpic.c	Wed Jul  1 17:20:07 2009	(r195249)
@@ -161,7 +161,7 @@ static void atpic_resume(struct pic *pic
 static int atpic_source_pending(struct intsrc *isrc);
 static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
     enum intr_polarity pol);
-static void atpic_assign_cpu(struct intsrc *isrc, u_int apic_id);
+static int atpic_assign_cpu(struct intsrc *isrc, u_int apic_id);
 static void i8259_init(struct atpic *pic, int slave);
 
 static struct atpic atpics[] = {
@@ -389,7 +389,7 @@ atpic_config_intr(struct intsrc *isrc, e
 #endif /* PC98 */
 }
 
-static void
+static int
 atpic_assign_cpu(struct intsrc *isrc, u_int apic_id)
 {
 

Modified: head/sys/kern/kern_intr.c
==============================================================================
--- head/sys/kern/kern_intr.c	Wed Jul  1 17:06:56 2009	(r195248)
+++ head/sys/kern/kern_intr.c	Wed Jul  1 17:20:07 2009	(r195249)
@@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
+#include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/random.h>
 #include <sys/resourcevar.h>
@@ -304,9 +305,14 @@ intr_event_bind(struct intr_event *ie, u
 
 	if (ie->ie_assign_cpu == NULL)
 		return (EOPNOTSUPP);
+
+	error = priv_check(curthread, PRIV_SCHED_CPUSET_INTR);
+	if (error)
+		return (error);
+
 	/*
-	 * If we have any ithreads try to set their mask first since this
-	 * can fail.
+	 * If we have any ithreads try to set their mask first to verify
+	 * permissions, etc.
 	 */
 	mtx_lock(&ie->ie_lock);
 	if (ie->ie_thread != NULL) {
@@ -323,8 +329,22 @@ intr_event_bind(struct intr_event *ie, u
 	} else
 		mtx_unlock(&ie->ie_lock);
 	error = ie->ie_assign_cpu(ie->ie_source, cpu);
-	if (error)
+	if (error) {
+		mtx_lock(&ie->ie_lock);
+		if (ie->ie_thread != NULL) {
+			CPU_ZERO(&mask);
+			if (ie->ie_cpu == NOCPU)
+				CPU_COPY(cpuset_root, &mask);
+			else
+				CPU_SET(cpu, &mask);
+			id = ie->ie_thread->it_thread->td_tid;
+			mtx_unlock(&ie->ie_lock);
+			(void)cpuset_setthread(id, &mask);
+		} else
+			mtx_unlock(&ie->ie_lock);
 		return (error);
+	}
+
 	mtx_lock(&ie->ie_lock);
 	ie->ie_cpu = cpu;
 	mtx_unlock(&ie->ie_lock);

Modified: head/sys/sys/priv.h
==============================================================================
--- head/sys/sys/priv.h	Wed Jul  1 17:06:56 2009	(r195248)
+++ head/sys/sys/priv.h	Wed Jul  1 17:20:07 2009	(r195249)
@@ -185,6 +185,7 @@
 #define	PRIV_SCHED_SET		204	/* Can set thread scheduler. */
 #define	PRIV_SCHED_SETPARAM	205	/* Can set thread scheduler params. */
 #define	PRIV_SCHED_CPUSET	206	/* Can manipulate cpusets. */
+#define	PRIV_SCHED_CPUSET_INTR	207	/* Can adjust IRQ to CPU binding. */
 
 /*
  * POSIX semaphore privileges.


More information about the svn-src-head mailing list