svn commit: r299286 - in head: share/man/man9 sys/amd64/include sys/dev/acpica sys/i386/include sys/kern sys/sys sys/x86/x86

John Baldwin jhb at FreeBSD.org
Mon May 9 20:50:24 UTC 2016


Author: jhb
Date: Mon May  9 20:50:21 2016
New Revision: 299286
URL: https://svnweb.freebsd.org/changeset/base/299286

Log:
  Add a new bus method to fetch device-specific CPU sets.
  
  bus_get_cpus() returns a specified set of CPUs for a device.  It accepts
  an enum for the second parameter that indicates the type of cpuset to
  request.  Currently two values are supported:
  
   - LOCAL_CPUS (on x86 this returns all the CPUs in the package closest to
     the device when DEVICE_NUMA is enabled)
   - INTR_CPUS (like LOCAL_CPUS but only returns 1 SMT thread for each core)
  
  For systems that do not support NUMA (or if it is not enabled in the kernel
  config), LOCAL_CPUS fails with EINVAL.  INTR_CPUS is mapped to 'all_cpus'
  by default.  The idea is that INTR_CPUS should always return a valid set.
  
  Device drivers which want to use per-CPU interrupts should start using
  INTR_CPUS instead of simply assigning interrupts to all available CPUs.
  In the future we may wish to add tunables to control the policy of
  INTR_CPUS (e.g. should it be local-only or global, should it ignore
  SMT threads or not).
  
  The x86 nexus driver exposes the internal set of interrupt CPUs from
  the x86 interrupt code via INTR_CPUS.
  
  The ACPI bus driver and PCI bridge drivers use _PXM to return a suitable
  LOCAL_CPUS set when _PXM exists and DEVICE_NUMA is enabled.  They also AND
  the global INTR_CPUS set from the nexus driver with the per-domain set from
  _PXM to generate a local INTR_CPUS set for child devices.
  
  Compared to r298933, this version uses 'struct _cpuset' in
  <sys/bus.h> instead of 'cpuset_t' to avoid requiring <sys/param.h>
  (<sys/_cpuset.h> still requires <sys/param.h> for MAXCPU even though
  <sys/_bitset.h> does not after recent changes).

Added:
  head/share/man/man9/BUS_GET_CPUS.9
     - copied, changed from r298950, head/share/man/man9/BUS_GET_CPUS.9
Modified:
  head/share/man/man9/Makefile
  head/sys/amd64/include/intr_machdep.h
  head/sys/dev/acpica/acpi.c
  head/sys/dev/acpica/acpi_pci.c
  head/sys/dev/acpica/acpi_pcib.c
  head/sys/dev/acpica/acpi_pcib_acpi.c
  head/sys/dev/acpica/acpi_pcib_pci.c
  head/sys/dev/acpica/acpi_pcibvar.h
  head/sys/dev/acpica/acpivar.h
  head/sys/i386/include/intr_machdep.h
  head/sys/kern/bus_if.m
  head/sys/kern/subr_bus.c
  head/sys/sys/bus.h
  head/sys/x86/x86/intr_machdep.c
  head/sys/x86/x86/nexus.c

Copied and modified: head/share/man/man9/BUS_GET_CPUS.9 (from r298950, head/share/man/man9/BUS_GET_CPUS.9)
==============================================================================
--- head/share/man/man9/BUS_GET_CPUS.9	Tue May  3 00:35:11 2016	(r298950, copy source)
+++ head/share/man/man9/BUS_GET_CPUS.9	Mon May  9 20:50:21 2016	(r299286)
@@ -36,6 +36,7 @@
 .Sh SYNOPSIS
 .In sys/param.h
 .In sys/bus.h
+.In sys/cpuset.h
 .Ft int
 .Fo BUS_GET_CPUS
 .Fa "device_t dev" "device_t child" "enum cpu_sets op" "size_t setsize"

Modified: head/share/man/man9/Makefile
==============================================================================
--- head/share/man/man9/Makefile	Mon May  9 20:21:49 2016	(r299285)
+++ head/share/man/man9/Makefile	Mon May  9 20:50:21 2016	(r299286)
@@ -42,6 +42,7 @@ MAN=	accept_filter.9 \
 	bus_generic_print_child.9 \
 	bus_generic_read_ivar.9 \
 	bus_generic_shutdown.9 \
+	BUS_GET_CPUS.9 \
 	bus_get_resource.9 \
 	BUS_NEW_PASS.9 \
 	BUS_PRINT_CHILD.9 \
@@ -502,6 +503,7 @@ MLINKS+=bus_dma.9 busdma.9 \
 	bus_dma.9 bus_dma_tag_create.9 \
 	bus_dma.9 bus_dma_tag_destroy.9
 MLINKS+=bus_generic_read_ivar.9 bus_generic_write_ivar.9
+MLINKS+=BUS_GET_CPUS.9 bus_get_cpus.9
 MLINKS+=BUS_READ_IVAR.9 BUS_WRITE_IVAR.9
 MLINKS+=BUS_SETUP_INTR.9 bus_setup_intr.9 \
 	BUS_SETUP_INTR.9 BUS_TEARDOWN_INTR.9 \

Modified: head/sys/amd64/include/intr_machdep.h
==============================================================================
--- head/sys/amd64/include/intr_machdep.h	Mon May  9 20:21:49 2016	(r299285)
+++ head/sys/amd64/include/intr_machdep.h	Mon May  9 20:50:21 2016	(r299286)
@@ -143,6 +143,9 @@ struct nmi_pcpu {
 	register_t	__padding;	/* pad to 16 bytes */
 };
 
+#ifdef SMP
+extern cpuset_t intr_cpus;
+#endif
 extern struct mtx icu_lock;
 extern int elcr_found;
 

Modified: head/sys/dev/acpica/acpi.c
==============================================================================
--- head/sys/dev/acpica/acpi.c	Mon May  9 20:21:49 2016	(r299285)
+++ head/sys/dev/acpica/acpi.c	Mon May  9 20:50:21 2016	(r299286)
@@ -211,6 +211,7 @@ static device_method_t acpi_methods[] = 
     DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
     DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),
     DEVMETHOD(bus_hint_device_unit,	acpi_hint_device_unit),
+    DEVMETHOD(bus_get_cpus,		acpi_get_cpus),
     DEVMETHOD(bus_get_domain,		acpi_get_domain),
 
     /* ACPI bus */
@@ -1077,52 +1078,79 @@ acpi_hint_device_unit(device_t acdev, de
 }
 
 /*
- * Fetch the VM domain for the given device 'dev'.
- *
- * Return 1 + domain if there's a domain, 0 if not found;
- * -1 upon an error.
+ * Fetch the NUMA domain for a device by mapping the value returned by
+ * _PXM to a NUMA domain.  If the device does not have a _PXM method,
+ * -2 is returned.  If any other error occurs, -1 is returned.
  */
-int
-acpi_parse_pxm(device_t dev, int *domain)
+static int
+acpi_parse_pxm(device_t dev)
 {
 #ifdef DEVICE_NUMA
-	ACPI_HANDLE h;
-	int d, pxm;
+	ACPI_HANDLE handle;
+	ACPI_STATUS status;
+	int pxm;
 
-	h = acpi_get_handle(dev);
-	if ((h != NULL) &&
-	    ACPI_SUCCESS(acpi_GetInteger(h, "_PXM", &pxm))) {
-		d = acpi_map_pxm_to_vm_domainid(pxm);
-		if (d < 0)
-			return (-1);
-		*domain = d;
-		return (1);
-	}
+	handle = acpi_get_handle(dev);
+	if (handle == NULL)
+		return (-2);
+	status = acpi_GetInteger(handle, "_PXM", &pxm);
+	if (ACPI_SUCCESS(status))
+		return (acpi_map_pxm_to_vm_domainid(pxm));
+	if (status == AE_NOT_FOUND)
+		return (-2);
 #endif
+	return (-1);
+}
 
-	return (0);
+int
+acpi_get_cpus(device_t dev, device_t child, enum cpu_sets op, size_t setsize,
+    cpuset_t *cpuset)
+{
+	int d, error;
+
+	d = acpi_parse_pxm(child);
+	if (d < 0)
+		return (bus_generic_get_cpus(dev, child, op, setsize, cpuset));
+
+	switch (op) {
+	case LOCAL_CPUS:
+		if (setsize != sizeof(cpuset_t))
+			return (EINVAL);
+		*cpuset = cpuset_domain[d];
+		return (0);
+	case INTR_CPUS:
+		error = bus_generic_get_cpus(dev, child, op, setsize, cpuset);
+		if (error != 0)
+			return (error);
+		if (setsize != sizeof(cpuset_t))
+			return (EINVAL);
+		CPU_AND(cpuset, &cpuset_domain[d]);
+		return (0);
+	default:
+		return (bus_generic_get_cpus(dev, child, op, setsize, cpuset));
+	}
 }
 
 /*
- * Fetch the NUMA domain for the given device.
+ * Fetch the NUMA domain for the given device 'dev'.
  *
  * If a device has a _PXM method, map that to a NUMA domain.
- *
- * If none is found, then it'll call the parent method.
- * If there's no domain, return ENOENT.
+ * Otherwise, pass the request up to the parent.
+ * If there's no matching domain or the domain cannot be
+ * determined, return ENOENT.
  */
 int
 acpi_get_domain(device_t dev, device_t child, int *domain)
 {
-	int ret;
+	int d;
 
-	ret = acpi_parse_pxm(child, domain);
-	/* Error */
-	if (ret == -1)
-		return (ENOENT);
-	/* Found */
-	if (ret == 1)
+	d = acpi_parse_pxm(child);
+	if (d >= 0) {
+		*domain = d;
 		return (0);
+	}
+	if (d == -1)
+		return (ENOENT);
 
 	/* No _PXM node; go up a level */
 	return (bus_generic_get_domain(dev, child, domain));

Modified: head/sys/dev/acpica/acpi_pci.c
==============================================================================
--- head/sys/dev/acpica/acpi_pci.c	Mon May  9 20:21:49 2016	(r299285)
+++ head/sys/dev/acpica/acpi_pci.c	Mon May  9 20:50:21 2016	(r299286)
@@ -95,6 +95,7 @@ static device_method_t acpi_pci_methods[
 	DEVMETHOD(bus_write_ivar,	acpi_pci_write_ivar),
 	DEVMETHOD(bus_child_deleted,	acpi_pci_child_deleted),
 	DEVMETHOD(bus_child_location_str, acpi_pci_child_location_str_method),
+	DEVMETHOD(bus_get_cpus,		acpi_get_cpus),
 	DEVMETHOD(bus_get_dma_tag,	acpi_pci_get_dma_tag),
 	DEVMETHOD(bus_get_domain,	acpi_get_domain),
 

Modified: head/sys/dev/acpica/acpi_pcib.c
==============================================================================
--- head/sys/dev/acpica/acpi_pcib.c	Mon May  9 20:21:49 2016	(r299285)
+++ head/sys/dev/acpica/acpi_pcib.c	Mon May  9 20:50:21 2016	(r299286)
@@ -265,3 +265,11 @@ acpi_pcib_power_for_sleep(device_t pcib,
     acpi_device_pwr_for_sleep(acpi_dev, dev, pstate);
     return (0);
 }
+
+int
+acpi_pcib_get_cpus(device_t pcib, device_t dev, enum cpu_sets op,
+    size_t setsize, cpuset_t *cpuset)
+{
+
+	return (bus_get_cpus(pcib, op, setsize, cpuset));
+}

Modified: head/sys/dev/acpica/acpi_pcib_acpi.c
==============================================================================
--- head/sys/dev/acpica/acpi_pcib_acpi.c	Mon May  9 20:21:49 2016	(r299285)
+++ head/sys/dev/acpica/acpi_pcib_acpi.c	Mon May  9 20:50:21 2016	(r299286)
@@ -132,6 +132,7 @@ static device_method_t acpi_pcib_acpi_me
     DEVMETHOD(bus_deactivate_resource,	bus_generic_deactivate_resource),
     DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
     DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),
+    DEVMETHOD(bus_get_cpus,		acpi_pcib_get_cpus),
 
     /* pcib interface */
     DEVMETHOD(pcib_maxslots,		pcib_maxslots),

Modified: head/sys/dev/acpica/acpi_pcib_pci.c
==============================================================================
--- head/sys/dev/acpica/acpi_pcib_pci.c	Mon May  9 20:21:49 2016	(r299285)
+++ head/sys/dev/acpica/acpi_pcib_pci.c	Mon May  9 20:50:21 2016	(r299286)
@@ -78,6 +78,7 @@ static device_method_t acpi_pcib_pci_met
 
     /* Bus interface */
     DEVMETHOD(bus_read_ivar,		acpi_pcib_read_ivar),
+    DEVMETHOD(bus_get_cpus,		acpi_pcib_get_cpus),
 
     /* pcib interface */
     DEVMETHOD(pcib_route_interrupt,	acpi_pcib_pci_route_interrupt),

Modified: head/sys/dev/acpica/acpi_pcibvar.h
==============================================================================
--- head/sys/dev/acpica/acpi_pcibvar.h	Mon May  9 20:21:49 2016	(r299285)
+++ head/sys/dev/acpica/acpi_pcibvar.h	Mon May  9 20:50:21 2016	(r299286)
@@ -36,6 +36,8 @@ void	acpi_pci_link_add_reference(device_
     int slot, int pin);
 int	acpi_pci_link_route_interrupt(device_t dev, int index);
 void	acpi_pcib_fetch_prt(device_t bus, ACPI_BUFFER *prt);
+int	acpi_pcib_get_cpus(device_t pcib, device_t dev, enum cpu_sets op,
+    size_t setsize, cpuset_t *cpuset);
 int	acpi_pcib_route_interrupt(device_t pcib, device_t dev, int pin,
     ACPI_BUFFER *prtbuf);
 int	acpi_pcib_power_for_sleep(device_t pcib, device_t dev,

Modified: head/sys/dev/acpica/acpivar.h
==============================================================================
--- head/sys/dev/acpica/acpivar.h	Mon May  9 20:21:49 2016	(r299285)
+++ head/sys/dev/acpica/acpivar.h	Mon May  9 20:50:21 2016	(r299286)
@@ -506,8 +506,9 @@ SYSCTL_DECL(_debug_acpi);
  * Returns the VM domain ID if found, or -1 if not found / invalid.
  */
 int		acpi_map_pxm_to_vm_domainid(int pxm);
+int		acpi_get_cpus(device_t dev, device_t child, enum cpu_sets op,
+		    size_t setsize, cpuset_t *cpuset);
 int		acpi_get_domain(device_t dev, device_t child, int *domain);
-int		acpi_parse_pxm(device_t dev, int *domain);
 
 #endif /* _KERNEL */
 #endif /* !_ACPIVAR_H_ */

Modified: head/sys/i386/include/intr_machdep.h
==============================================================================
--- head/sys/i386/include/intr_machdep.h	Mon May  9 20:21:49 2016	(r299285)
+++ head/sys/i386/include/intr_machdep.h	Mon May  9 20:50:21 2016	(r299286)
@@ -134,6 +134,9 @@ struct intsrc {
 
 struct trapframe;
 
+#ifdef SMP
+extern cpuset_t intr_cpus;
+#endif
 extern struct mtx icu_lock;
 extern int elcr_found;
 

Modified: head/sys/kern/bus_if.m
==============================================================================
--- head/sys/kern/bus_if.m	Mon May  9 20:21:49 2016	(r299285)
+++ head/sys/kern/bus_if.m	Mon May  9 20:50:21 2016	(r299286)
@@ -731,3 +731,21 @@ METHOD int get_domain {
 	device_t	_child;
 	int		*_domain;
 } DEFAULT bus_generic_get_domain;
+
+/**
+ * @brief Request a set of CPUs
+ *
+ * @param _dev		the bus device
+ * @param _child	the child device
+ * @param _op		type of CPUs to request
+ * @param _setsize	the size of the set passed in _cpuset
+ * @param _cpuset	a pointer to a cpuset to receive the requested
+ *			set of CPUs
+ */
+METHOD int get_cpus {
+	device_t	_dev;
+	device_t	_child;
+	enum cpu_sets	_op;
+	size_t		_setsize;
+	cpuset_t	*_cpuset;
+} DEFAULT bus_generic_get_cpus;

Modified: head/sys/kern/subr_bus.c
==============================================================================
--- head/sys/kern/subr_bus.c	Mon May  9 20:21:49 2016	(r299285)
+++ head/sys/kern/subr_bus.c	Mon May  9 20:50:21 2016	(r299286)
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/rman.h>
 #include <sys/selinfo.h>
 #include <sys/signalvar.h>
+#include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/uio.h>
@@ -4111,6 +4112,23 @@ bus_generic_describe_intr(device_t dev, 
 }
 
 /**
+ * @brief Helper function for implementing BUS_GET_CPUS().
+ *
+ * This simple implementation of BUS_GET_CPUS() simply calls the
+ * BUS_GET_CPUS() method of the parent of @p dev.
+ */
+int
+bus_generic_get_cpus(device_t dev, device_t child, enum cpu_sets op,
+    size_t setsize, cpuset_t *cpuset)
+{
+
+	/* Propagate up the bus hierarchy until someone handles it. */
+	if (dev->parent != NULL)
+		return (BUS_GET_CPUS(dev->parent, child, op, setsize, cpuset));
+	return (EINVAL);
+}
+
+/**
  * @brief Helper function for implementing BUS_GET_DMA_TAG().
  *
  * This simple implementation of BUS_GET_DMA_TAG() simply calls the
@@ -4620,6 +4638,23 @@ bus_child_location_str(device_t child, c
 }
 
 /**
+ * @brief Wrapper function for BUS_GET_CPUS().
+ *
+ * This function simply calls the BUS_GET_CPUS() method of the
+ * parent of @p dev.
+ */
+int
+bus_get_cpus(device_t dev, enum cpu_sets op, size_t setsize, cpuset_t *cpuset)
+{
+	device_t parent;
+
+	parent = device_get_parent(dev);
+	if (parent == NULL)
+		return (EINVAL);
+	return (BUS_GET_CPUS(parent, dev, op, setsize, cpuset));
+}
+
+/**
  * @brief Wrapper function for BUS_GET_DMA_TAG().
  *
  * This function simply calls the BUS_GET_DMA_TAG() method of the
@@ -4711,6 +4746,23 @@ root_child_present(device_t dev, device_
 	return (-1);
 }
 
+static int
+root_get_cpus(device_t dev, device_t child, enum cpu_sets op, size_t setsize,
+    cpuset_t *cpuset)
+{
+
+	switch (op) {
+	case INTR_CPUS:
+		/* Default to returning the set of all CPUs. */
+		if (setsize != sizeof(cpuset_t))
+			return (EINVAL);
+		*cpuset = all_cpus;
+		return (0);
+	default:
+		return (EINVAL);
+	}
+}
+
 static kobj_method_t root_methods[] = {
 	/* Device interface */
 	KOBJMETHOD(device_shutdown,	bus_generic_shutdown),
@@ -4723,6 +4775,7 @@ static kobj_method_t root_methods[] = {
 	KOBJMETHOD(bus_write_ivar,	bus_generic_write_ivar),
 	KOBJMETHOD(bus_setup_intr,	root_setup_intr),
 	KOBJMETHOD(bus_child_present,	root_child_present),
+	KOBJMETHOD(bus_get_cpus,	root_get_cpus),
 
 	KOBJMETHOD_END
 };

Modified: head/sys/sys/bus.h
==============================================================================
--- head/sys/sys/bus.h	Mon May  9 20:21:49 2016	(r299285)
+++ head/sys/sys/bus.h	Mon May  9 20:50:21 2016	(r299286)
@@ -272,6 +272,16 @@ enum intr_polarity {
 	INTR_POLARITY_LOW = 2
 };
 
+/**
+ * CPU sets supported by bus_get_cpus().  Note that not all sets may be
+ * supported for a given device.  If a request is not supported by a
+ * device (or its parents), then bus_get_cpus() will fail with EINVAL.
+ */
+enum cpu_sets {
+	LOCAL_CPUS = 0,
+	INTR_CPUS
+};
+
 typedef int (*devop_t)(void);
 
 /**
@@ -388,6 +398,8 @@ int	bus_generic_deactivate_resource(devi
 					int rid, struct resource *r);
 int	bus_generic_detach(device_t dev);
 void	bus_generic_driver_added(device_t dev, driver_t *driver);
+int	bus_generic_get_cpus(device_t dev, device_t child, enum cpu_sets op,
+			     size_t setsize, struct _cpuset *cpuset);
 bus_dma_tag_t
 	bus_generic_get_dma_tag(device_t dev, device_t child);
 bus_space_tag_t
@@ -457,6 +469,8 @@ int	bus_activate_resource(device_t dev, 
 			      struct resource *r);
 int	bus_deactivate_resource(device_t dev, int type, int rid,
 				struct resource *r);
+int	bus_get_cpus(device_t dev, enum cpu_sets op, size_t setsize,
+		     struct _cpuset *cpuset);
 bus_dma_tag_t bus_get_dma_tag(device_t dev);
 bus_space_tag_t bus_get_bus_tag(device_t dev);
 int	bus_get_domain(device_t dev, int *domain);

Modified: head/sys/x86/x86/intr_machdep.c
==============================================================================
--- head/sys/x86/x86/intr_machdep.c	Mon May  9 20:21:49 2016	(r299285)
+++ head/sys/x86/x86/intr_machdep.c	Mon May  9 20:50:21 2016	(r299286)
@@ -490,7 +490,7 @@ DB_SHOW_COMMAND(irqs, db_show_irqs)
  * allocate CPUs round-robin.
  */
 
-static cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1);
+cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1);
 static int current_cpu;
 
 /*

Modified: head/sys/x86/x86/nexus.c
==============================================================================
--- head/sys/x86/x86/nexus.c	Mon May  9 20:21:49 2016	(r299285)
+++ head/sys/x86/x86/nexus.c	Mon May  9 20:50:21 2016	(r299286)
@@ -127,6 +127,8 @@ static	int nexus_set_resource(device_t, 
 static	int nexus_get_resource(device_t, device_t, int, int,
 			       rman_res_t *, rman_res_t *);
 static void nexus_delete_resource(device_t, device_t, int, int);
+static	int nexus_get_cpus(device_t, device_t, enum cpu_sets, size_t,
+			   cpuset_t *);
 #ifdef DEV_APIC
 static	int nexus_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs);
 static	int nexus_release_msi(device_t pcib, device_t dev, int count, int *irqs);
@@ -163,6 +165,7 @@ static device_method_t nexus_methods[] =
 	DEVMETHOD(bus_set_resource,	nexus_set_resource),
 	DEVMETHOD(bus_get_resource,	nexus_get_resource),
 	DEVMETHOD(bus_delete_resource,	nexus_delete_resource),
+	DEVMETHOD(bus_get_cpus,		nexus_get_cpus),
 
 	/* pcib interface */
 #ifdef DEV_APIC
@@ -619,6 +622,24 @@ nexus_delete_resource(device_t dev, devi
 	resource_list_delete(rl, type, rid);
 }
 
+static int
+nexus_get_cpus(device_t dev, device_t child, enum cpu_sets op, size_t setsize,
+    cpuset_t *cpuset)
+{
+
+	switch (op) {
+#ifdef SMP
+	case INTR_CPUS:
+		if (setsize != sizeof(cpuset_t))
+			return (EINVAL);
+		*cpuset = intr_cpus;
+		return (0);
+#endif
+	default:
+		return (bus_generic_get_cpus(dev, child, op, setsize, cpuset));
+	}
+}
+
 /* Called from the MSI code to add new IRQs to the IRQ rman. */
 void
 nexus_add_irq(u_long irq)


More information about the svn-src-all mailing list