svn commit: r293072 - in user/ngie/stable-10-libnv/sys: amd64/conf conf dev/acpica dev/pci i386/conf sys

Garrett Cooper ngie at FreeBSD.org
Sun Jan 3 05:39:21 UTC 2016


Author: ngie
Date: Sun Jan  3 05:39:19 2016
New Revision: 293072
URL: https://svnweb.freebsd.org/changeset/base/293072

Log:
  MFC r279447,r279449,r279450,r279451,r279452,r279453:
  
  r279447 (by rstone):
  
  Implement interface to create SR-IOV Virtual Functions
  
  Implement the interface to create SR-IOV Virtual Functions (VFs).
  When a driver registers that they support SR-IOV by calling
  pci_setup_iov(), the SR-IOV code creates a new node in /dev/iov
  for that device.  An ioctl can be invoked on that device to
  create VFs and have the driver initialize them.
  
  At this point, allocating memory I/O windows (BARs) is not
  supported.
  
  r279449 (by rstone):
  
  Allocate PCI I/O memory spaces for VFs
  
  When creating VFs, we must size each SR-IOV BAR on the PF and
  allocate a contiguous I/O memory window large enough for every VF.
  However, the window only needs to be aligned to a boundary equal
  to the size of the window for a single VF.
  
  When a VF attempts to allocate an I/O memory resource, we must
  intercept the request in the pci driver and pass it off to the
  SR-IOV code, which will allocate the correct window from the
  pre-allocated memory space for the PF.
  
  Inform the pci driver about the size and address of the BARs on
  the VF when the VF is created.  This is required by pciconf -b and
  bhyve.
  
  r279450 (by rstone):
  
  Add interface to destroy SR-IOV VFs
  
  r279451 (by rstone):
  
  Add infrastructure for exporting config schema from PF drivers
  
  r279452 (by rstone):
  
  Add function to validate the consistency of SR-IOV config
  
  Add a function that validates that the user-provided SR-IOV
  configuration is valid.  This includes basic checks that the
  structure of the configuration is correct (e.g. all required
  configuration nodes are present) as well as validating against
  a configuration schema.
  
  The schema validation consists of:
   - Ensuring that all required config parameters are present.
   - If the schema defines a default value for a parameter,
     adding the default value if the parameter is not set.
   - Ensuring that no parameters are specified in the config
     that are not defined in the schema.
   - Ensuring that parameters have the correct type defined in the schema.
   - Ensuring that no configuration nodes are present for devices
     that do not exist.  For example, if 2 VFs are configured,
     then we validate that a node called VF-5 does not exist.
  
  r279453 (by rstone):
  
  Pass SR-IOV configuration to kernel using an nvlist
  
  Pass all SR-IOV configuration to the kernel using an nvlist.  The
  main benefit that this offers is flexibility.  It allows a driver
  to accept any number of parameters of any type supported by the
  SR-IOV configuration infrastructure without having to make any
  changes outside of the driver.
  
  It also offers the user very fine-grained control over the
  configuration of the VFs -- if they want, they can have different
  configuration applied to every VF.

Added:
  user/ngie/stable-10-libnv/sys/dev/pci/pci_iov.c
     - copied, changed from r279447, head/sys/dev/pci/pci_iov.c
  user/ngie/stable-10-libnv/sys/dev/pci/pci_iov_private.h
     - copied, changed from r279447, head/sys/dev/pci/pci_iov_private.h
  user/ngie/stable-10-libnv/sys/dev/pci/pci_iov_schema.c
     - copied, changed from r279451, head/sys/dev/pci/pci_iov_schema.c
  user/ngie/stable-10-libnv/sys/dev/pci/schema_private.h
     - copied unchanged from r279451, head/sys/dev/pci/schema_private.h
  user/ngie/stable-10-libnv/sys/sys/iov.h
     - copied, changed from r279447, head/sys/sys/iov.h
  user/ngie/stable-10-libnv/sys/sys/iov_schema.h
     - copied unchanged from r279451, head/sys/sys/iov_schema.h
Modified:
  user/ngie/stable-10-libnv/sys/amd64/conf/GENERIC
  user/ngie/stable-10-libnv/sys/conf/files
  user/ngie/stable-10-libnv/sys/conf/options
  user/ngie/stable-10-libnv/sys/dev/acpica/acpi_pci.c
  user/ngie/stable-10-libnv/sys/dev/pci/pci.c
  user/ngie/stable-10-libnv/sys/dev/pci/pci_if.m
  user/ngie/stable-10-libnv/sys/dev/pci/pci_private.h
  user/ngie/stable-10-libnv/sys/dev/pci/pcireg.h
  user/ngie/stable-10-libnv/sys/dev/pci/pcivar.h
  user/ngie/stable-10-libnv/sys/i386/conf/GENERIC
Directory Properties:
  user/ngie/stable-10-libnv/   (props changed)

Modified: user/ngie/stable-10-libnv/sys/amd64/conf/GENERIC
==============================================================================
--- user/ngie/stable-10-libnv/sys/amd64/conf/GENERIC	Sun Jan  3 04:54:10 2016	(r293071)
+++ user/ngie/stable-10-libnv/sys/amd64/conf/GENERIC	Sun Jan  3 05:39:19 2016	(r293072)
@@ -90,6 +90,7 @@ device		cpufreq
 device		acpi
 options 	ACPI_DMAR
 device		pci
+options		PCI_IOV			# PCI SR-IOV support
 
 # Floppy drives
 device		fdc

Modified: user/ngie/stable-10-libnv/sys/conf/files
==============================================================================
--- user/ngie/stable-10-libnv/sys/conf/files	Sun Jan  3 04:54:10 2016	(r293071)
+++ user/ngie/stable-10-libnv/sys/conf/files	Sun Jan  3 05:39:19 2016	(r293072)
@@ -2003,6 +2003,8 @@ dev/pci/ignore_pci.c		optional pci
 dev/pci/isa_pci.c		optional pci isa
 dev/pci/pci.c			optional pci
 dev/pci/pci_if.m		standard
+dev/pci/pci_iov.c		optional pci pci_iov
+dev/pci/pci_iov_schema.c	optional pci pci_iov
 dev/pci/pci_pci.c		optional pci
 dev/pci/pci_subr.c		optional pci
 dev/pci/pci_user.c		optional pci

Modified: user/ngie/stable-10-libnv/sys/conf/options
==============================================================================
--- user/ngie/stable-10-libnv/sys/conf/options	Sun Jan  3 04:54:10 2016	(r293071)
+++ user/ngie/stable-10-libnv/sys/conf/options	Sun Jan  3 05:39:19 2016	(r293072)
@@ -166,6 +166,7 @@ NO_SYSCTL_DESCR	opt_global.h
 NSWBUF_MIN	opt_swap.h
 MBUF_PACKET_ZONE_DISABLE	opt_global.h
 PANIC_REBOOT_WAIT_TIME	opt_panic.h
+PCI_IOV		opt_global.h
 PPC_DEBUG	opt_ppc.h
 PPC_PROBE_CHIPSET	opt_ppc.h
 PPS_SYNC	opt_ntp.h

Modified: user/ngie/stable-10-libnv/sys/dev/acpica/acpi_pci.c
==============================================================================
--- user/ngie/stable-10-libnv/sys/dev/acpica/acpi_pci.c	Sun Jan  3 04:54:10 2016	(r293071)
+++ user/ngie/stable-10-libnv/sys/dev/acpica/acpi_pci.c	Sun Jan  3 05:39:19 2016	(r293072)
@@ -84,6 +84,11 @@ static int	acpi_pci_set_powerstate_metho
 static void	acpi_pci_update_device(ACPI_HANDLE handle, device_t pci_child);
 static bus_dma_tag_t acpi_pci_get_dma_tag(device_t bus, device_t child);
 
+#ifdef PCI_IOV
+static device_t	acpi_pci_create_iov_child(device_t bus, device_t pf,
+		    uint16_t rid, uint16_t vid, uint16_t did);
+#endif
+
 static device_method_t acpi_pci_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		acpi_pci_probe),
@@ -98,6 +103,9 @@ static device_method_t acpi_pci_methods[
 
 	/* PCI interface */
 	DEVMETHOD(pci_set_powerstate,	acpi_pci_set_powerstate_method),
+#ifdef PCI_IOV
+	DEVMETHOD(pci_create_iov_child,	acpi_pci_create_iov_child),
+#endif
 
 	DEVMETHOD_END
 };
@@ -345,3 +353,23 @@ acpi_pci_get_dma_tag(device_t bus, devic
 	return (pci_get_dma_tag(bus, child));
 }
 #endif
+
+#ifdef PCI_IOV
+static device_t
+acpi_pci_create_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid,
+    uint16_t did)
+{
+	struct acpi_pci_devinfo *dinfo;
+	device_t vf;
+
+	vf = pci_add_iov_child(bus, pf, sizeof(struct acpi_pci_devinfo), rid,
+	    vid, did);
+	if (vf == NULL)
+		return (NULL);
+
+	dinfo = device_get_ivars(vf);
+	dinfo->ap_handle = NULL;
+	return (vf);
+}
+#endif
+

Modified: user/ngie/stable-10-libnv/sys/dev/pci/pci.c
==============================================================================
--- user/ngie/stable-10-libnv/sys/dev/pci/pci.c	Sun Jan  3 04:54:10 2016	(r293071)
+++ user/ngie/stable-10-libnv/sys/dev/pci/pci.c	Sun Jan  3 05:39:19 2016	(r293072)
@@ -186,6 +186,11 @@ static device_method_t pci_methods[] = {
 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
 	DEVMETHOD(pci_get_rid,		pci_get_rid_method),
 	DEVMETHOD(pci_child_added,	pci_child_added_method),
+#ifdef PCI_IOV
+	DEVMETHOD(pci_iov_attach,	pci_iov_attach_method),
+	DEVMETHOD(pci_iov_detach,	pci_iov_detach_method),
+	DEVMETHOD(pci_create_iov_child,	pci_create_iov_child_method),
+#endif
 
 	DEVMETHOD_END
 };
@@ -653,6 +658,9 @@ pci_fill_devinfo(device_t pcib, int d, i
 	cfg->hdrtype		&= ~PCIM_MFDEV;
 	STAILQ_INIT(&cfg->maps);
 
+	cfg->devinfo_size	= size;
+	cfg->iov		= NULL;
+
 	pci_fixancient(cfg);
 	pci_hdrtypedata(pcib, b, s, f, cfg);
 
@@ -3611,6 +3619,51 @@ pci_add_children(device_t dev, int domai
 #undef REG
 }
 
+#ifdef PCI_IOV
+device_t
+pci_add_iov_child(device_t bus, device_t pf, size_t size, uint16_t rid,
+    uint16_t vid, uint16_t did)
+{
+	struct pci_devinfo *pf_dinfo, *vf_dinfo;
+	device_t pcib;
+	int busno, slot, func;
+
+	pf_dinfo = device_get_ivars(pf);
+
+	/*
+	 * Do a sanity check that we have been passed the correct size.  If this
+	 * test fails then likely the pci subclass hasn't implemented the
+	 * pci_create_iov_child method like it's supposed it.
+	 */
+	if (size != pf_dinfo->cfg.devinfo_size) {
+		device_printf(pf,
+		    "PCI subclass does not properly implement PCI_IOV\n");
+		return (NULL);
+	}
+
+	pcib = device_get_parent(bus);
+
+	PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
+
+	vf_dinfo = pci_fill_devinfo(pcib, pci_get_domain(pcib), busno, slot, func,
+	    vid, did, size);
+
+	vf_dinfo->cfg.flags |= PCICFG_VF;
+	pci_add_child(bus, vf_dinfo);
+
+	return (vf_dinfo->cfg.dev);
+}
+
+device_t
+pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
+    uint16_t vid, uint16_t did)
+{
+
+	return (pci_add_iov_child(bus, pf, sizeof(struct pci_devinfo), rid, vid,
+	    did));
+}
+#endif
+
 void
 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
 {
@@ -4722,11 +4775,30 @@ struct resource *
 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
     u_long start, u_long end, u_long count, u_int flags)
 {
+#ifdef PCI_IOV
+	struct pci_devinfo *dinfo;
+#endif
 
 	if (device_get_parent(child) != dev)
 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
 		    type, rid, start, end, count, flags));
 
+#ifdef PCI_IOV
+	dinfo = device_get_ivars(child);
+	if (dinfo->cfg.flags & PCICFG_VF) {
+		switch (type) {
+		/* VFs can't have I/O BARs. */
+		case SYS_RES_IOPORT:
+			return (NULL);
+		case SYS_RES_MEMORY:
+			return (pci_vf_alloc_mem_resource(dev, child, rid,
+			    start, end, count, flags));
+		}
+
+		/* Fall through for other types of resource allocations. */
+	}
+#endif
+
 	return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
 	    count, 1, flags));
 }
@@ -4745,6 +4817,22 @@ pci_release_resource(device_t dev, devic
 
 	dinfo = device_get_ivars(child);
 	cfg = &dinfo->cfg;
+
+#ifdef PCI_IOV
+	if (dinfo->cfg.flags & PCICFG_VF) {
+		switch (type) {
+		/* VFs can't have I/O BARs. */
+		case SYS_RES_IOPORT:
+			return (EDOOFUS);
+		case SYS_RES_MEMORY:
+			return (pci_vf_release_mem_resource(dev, child, rid,
+			    r));
+		}
+
+		/* Fall through for other types of resource allocations. */
+	}
+#endif
+
 #ifdef NEW_PCIB
 	/*
 	 * PCI-PCI bridge I/O window resources are not BARs.  For

Modified: user/ngie/stable-10-libnv/sys/dev/pci/pci_if.m
==============================================================================
--- user/ngie/stable-10-libnv/sys/dev/pci/pci_if.m	Sun Jan  3 04:54:10 2016	(r293071)
+++ user/ngie/stable-10-libnv/sys/dev/pci/pci_if.m	Sun Jan  3 05:39:19 2016	(r293072)
@@ -36,8 +36,20 @@ CODE {
 	{
 		return (0);
 	}
+	
+	static device_t
+	null_create_iov_child(device_t bus, device_t pf, uint16_t rid,
+	    uint16_t vid, uint16_t did)
+	{
+		device_printf(bus, "PCI_IOV not implemented on this bus.\n");
+		return (NULL);
+	}
 };
 
+HEADER {
+	struct nvlist;
+}
+
 
 METHOD u_int32_t read_config {
 	device_t	dev;
@@ -189,3 +201,40 @@ METHOD void child_added {
 	device_t	dev;
 	device_t	child;
 };
+
+METHOD int iov_attach {
+	device_t	dev;
+	device_t	child;
+	struct nvlist	*pf_schema;
+	struct nvlist	*vf_schema;
+};
+
+METHOD int iov_detach {
+	device_t	dev;
+	device_t	child;
+};
+
+METHOD int init_iov {
+	device_t		dev;
+	uint16_t		num_vfs;
+	const struct nvlist	*config;
+};
+
+METHOD void uninit_iov {
+	device_t		dev;
+};
+
+METHOD int add_vf {
+	device_t		dev;
+	uint16_t		vfnum;
+	const struct nvlist	*config;
+};
+
+METHOD device_t create_iov_child {
+	device_t bus;
+	device_t pf;
+	uint16_t rid;
+	uint16_t vid;
+	uint16_t did;
+} DEFAULT null_create_iov_child;
+

Copied and modified: user/ngie/stable-10-libnv/sys/dev/pci/pci_iov.c (from r279447, head/sys/dev/pci/pci_iov.c)
==============================================================================
--- head/sys/dev/pci/pci_iov.c	Sun Mar  1 00:40:09 2015	(r279447, copy source)
+++ user/ngie/stable-10-libnv/sys/dev/pci/pci_iov.c	Sun Jan  3 05:39:19 2016	(r293072)
@@ -46,11 +46,16 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 
 #include <machine/bus.h>
+#include <machine/stdarg.h>
+
+#include <sys/nv.h>
+#include <sys/iov_schema.h>
 
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pci_private.h>
 #include <dev/pci/pci_iov_private.h>
+#include <dev/pci/schema_private.h>
 
 #include "pci_if.h"
 #include "pcib_if.h"
@@ -65,24 +70,48 @@ static struct cdevsw iov_cdevsw = {
 	.d_ioctl = pci_iov_ioctl
 };
 
+SYSCTL_DECL(_hw_pci);
+
+/*
+ * The maximum amount of memory we will allocate for user configuration of an
+ * SR-IOV device.  1MB ought to be enough for anyone, but leave this 
+ * configurable just in case.
+ */
+static u_long pci_iov_max_config = 1024 * 1024;
+SYSCTL_ULONG(_hw_pci, OID_AUTO, iov_max_config, CTLFLAG_RWTUN,
+    &pci_iov_max_config, 0, "Maximum allowed size of SR-IOV configuration.");
+
+
 #define IOV_READ(d, r, w) \
 	pci_read_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, w)
 
 #define IOV_WRITE(d, r, v, w) \
 	pci_write_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, v, w)
 
+static nvlist_t	*pci_iov_build_schema(nvlist_t **pf_schema,
+		    nvlist_t **vf_schema);
+static void	pci_iov_build_pf_schema(nvlist_t *schema,
+		    nvlist_t **driver_schema);
+static void	pci_iov_build_vf_schema(nvlist_t *schema,
+		    nvlist_t **driver_schema);
+static nvlist_t	*pci_iov_get_pf_subsystem_schema(void);
+static nvlist_t	*pci_iov_get_vf_subsystem_schema(void);
+
 int
-pci_iov_attach_method(device_t bus, device_t dev)
+pci_iov_attach_method(device_t bus, device_t dev, nvlist_t *pf_schema,
+    nvlist_t *vf_schema)
 {
 	device_t pcib;
 	struct pci_devinfo *dinfo;
 	struct pcicfg_iov *iov;
+	nvlist_t *schema;
 	uint32_t version;
 	int error;
 	int iov_pos;
 
 	dinfo = device_get_ivars(dev);
 	pcib = device_get_parent(bus);
+	schema = NULL;
 	
 	error = pci_find_extcap(dev, PCIZ_SRIOV, &iov_pos);
 
@@ -106,9 +135,15 @@ pci_iov_attach_method(device_t bus, devi
 		error = EBUSY;
 		goto cleanup;
 	}
-
 	iov->iov_pos = iov_pos;
 
+	schema = pci_iov_build_schema(&pf_schema, &vf_schema);
+	if (schema == NULL) {
+		error = ENOMEM;
+		goto cleanup;
+	}
+	iov->iov_schema = schema;
+
 	iov->iov_cdev = make_dev(&iov_cdevsw, device_get_unit(dev),
 	    UID_ROOT, GID_WHEEL, 0600, "iov/%s", device_get_nameunit(dev));
 
@@ -124,6 +159,9 @@ pci_iov_attach_method(device_t bus, devi
 	return (0);
 
 cleanup:
+	nvlist_destroy(schema);
+	nvlist_destroy(pf_schema);
+	nvlist_destroy(vf_schema);
 	free(iov, M_SRIOV);
 	mtx_unlock(&Giant);
 	return (error);
@@ -144,7 +182,7 @@ pci_iov_detach_method(device_t bus, devi
 		return (0);
 	}
 
-	if (iov->iov_num_vfs != 0) {
+	if (iov->iov_num_vfs != 0 || iov->iov_flags & IOV_BUSY) {
 		mtx_unlock(&Giant);
 		return (EBUSY);
 	}
@@ -155,6 +193,7 @@ pci_iov_detach_method(device_t bus, devi
 		destroy_dev(iov->iov_cdev);
 		iov->iov_cdev = NULL;
 	}
+	nvlist_destroy(iov->iov_schema);
 
 	free(iov, M_SRIOV);
 	mtx_unlock(&Giant);
@@ -162,6 +201,210 @@ pci_iov_detach_method(device_t bus, devi
 	return (0);
 }
 
+static nvlist_t *
+pci_iov_build_schema(nvlist_t **pf, nvlist_t **vf)
+{
+	nvlist_t *schema, *pf_driver, *vf_driver;
+
+	/* We always take ownership of the schemas. */
+	pf_driver = *pf;
+	*pf = NULL;
+	vf_driver = *vf;
+	*vf = NULL;
+
+	schema = pci_iov_schema_alloc_node();
+	if (schema == NULL)
+		goto cleanup;
+
+	pci_iov_build_pf_schema(schema, &pf_driver);
+	pci_iov_build_vf_schema(schema, &vf_driver);
+
+	if (nvlist_error(schema) != 0)
+		goto cleanup;
+
+	return (schema);
+
+cleanup:
+	nvlist_destroy(schema);
+	nvlist_destroy(pf_driver);
+	nvlist_destroy(vf_driver);
+	return (NULL);
+}
+
+static void
+pci_iov_build_pf_schema(nvlist_t *schema, nvlist_t **driver_schema)
+{
+	nvlist_t *pf_schema, *iov_schema;
+
+	pf_schema = pci_iov_schema_alloc_node();
+	if (pf_schema == NULL) {
+		nvlist_set_error(schema, ENOMEM);
+		return;
+	}
+
+	iov_schema = pci_iov_get_pf_subsystem_schema();
+
+	/*
+	 * Note that if either *driver_schema or iov_schema is NULL, then
+	 * nvlist_move_nvlist will put the schema in the error state and
+	 * SR-IOV will fail to initialize later, so we don't have to explicitly
+	 * handle that case.
+	 */
+	nvlist_move_nvlist(pf_schema, DRIVER_CONFIG_NAME, *driver_schema);
+	nvlist_move_nvlist(pf_schema, IOV_CONFIG_NAME, iov_schema);
+	nvlist_move_nvlist(schema, PF_CONFIG_NAME, pf_schema);
+	*driver_schema = NULL;
+}
+
+static void
+pci_iov_build_vf_schema(nvlist_t *schema, nvlist_t **driver_schema)
+{
+	nvlist_t *vf_schema, *iov_schema;
+
+	vf_schema = pci_iov_schema_alloc_node();
+	if (vf_schema == NULL) {
+		nvlist_set_error(schema, ENOMEM);
+		return;
+	}
+
+	iov_schema = pci_iov_get_vf_subsystem_schema();
+
+	/*
+	 * Note that if either *driver_schema or iov_schema is NULL, then
+	 * nvlist_move_nvlist will put the schema in the error state and
+	 * SR-IOV will fail to initialize later, so we don't have to explicitly
+	 * handle that case.
+	 */
+	nvlist_move_nvlist(vf_schema, DRIVER_CONFIG_NAME, *driver_schema);
+	nvlist_move_nvlist(vf_schema, IOV_CONFIG_NAME, iov_schema);
+	nvlist_move_nvlist(schema, VF_SCHEMA_NAME, vf_schema);
+	*driver_schema = NULL;
+}
+
+static nvlist_t *
+pci_iov_get_pf_subsystem_schema(void)
+{
+	nvlist_t *pf;
+
+	pf = pci_iov_schema_alloc_node();
+	if (pf == NULL)
+		return (NULL);
+
+	pci_iov_schema_add_uint16(pf, "num_vfs", IOV_SCHEMA_REQUIRED, -1);
+	pci_iov_schema_add_string(pf, "device", IOV_SCHEMA_REQUIRED, NULL);
+
+	return (pf);
+}
+
+static nvlist_t *
+pci_iov_get_vf_subsystem_schema(void)
+{
+	nvlist_t *vf;
+
+	vf = pci_iov_schema_alloc_node();
+	if (vf == NULL)
+		return (NULL);
+
+	pci_iov_schema_add_bool(vf, "passthrough", IOV_SCHEMA_HASDEFAULT, 0);
+
+	return (vf);
+}
+
+static int
+pci_iov_alloc_bar(struct pci_devinfo *dinfo, int bar, pci_addr_t bar_shift)
+{
+	struct resource *res;
+	struct pcicfg_iov *iov;
+	device_t dev, bus;
+	u_long start, end;
+	pci_addr_t bar_size;
+	int rid;
+
+	iov = dinfo->cfg.iov;
+	dev = dinfo->cfg.dev;
+	bus = device_get_parent(dev);
+	rid = iov->iov_pos + PCIR_SRIOV_BAR(bar);
+	bar_size = 1 << bar_shift;
+
+	res = pci_alloc_multi_resource(bus, dev, SYS_RES_MEMORY, &rid, 0ul,
+	    ~0ul, 1, iov->iov_num_vfs, RF_ACTIVE);
+
+	if (res == NULL)
+		return (ENXIO);
+
+	iov->iov_bar[bar].res = res;
+	iov->iov_bar[bar].bar_size = bar_size;
+	iov->iov_bar[bar].bar_shift = bar_shift;
+
+	start = rman_get_start(res);
+	end = rman_get_end(res);
+	return (rman_manage_region(&iov->rman, start, end));
+}
+
+static void
+pci_iov_add_bars(struct pcicfg_iov *iov, struct pci_devinfo *dinfo)
+{
+	struct pci_iov_bar *bar;
+	uint64_t bar_start;
+	int i;
+
+	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
+		bar = &iov->iov_bar[i];
+		if (bar->res != NULL) {
+			bar_start = rman_get_start(bar->res) +
+			    dinfo->cfg.vf.index * bar->bar_size;
+
+			pci_add_bar(dinfo->cfg.dev, PCIR_BAR(i), bar_start,
+			    bar->bar_shift);
+		}
+	}
+}
+
+static int
+pci_iov_parse_config(struct pcicfg_iov *iov, struct pci_iov_arg *arg,
+    nvlist_t **ret)
+{
+	void *packed_config;
+	nvlist_t *config;
+	int error;
+
+	config = NULL;
+	packed_config = NULL;
+
+	if (arg->len > pci_iov_max_config) {
+		error = EMSGSIZE;
+		goto out;
+	}
+
+	packed_config = malloc(arg->len, M_SRIOV, M_WAITOK);
+
+	error = copyin(arg->config, packed_config, arg->len);
+	if (error != 0)
+		goto out;
+
+	config = nvlist_unpack(packed_config, arg->len);
+	if (config == NULL) {
+		error = EINVAL;
+		goto out;
+	}
+
+	error = pci_iov_schema_validate_config(iov->iov_schema, config);
+	if (error != 0)
+		goto out;
+
+	error = nvlist_error(config);
+	if (error != 0)
+		goto out;
+
+	*ret = config;
+	config = NULL;
+
+out:
+	nvlist_destroy(config);
+	free(packed_config, M_SRIOV);
+	return (error);
+}
+
 /*
  * Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV
  * capability.  This bit is only writeable on the lowest-numbered PF but
@@ -235,10 +478,79 @@ pci_iov_config_page_size(struct pci_devi
 	return (0);
 }
 
+static int
+pci_init_iov(device_t dev, uint16_t num_vfs, const nvlist_t *config)
+{
+	const nvlist_t *device, *driver_config;
+
+	device = nvlist_get_nvlist(config, PF_CONFIG_NAME);
+	driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);
+	return (PCI_INIT_IOV(dev, num_vfs, driver_config));
+}
+
+static int
+pci_iov_init_rman(device_t pf, struct pcicfg_iov *iov)
+{
+	int error;
+
+	iov->rman.rm_start = 0;
+	iov->rman.rm_end = ~0ul;
+	iov->rman.rm_type = RMAN_ARRAY;
+	snprintf(iov->rman_name, sizeof(iov->rman_name), "%s VF I/O memory",
+	    device_get_nameunit(pf));
+	iov->rman.rm_descr = iov->rman_name;
+
+	error = rman_init(&iov->rman);
+	if (error != 0)
+		return (error);
+
+	iov->iov_flags |= IOV_RMAN_INITED;
+	return (0);
+}
+
+static int
+pci_iov_setup_bars(struct pci_devinfo *dinfo)
+{
+	device_t dev;
+	struct pcicfg_iov *iov;
+	pci_addr_t bar_value, testval;
+	int i, last_64, error;
+
+	iov = dinfo->cfg.iov;
+	dev = dinfo->cfg.dev;
+	last_64 = 0;
+
+	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
+		/*
+		 * If a PCI BAR is a 64-bit wide BAR, then it spans two
+		 * consecutive registers.  Therefore if the last BAR that
+		 * we looked at was a 64-bit BAR, we need to skip this
+		 * register as it's the second half of the last BAR.
+		 */
+		if (!last_64) {
+			pci_read_bar(dev,
+			    iov->iov_pos + PCIR_SRIOV_BAR(i),
+			    &bar_value, &testval, &last_64);
+
+			if (testval != 0) {
+				error = pci_iov_alloc_bar(dinfo, i,
+				   pci_mapsize(testval));
+				if (error != 0)
+					return (error);
+			}
+		} else
+			last_64 = 0;
+	}
+
+	return (0);
+}
+
 static void
-pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const char *driver,
+pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const nvlist_t *config,
     uint16_t first_rid, uint16_t rid_stride)
 {
+	char device_name[VF_MAX_NAME];
+	const nvlist_t *device, *driver_config, *iov_config;
 	device_t bus, dev, vf;
 	struct pcicfg_iov *iov;
 	struct pci_devinfo *vfinfo;
@@ -255,18 +567,31 @@ pci_iov_enumerate_vfs(struct pci_devinfo
 	did = IOV_READ(dinfo, PCIR_SRIOV_VF_DID, 2);
 
 	for (i = 0; i < iov->iov_num_vfs; i++, next_rid += rid_stride) {
-
+		snprintf(device_name, sizeof(device_name), VF_PREFIX"%d", i);
+		device = nvlist_get_nvlist(config, device_name);
+		iov_config = nvlist_get_nvlist(device, IOV_CONFIG_NAME);
+		driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);
 
 		vf = PCI_CREATE_IOV_CHILD(bus, dev, next_rid, vid, did);
 		if (vf == NULL)
 			break;
 
+		/*
+		 * If we are creating passthrough devices then force the ppt
+		 * driver to attach to prevent a VF driver from claiming the
+		 * VFs.
+		 */
+		if (nvlist_get_bool(iov_config, "passthrough"))
+			device_set_devclass(vf, "ppt");
+
 		vfinfo = device_get_ivars(vf);
 
 		vfinfo->cfg.iov = iov;
 		vfinfo->cfg.vf.index = i;
 
-		error = PCI_ADD_VF(dev, i);
+		pci_iov_add_bars(iov, vfinfo);
+
+		error = PCI_ADD_VF(dev, i, driver_config);
 		if (error != 0) {
 			device_printf(dev, "Failed to add VF %d\n", i);
 			pci_delete_child(bus, vf);
@@ -280,14 +605,14 @@ static int
 pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
 {
 	device_t bus, dev;
-	const char *driver;
 	struct pci_devinfo *dinfo;
 	struct pcicfg_iov *iov;
-	int error;
+	nvlist_t *config;
+	int i, error;
 	uint16_t rid_off, rid_stride;
 	uint16_t first_rid, last_rid;
 	uint16_t iov_ctl;
-	uint16_t total_vfs;
+	uint16_t num_vfs, total_vfs;
 	int iov_inited;
 
 	mtx_lock(&Giant);
@@ -296,28 +621,25 @@ pci_iov_config(struct cdev *cdev, struct
 	dev = dinfo->cfg.dev;
 	bus = device_get_parent(dev);
 	iov_inited = 0;
+	config = NULL;
 
-	if (iov->iov_num_vfs != 0) {
+	if ((iov->iov_flags & IOV_BUSY) || iov->iov_num_vfs != 0) {
 		mtx_unlock(&Giant);
 		return (EBUSY);
 	}
+	iov->iov_flags |= IOV_BUSY;
 
-	total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2);
+	error = pci_iov_parse_config(iov, arg, &config);
+	if (error != 0)
+		goto out;
 
-	if (arg->num_vfs > total_vfs) {
+	num_vfs = pci_iov_config_get_num_vfs(config);
+	total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2);
+	if (num_vfs > total_vfs) {
 		error = EINVAL;
 		goto out;
 	}
 
-	/*
-	 * If we are creating passthrough devices then force the ppt driver to
-	 * attach to prevent a VF driver from claming the VFs.
-	 */
-	if (arg->passthrough)
-		driver = "ppt";
-	else
-		driver = NULL;
-
 	error = pci_iov_config_page_size(dinfo);
 	if (error != 0)
 		goto out;
@@ -326,19 +648,18 @@ pci_iov_config(struct cdev *cdev, struct
 	if (error != 0)
 		goto out;
 
-	error = PCI_INIT_IOV(dev, arg->num_vfs);
-
+	error = pci_init_iov(dev, num_vfs, config);
 	if (error != 0)
 		goto out;
-
 	iov_inited = 1;
-	IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, arg->num_vfs, 2);
+
+	IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, num_vfs, 2);
 
 	rid_off = IOV_READ(dinfo, PCIR_SRIOV_VF_OFF, 2);
 	rid_stride = IOV_READ(dinfo, PCIR_SRIOV_VF_STRIDE, 2);
 
 	first_rid = pci_get_rid(dev) + rid_off;
-	last_rid = first_rid + (arg->num_vfs - 1) * rid_stride;
+	last_rid = first_rid + (num_vfs - 1) * rid_stride;
 
 	/* We don't yet support allocating extra bus numbers for VFs. */
 	if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) {
@@ -350,26 +671,202 @@ pci_iov_config(struct cdev *cdev, struct
 	iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
 	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
 
-	iov->iov_num_vfs = arg->num_vfs;
+	error = pci_iov_init_rman(dev, iov);
+	if (error != 0)
+		goto out;
+
+	iov->iov_num_vfs = num_vfs;
+
+	error = pci_iov_setup_bars(dinfo);
+	if (error != 0)
+		goto out;
 
 	iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
-	iov_ctl |= PCIM_SRIOV_VF_EN;
+	iov_ctl |= PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE;
 	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
 
 	/* Per specification, we must wait 100ms before accessing VFs. */
 	pause("iov", roundup(hz, 10));
-	pci_iov_enumerate_vfs(dinfo, driver, first_rid, rid_stride);
+	pci_iov_enumerate_vfs(dinfo, config, first_rid, rid_stride);
+
+	nvlist_destroy(config);
+	iov->iov_flags &= ~IOV_BUSY;
 	mtx_unlock(&Giant);
 
 	return (0);
 out:
 	if (iov_inited)
 		PCI_UNINIT_IOV(dev);
+
+	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
+		if (iov->iov_bar[i].res != NULL) {
+			pci_release_resource(bus, dev, SYS_RES_MEMORY,
+			    iov->iov_pos + PCIR_SRIOV_BAR(i),
+			    iov->iov_bar[i].res);
+			pci_delete_resource(bus, dev, SYS_RES_MEMORY,
+			    iov->iov_pos + PCIR_SRIOV_BAR(i));
+			iov->iov_bar[i].res = NULL;
+		}
+	}
+
+	if (iov->iov_flags & IOV_RMAN_INITED) {
+		rman_fini(&iov->rman);
+		iov->iov_flags &= ~IOV_RMAN_INITED;
+	}
+
+	nvlist_destroy(config);
 	iov->iov_num_vfs = 0;
+	iov->iov_flags &= ~IOV_BUSY;
 	mtx_unlock(&Giant);
 	return (error);
 }
 
+/* Return true if child is a VF of the given PF. */
+static int
+pci_iov_is_child_vf(struct pcicfg_iov *pf, device_t child)
+{
+	struct pci_devinfo *vfinfo;
+
+	vfinfo = device_get_ivars(child);
+
+	if (!(vfinfo->cfg.flags & PCICFG_VF))
+		return (0);
+
+	return (pf == vfinfo->cfg.iov);
+}
+
+static int
+pci_iov_delete(struct cdev *cdev)
+{
+	device_t bus, dev, vf, *devlist;
+	struct pci_devinfo *dinfo;
+	struct pcicfg_iov *iov;
+	int i, error, devcount;
+	uint32_t iov_ctl;
+
+	mtx_lock(&Giant);
+	dinfo = cdev->si_drv1;
+	iov = dinfo->cfg.iov;
+	dev = dinfo->cfg.dev;
+	bus = device_get_parent(dev);
+	devlist = NULL;
+
+	if (iov->iov_flags & IOV_BUSY) {
+		mtx_unlock(&Giant);
+		return (EBUSY);
+	}
+
+	if (iov->iov_num_vfs == 0) {
+		mtx_unlock(&Giant);
+		return (ECHILD);
+	}
+
+	iov->iov_flags |= IOV_BUSY;
+
+	error = device_get_children(bus, &devlist, &devcount);
+
+	if (error != 0)
+		goto out;
+
+	for (i = 0; i < devcount; i++) {
+		vf = devlist[i];
+
+		if (!pci_iov_is_child_vf(iov, vf))
+			continue;
+
+		error = device_detach(vf);
+		if (error != 0) {
+			device_printf(dev,
+			   "Could not disable SR-IOV: failed to detach VF %s\n",
+			    device_get_nameunit(vf));
+			goto out;
+		}
+	}
+
+	for (i = 0; i < devcount; i++) {
+		vf = devlist[i];
+
+		if (pci_iov_is_child_vf(iov, vf))
+			pci_delete_child(bus, vf);
+	}
+	PCI_UNINIT_IOV(dev);
+
+	iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
+	iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
+	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
+	IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, 0, 2);
+
+	iov->iov_num_vfs = 0;
+
+	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
+		if (iov->iov_bar[i].res != NULL) {
+			pci_release_resource(bus, dev, SYS_RES_MEMORY,
+			    iov->iov_pos + PCIR_SRIOV_BAR(i),
+			    iov->iov_bar[i].res);
+			pci_delete_resource(bus, dev, SYS_RES_MEMORY,
+			    iov->iov_pos + PCIR_SRIOV_BAR(i));
+			iov->iov_bar[i].res = NULL;
+		}
+	}
+
+	if (iov->iov_flags & IOV_RMAN_INITED) {
+		rman_fini(&iov->rman);
+		iov->iov_flags &= ~IOV_RMAN_INITED;
+	}
+
+	error = 0;
+out:
+	free(devlist, M_TEMP);
+	iov->iov_flags &= ~IOV_BUSY;
+	mtx_unlock(&Giant);
+	return (error);
+}
+
+static int
+pci_iov_get_schema_ioctl(struct cdev *cdev, struct pci_iov_schema *output)
+{
+	struct pci_devinfo *dinfo;
+	void *packed;
+	size_t output_len, size;
+	int error;
+
+	packed = NULL;
+
+	mtx_lock(&Giant);
+	dinfo = cdev->si_drv1;
+	packed = nvlist_pack(dinfo->cfg.iov->iov_schema, &size);
+	mtx_unlock(&Giant);
+
+	if (packed == NULL) {
+		error = ENOMEM;
+		goto fail;
+	}
+
+	output_len = output->len;
+	output->len = size;
+	if (size <= output_len) {
+		error = copyout(packed, output->schema, size);
+
+		if (error != 0)
+			goto fail;
+
+		output->error = 0;
+	} else
+		/*
+		 * If we return an error then the ioctl code won't copyout
+		 * output back to userland, so we flag the error in the struct
+		 * instead.
+		 */
+		output->error = EMSGSIZE;
+
+	error = 0;
+
+fail:
+	free(packed, M_NVLIST);
+
+	return (error);
+}
+
 static int
 pci_iov_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
     struct thread *td)
@@ -378,8 +875,102 @@ pci_iov_ioctl(struct cdev *dev, u_long c
 	switch (cmd) {

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-user mailing list