svn commit: r293072 - in user/ngie/stable-10-libnv/sys: amd64/conf conf dev/acpica dev/pci i386/conf sys
Garrett Cooper
ngie at FreeBSD.org
Sun Jan 3 05:39:21 UTC 2016
Author: ngie
Date: Sun Jan 3 05:39:19 2016
New Revision: 293072
URL: https://svnweb.freebsd.org/changeset/base/293072
Log:
MFC r279447,r279449,r279450,r279451,r279452,r279453:
r279447 (by rstone):
Implement interface to create SR-IOV Virtual Functions
Implement the interface to create SR-IOV Virtual Functions (VFs).
When a driver registers that they support SR-IOV by calling
pci_setup_iov(), the SR-IOV code creates a new node in /dev/iov
for that device. An ioctl can be invoked on that device to
create VFs and have the driver initialize them.
At this point, allocating memory I/O windows (BARs) is not
supported.
r279449 (by rstone):
Allocate PCI I/O memory spaces for VFs
When creating VFs, we must size each SR-IOV BAR on the PF and
allocate a contiguous I/O memory window large enough for every VF.
However, the window only needs to be aligned to a boundary equal
to the size of the window for a single VF.
When a VF attempts to allocate an I/O memory resource, we must
intercept the request in the pci driver and pass it off to the
SR-IOV code, which will allocate the correct window from the
pre-allocated memory space for the PF.
Inform the pci driver about the size and address of the BARs on
the VF when the VF is created. This is required by pciconf -b and
bhyve.
r279450 (by rstone):
Add interface to destroy SR-IOV VFs
r279451 (by rstone):
Add infrastructure for exporting config schema from PF drivers
r279452 (by rstone):
Add function to validate the consistency of SR-IOV config
Add a function that validates that the user-provided SR-IOV
configuration is valid. This includes basic checks that the
structure of the configuration is correct (e.g. all required
configuration nodes are present) as well as validating against
a configuration schema.
The schema validation consists of:
- Ensuring that all required config parameters are present.
- If the schema defines a default value for a parameter,
adding the default value if the parameter is not set.
- Ensuring that no parameters are specified in the config
that are not defined in the schema.
- Ensuring that all parameters have the correct type defined in the schema.
- Ensuring that no configuration nodes are present for devices
that do not exist. For example, if 2 VFs are configured,
then we validate that a node called VF-5 does not exist.
r279453 (by rstone):
Pass SR-IOV configuration to kernel using an nvlist
Pass all SR-IOV configuration to the kernel using an nvlist. The
main benefit that this offers is flexibility. It allows a driver
to accept any number of parameters of any type supported by the
SR-IOV configuration infrastructure without having to make any
changes outside of the driver.
It also offers the user very fine-grained control over the
configuration of the VFs -- if they want, they can have different
configuration applied to every VF.
Added:
user/ngie/stable-10-libnv/sys/dev/pci/pci_iov.c
- copied, changed from r279447, head/sys/dev/pci/pci_iov.c
user/ngie/stable-10-libnv/sys/dev/pci/pci_iov_private.h
- copied, changed from r279447, head/sys/dev/pci/pci_iov_private.h
user/ngie/stable-10-libnv/sys/dev/pci/pci_iov_schema.c
- copied, changed from r279451, head/sys/dev/pci/pci_iov_schema.c
user/ngie/stable-10-libnv/sys/dev/pci/schema_private.h
- copied unchanged from r279451, head/sys/dev/pci/schema_private.h
user/ngie/stable-10-libnv/sys/sys/iov.h
- copied, changed from r279447, head/sys/sys/iov.h
user/ngie/stable-10-libnv/sys/sys/iov_schema.h
- copied unchanged from r279451, head/sys/sys/iov_schema.h
Modified:
user/ngie/stable-10-libnv/sys/amd64/conf/GENERIC
user/ngie/stable-10-libnv/sys/conf/files
user/ngie/stable-10-libnv/sys/conf/options
user/ngie/stable-10-libnv/sys/dev/acpica/acpi_pci.c
user/ngie/stable-10-libnv/sys/dev/pci/pci.c
user/ngie/stable-10-libnv/sys/dev/pci/pci_if.m
user/ngie/stable-10-libnv/sys/dev/pci/pci_private.h
user/ngie/stable-10-libnv/sys/dev/pci/pcireg.h
user/ngie/stable-10-libnv/sys/dev/pci/pcivar.h
user/ngie/stable-10-libnv/sys/i386/conf/GENERIC
Directory Properties:
user/ngie/stable-10-libnv/ (props changed)
Modified: user/ngie/stable-10-libnv/sys/amd64/conf/GENERIC
==============================================================================
--- user/ngie/stable-10-libnv/sys/amd64/conf/GENERIC Sun Jan 3 04:54:10 2016 (r293071)
+++ user/ngie/stable-10-libnv/sys/amd64/conf/GENERIC Sun Jan 3 05:39:19 2016 (r293072)
@@ -90,6 +90,7 @@ device cpufreq
device acpi
options ACPI_DMAR
device pci
+options PCI_IOV # PCI SR-IOV support
# Floppy drives
device fdc
Modified: user/ngie/stable-10-libnv/sys/conf/files
==============================================================================
--- user/ngie/stable-10-libnv/sys/conf/files Sun Jan 3 04:54:10 2016 (r293071)
+++ user/ngie/stable-10-libnv/sys/conf/files Sun Jan 3 05:39:19 2016 (r293072)
@@ -2003,6 +2003,8 @@ dev/pci/ignore_pci.c optional pci
dev/pci/isa_pci.c optional pci isa
dev/pci/pci.c optional pci
dev/pci/pci_if.m standard
+dev/pci/pci_iov.c optional pci pci_iov
+dev/pci/pci_iov_schema.c optional pci pci_iov
dev/pci/pci_pci.c optional pci
dev/pci/pci_subr.c optional pci
dev/pci/pci_user.c optional pci
Modified: user/ngie/stable-10-libnv/sys/conf/options
==============================================================================
--- user/ngie/stable-10-libnv/sys/conf/options Sun Jan 3 04:54:10 2016 (r293071)
+++ user/ngie/stable-10-libnv/sys/conf/options Sun Jan 3 05:39:19 2016 (r293072)
@@ -166,6 +166,7 @@ NO_SYSCTL_DESCR opt_global.h
NSWBUF_MIN opt_swap.h
MBUF_PACKET_ZONE_DISABLE opt_global.h
PANIC_REBOOT_WAIT_TIME opt_panic.h
+PCI_IOV opt_global.h
PPC_DEBUG opt_ppc.h
PPC_PROBE_CHIPSET opt_ppc.h
PPS_SYNC opt_ntp.h
Modified: user/ngie/stable-10-libnv/sys/dev/acpica/acpi_pci.c
==============================================================================
--- user/ngie/stable-10-libnv/sys/dev/acpica/acpi_pci.c Sun Jan 3 04:54:10 2016 (r293071)
+++ user/ngie/stable-10-libnv/sys/dev/acpica/acpi_pci.c Sun Jan 3 05:39:19 2016 (r293072)
@@ -84,6 +84,11 @@ static int acpi_pci_set_powerstate_metho
static void acpi_pci_update_device(ACPI_HANDLE handle, device_t pci_child);
static bus_dma_tag_t acpi_pci_get_dma_tag(device_t bus, device_t child);
+#ifdef PCI_IOV
+static device_t acpi_pci_create_iov_child(device_t bus, device_t pf,
+ uint16_t rid, uint16_t vid, uint16_t did);
+#endif
+
static device_method_t acpi_pci_methods[] = {
/* Device interface */
DEVMETHOD(device_probe, acpi_pci_probe),
@@ -98,6 +103,9 @@ static device_method_t acpi_pci_methods[
/* PCI interface */
DEVMETHOD(pci_set_powerstate, acpi_pci_set_powerstate_method),
+#ifdef PCI_IOV
+ DEVMETHOD(pci_create_iov_child, acpi_pci_create_iov_child),
+#endif
DEVMETHOD_END
};
@@ -345,3 +353,23 @@ acpi_pci_get_dma_tag(device_t bus, devic
return (pci_get_dma_tag(bus, child));
}
#endif
+
+#ifdef PCI_IOV
+static device_t
+acpi_pci_create_iov_child(device_t bus, device_t pf, uint16_t rid, uint16_t vid,
+ uint16_t did)
+{
+ struct acpi_pci_devinfo *dinfo;
+ device_t vf;
+
+ vf = pci_add_iov_child(bus, pf, sizeof(struct acpi_pci_devinfo), rid,
+ vid, did);
+ if (vf == NULL)
+ return (NULL);
+
+ dinfo = device_get_ivars(vf);
+ dinfo->ap_handle = NULL;
+ return (vf);
+}
+#endif
+
Modified: user/ngie/stable-10-libnv/sys/dev/pci/pci.c
==============================================================================
--- user/ngie/stable-10-libnv/sys/dev/pci/pci.c Sun Jan 3 04:54:10 2016 (r293071)
+++ user/ngie/stable-10-libnv/sys/dev/pci/pci.c Sun Jan 3 05:39:19 2016 (r293072)
@@ -186,6 +186,11 @@ static device_method_t pci_methods[] = {
DEVMETHOD(pci_msix_count, pci_msix_count_method),
DEVMETHOD(pci_get_rid, pci_get_rid_method),
DEVMETHOD(pci_child_added, pci_child_added_method),
+#ifdef PCI_IOV
+ DEVMETHOD(pci_iov_attach, pci_iov_attach_method),
+ DEVMETHOD(pci_iov_detach, pci_iov_detach_method),
+ DEVMETHOD(pci_create_iov_child, pci_create_iov_child_method),
+#endif
DEVMETHOD_END
};
@@ -653,6 +658,9 @@ pci_fill_devinfo(device_t pcib, int d, i
cfg->hdrtype &= ~PCIM_MFDEV;
STAILQ_INIT(&cfg->maps);
+ cfg->devinfo_size = size;
+ cfg->iov = NULL;
+
pci_fixancient(cfg);
pci_hdrtypedata(pcib, b, s, f, cfg);
@@ -3611,6 +3619,51 @@ pci_add_children(device_t dev, int domai
#undef REG
}
+#ifdef PCI_IOV
+device_t
+pci_add_iov_child(device_t bus, device_t pf, size_t size, uint16_t rid,
+ uint16_t vid, uint16_t did)
+{
+ struct pci_devinfo *pf_dinfo, *vf_dinfo;
+ device_t pcib;
+ int busno, slot, func;
+
+ pf_dinfo = device_get_ivars(pf);
+
+ /*
+ * Do a sanity check that we have been passed the correct size. If this
+ * test fails then likely the pci subclass hasn't implemented the
+ * pci_create_iov_child method like it's supposed it.
+ */
+ if (size != pf_dinfo->cfg.devinfo_size) {
+ device_printf(pf,
+ "PCI subclass does not properly implement PCI_IOV\n");
+ return (NULL);
+ }
+
+ pcib = device_get_parent(bus);
+
+ PCIB_DECODE_RID(pcib, rid, &busno, &slot, &func);
+
+ vf_dinfo = pci_fill_devinfo(pcib, pci_get_domain(pcib), busno, slot, func,
+ vid, did, size);
+
+ vf_dinfo->cfg.flags |= PCICFG_VF;
+ pci_add_child(bus, vf_dinfo);
+
+ return (vf_dinfo->cfg.dev);
+}
+
+device_t
+pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid,
+ uint16_t vid, uint16_t did)
+{
+
+ return (pci_add_iov_child(bus, pf, sizeof(struct pci_devinfo), rid, vid,
+ did));
+}
+#endif
+
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
@@ -4722,11 +4775,30 @@ struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
u_long start, u_long end, u_long count, u_int flags)
{
+#ifdef PCI_IOV
+ struct pci_devinfo *dinfo;
+#endif
if (device_get_parent(child) != dev)
return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
type, rid, start, end, count, flags));
+#ifdef PCI_IOV
+ dinfo = device_get_ivars(child);
+ if (dinfo->cfg.flags & PCICFG_VF) {
+ switch (type) {
+ /* VFs can't have I/O BARs. */
+ case SYS_RES_IOPORT:
+ return (NULL);
+ case SYS_RES_MEMORY:
+ return (pci_vf_alloc_mem_resource(dev, child, rid,
+ start, end, count, flags));
+ }
+
+ /* Fall through for other types of resource allocations. */
+ }
+#endif
+
return (pci_alloc_multi_resource(dev, child, type, rid, start, end,
count, 1, flags));
}
@@ -4745,6 +4817,22 @@ pci_release_resource(device_t dev, devic
dinfo = device_get_ivars(child);
cfg = &dinfo->cfg;
+
+#ifdef PCI_IOV
+ if (dinfo->cfg.flags & PCICFG_VF) {
+ switch (type) {
+ /* VFs can't have I/O BARs. */
+ case SYS_RES_IOPORT:
+ return (EDOOFUS);
+ case SYS_RES_MEMORY:
+ return (pci_vf_release_mem_resource(dev, child, rid,
+ r));
+ }
+
+ /* Fall through for other types of resource allocations. */
+ }
+#endif
+
#ifdef NEW_PCIB
/*
* PCI-PCI bridge I/O window resources are not BARs. For
Modified: user/ngie/stable-10-libnv/sys/dev/pci/pci_if.m
==============================================================================
--- user/ngie/stable-10-libnv/sys/dev/pci/pci_if.m Sun Jan 3 04:54:10 2016 (r293071)
+++ user/ngie/stable-10-libnv/sys/dev/pci/pci_if.m Sun Jan 3 05:39:19 2016 (r293072)
@@ -36,8 +36,20 @@ CODE {
{
return (0);
}
+
+ static device_t
+ null_create_iov_child(device_t bus, device_t pf, uint16_t rid,
+ uint16_t vid, uint16_t did)
+ {
+ device_printf(bus, "PCI_IOV not implemented on this bus.\n");
+ return (NULL);
+ }
};
+HEADER {
+ struct nvlist;
+}
+
METHOD u_int32_t read_config {
device_t dev;
@@ -189,3 +201,40 @@ METHOD void child_added {
device_t dev;
device_t child;
};
+
+METHOD int iov_attach {
+ device_t dev;
+ device_t child;
+ struct nvlist *pf_schema;
+ struct nvlist *vf_schema;
+};
+
+METHOD int iov_detach {
+ device_t dev;
+ device_t child;
+};
+
+METHOD int init_iov {
+ device_t dev;
+ uint16_t num_vfs;
+ const struct nvlist *config;
+};
+
+METHOD void uninit_iov {
+ device_t dev;
+};
+
+METHOD int add_vf {
+ device_t dev;
+ uint16_t vfnum;
+ const struct nvlist *config;
+};
+
+METHOD device_t create_iov_child {
+ device_t bus;
+ device_t pf;
+ uint16_t rid;
+ uint16_t vid;
+ uint16_t did;
+} DEFAULT null_create_iov_child;
+
Copied and modified: user/ngie/stable-10-libnv/sys/dev/pci/pci_iov.c (from r279447, head/sys/dev/pci/pci_iov.c)
==============================================================================
--- head/sys/dev/pci/pci_iov.c Sun Mar 1 00:40:09 2015 (r279447, copy source)
+++ user/ngie/stable-10-libnv/sys/dev/pci/pci_iov.c Sun Jan 3 05:39:19 2016 (r293072)
@@ -46,11 +46,16 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <machine/bus.h>
+#include <machine/stdarg.h>
+
+#include <sys/nv.h>
+#include <sys/iov_schema.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>
#include <dev/pci/pci_iov_private.h>
+#include <dev/pci/schema_private.h>
#include "pci_if.h"
#include "pcib_if.h"
@@ -65,24 +70,48 @@ static struct cdevsw iov_cdevsw = {
.d_ioctl = pci_iov_ioctl
};
+SYSCTL_DECL(_hw_pci);
+
+/*
+ * The maximum amount of memory we will allocate for user configuration of an
+ * SR-IOV device. 1MB ought to be enough for anyone, but leave this
+ * configurable just in case.
+ */
+static u_long pci_iov_max_config = 1024 * 1024;
+SYSCTL_ULONG(_hw_pci, OID_AUTO, iov_max_config, CTLFLAG_RWTUN,
+ &pci_iov_max_config, 0, "Maximum allowed size of SR-IOV configuration.");
+
+
#define IOV_READ(d, r, w) \
pci_read_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, w)
#define IOV_WRITE(d, r, v, w) \
pci_write_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, v, w)
+static nvlist_t *pci_iov_build_schema(nvlist_t **pf_schema,
+ nvlist_t **vf_schema);
+static void pci_iov_build_pf_schema(nvlist_t *schema,
+ nvlist_t **driver_schema);
+static void pci_iov_build_vf_schema(nvlist_t *schema,
+ nvlist_t **driver_schema);
+static nvlist_t *pci_iov_get_pf_subsystem_schema(void);
+static nvlist_t *pci_iov_get_vf_subsystem_schema(void);
+
int
-pci_iov_attach_method(device_t bus, device_t dev)
+pci_iov_attach_method(device_t bus, device_t dev, nvlist_t *pf_schema,
+ nvlist_t *vf_schema)
{
device_t pcib;
struct pci_devinfo *dinfo;
struct pcicfg_iov *iov;
+ nvlist_t *schema;
uint32_t version;
int error;
int iov_pos;
dinfo = device_get_ivars(dev);
pcib = device_get_parent(bus);
+ schema = NULL;
error = pci_find_extcap(dev, PCIZ_SRIOV, &iov_pos);
@@ -106,9 +135,15 @@ pci_iov_attach_method(device_t bus, devi
error = EBUSY;
goto cleanup;
}
-
iov->iov_pos = iov_pos;
+ schema = pci_iov_build_schema(&pf_schema, &vf_schema);
+ if (schema == NULL) {
+ error = ENOMEM;
+ goto cleanup;
+ }
+ iov->iov_schema = schema;
+
iov->iov_cdev = make_dev(&iov_cdevsw, device_get_unit(dev),
UID_ROOT, GID_WHEEL, 0600, "iov/%s", device_get_nameunit(dev));
@@ -124,6 +159,9 @@ pci_iov_attach_method(device_t bus, devi
return (0);
cleanup:
+ nvlist_destroy(schema);
+ nvlist_destroy(pf_schema);
+ nvlist_destroy(vf_schema);
free(iov, M_SRIOV);
mtx_unlock(&Giant);
return (error);
@@ -144,7 +182,7 @@ pci_iov_detach_method(device_t bus, devi
return (0);
}
- if (iov->iov_num_vfs != 0) {
+ if (iov->iov_num_vfs != 0 || iov->iov_flags & IOV_BUSY) {
mtx_unlock(&Giant);
return (EBUSY);
}
@@ -155,6 +193,7 @@ pci_iov_detach_method(device_t bus, devi
destroy_dev(iov->iov_cdev);
iov->iov_cdev = NULL;
}
+ nvlist_destroy(iov->iov_schema);
free(iov, M_SRIOV);
mtx_unlock(&Giant);
@@ -162,6 +201,210 @@ pci_iov_detach_method(device_t bus, devi
return (0);
}
+static nvlist_t *
+pci_iov_build_schema(nvlist_t **pf, nvlist_t **vf)
+{
+ nvlist_t *schema, *pf_driver, *vf_driver;
+
+ /* We always take ownership of the schemas. */
+ pf_driver = *pf;
+ *pf = NULL;
+ vf_driver = *vf;
+ *vf = NULL;
+
+ schema = pci_iov_schema_alloc_node();
+ if (schema == NULL)
+ goto cleanup;
+
+ pci_iov_build_pf_schema(schema, &pf_driver);
+ pci_iov_build_vf_schema(schema, &vf_driver);
+
+ if (nvlist_error(schema) != 0)
+ goto cleanup;
+
+ return (schema);
+
+cleanup:
+ nvlist_destroy(schema);
+ nvlist_destroy(pf_driver);
+ nvlist_destroy(vf_driver);
+ return (NULL);
+}
+
+static void
+pci_iov_build_pf_schema(nvlist_t *schema, nvlist_t **driver_schema)
+{
+ nvlist_t *pf_schema, *iov_schema;
+
+ pf_schema = pci_iov_schema_alloc_node();
+ if (pf_schema == NULL) {
+ nvlist_set_error(schema, ENOMEM);
+ return;
+ }
+
+ iov_schema = pci_iov_get_pf_subsystem_schema();
+
+ /*
+ * Note that if either *driver_schema or iov_schema is NULL, then
+ * nvlist_move_nvlist will put the schema in the error state and
+ * SR-IOV will fail to initialize later, so we don't have to explicitly
+ * handle that case.
+ */
+ nvlist_move_nvlist(pf_schema, DRIVER_CONFIG_NAME, *driver_schema);
+ nvlist_move_nvlist(pf_schema, IOV_CONFIG_NAME, iov_schema);
+ nvlist_move_nvlist(schema, PF_CONFIG_NAME, pf_schema);
+ *driver_schema = NULL;
+}
+
+static void
+pci_iov_build_vf_schema(nvlist_t *schema, nvlist_t **driver_schema)
+{
+ nvlist_t *vf_schema, *iov_schema;
+
+ vf_schema = pci_iov_schema_alloc_node();
+ if (vf_schema == NULL) {
+ nvlist_set_error(schema, ENOMEM);
+ return;
+ }
+
+ iov_schema = pci_iov_get_vf_subsystem_schema();
+
+ /*
+ * Note that if either *driver_schema or iov_schema is NULL, then
+ * nvlist_move_nvlist will put the schema in the error state and
+ * SR-IOV will fail to initialize later, so we don't have to explicitly
+ * handle that case.
+ */
+ nvlist_move_nvlist(vf_schema, DRIVER_CONFIG_NAME, *driver_schema);
+ nvlist_move_nvlist(vf_schema, IOV_CONFIG_NAME, iov_schema);
+ nvlist_move_nvlist(schema, VF_SCHEMA_NAME, vf_schema);
+ *driver_schema = NULL;
+}
+
+static nvlist_t *
+pci_iov_get_pf_subsystem_schema(void)
+{
+ nvlist_t *pf;
+
+ pf = pci_iov_schema_alloc_node();
+ if (pf == NULL)
+ return (NULL);
+
+ pci_iov_schema_add_uint16(pf, "num_vfs", IOV_SCHEMA_REQUIRED, -1);
+ pci_iov_schema_add_string(pf, "device", IOV_SCHEMA_REQUIRED, NULL);
+
+ return (pf);
+}
+
+static nvlist_t *
+pci_iov_get_vf_subsystem_schema(void)
+{
+ nvlist_t *vf;
+
+ vf = pci_iov_schema_alloc_node();
+ if (vf == NULL)
+ return (NULL);
+
+ pci_iov_schema_add_bool(vf, "passthrough", IOV_SCHEMA_HASDEFAULT, 0);
+
+ return (vf);
+}
+
+static int
+pci_iov_alloc_bar(struct pci_devinfo *dinfo, int bar, pci_addr_t bar_shift)
+{
+ struct resource *res;
+ struct pcicfg_iov *iov;
+ device_t dev, bus;
+ u_long start, end;
+ pci_addr_t bar_size;
+ int rid;
+
+ iov = dinfo->cfg.iov;
+ dev = dinfo->cfg.dev;
+ bus = device_get_parent(dev);
+ rid = iov->iov_pos + PCIR_SRIOV_BAR(bar);
+ bar_size = 1 << bar_shift;
+
+ res = pci_alloc_multi_resource(bus, dev, SYS_RES_MEMORY, &rid, 0ul,
+ ~0ul, 1, iov->iov_num_vfs, RF_ACTIVE);
+
+ if (res == NULL)
+ return (ENXIO);
+
+ iov->iov_bar[bar].res = res;
+ iov->iov_bar[bar].bar_size = bar_size;
+ iov->iov_bar[bar].bar_shift = bar_shift;
+
+ start = rman_get_start(res);
+ end = rman_get_end(res);
+ return (rman_manage_region(&iov->rman, start, end));
+}
+
+static void
+pci_iov_add_bars(struct pcicfg_iov *iov, struct pci_devinfo *dinfo)
+{
+ struct pci_iov_bar *bar;
+ uint64_t bar_start;
+ int i;
+
+ for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
+ bar = &iov->iov_bar[i];
+ if (bar->res != NULL) {
+ bar_start = rman_get_start(bar->res) +
+ dinfo->cfg.vf.index * bar->bar_size;
+
+ pci_add_bar(dinfo->cfg.dev, PCIR_BAR(i), bar_start,
+ bar->bar_shift);
+ }
+ }
+}
+
+static int
+pci_iov_parse_config(struct pcicfg_iov *iov, struct pci_iov_arg *arg,
+ nvlist_t **ret)
+{
+ void *packed_config;
+ nvlist_t *config;
+ int error;
+
+ config = NULL;
+ packed_config = NULL;
+
+ if (arg->len > pci_iov_max_config) {
+ error = EMSGSIZE;
+ goto out;
+ }
+
+ packed_config = malloc(arg->len, M_SRIOV, M_WAITOK);
+
+ error = copyin(arg->config, packed_config, arg->len);
+ if (error != 0)
+ goto out;
+
+ config = nvlist_unpack(packed_config, arg->len);
+ if (config == NULL) {
+ error = EINVAL;
+ goto out;
+ }
+
+ error = pci_iov_schema_validate_config(iov->iov_schema, config);
+ if (error != 0)
+ goto out;
+
+ error = nvlist_error(config);
+ if (error != 0)
+ goto out;
+
+ *ret = config;
+ config = NULL;
+
+out:
+ nvlist_destroy(config);
+ free(packed_config, M_SRIOV);
+ return (error);
+}
+
/*
* Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV
* capability. This bit is only writeable on the lowest-numbered PF but
@@ -235,10 +478,79 @@ pci_iov_config_page_size(struct pci_devi
return (0);
}
+static int
+pci_init_iov(device_t dev, uint16_t num_vfs, const nvlist_t *config)
+{
+ const nvlist_t *device, *driver_config;
+
+ device = nvlist_get_nvlist(config, PF_CONFIG_NAME);
+ driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);
+ return (PCI_INIT_IOV(dev, num_vfs, driver_config));
+}
+
+static int
+pci_iov_init_rman(device_t pf, struct pcicfg_iov *iov)
+{
+ int error;
+
+ iov->rman.rm_start = 0;
+ iov->rman.rm_end = ~0ul;
+ iov->rman.rm_type = RMAN_ARRAY;
+ snprintf(iov->rman_name, sizeof(iov->rman_name), "%s VF I/O memory",
+ device_get_nameunit(pf));
+ iov->rman.rm_descr = iov->rman_name;
+
+ error = rman_init(&iov->rman);
+ if (error != 0)
+ return (error);
+
+ iov->iov_flags |= IOV_RMAN_INITED;
+ return (0);
+}
+
+static int
+pci_iov_setup_bars(struct pci_devinfo *dinfo)
+{
+ device_t dev;
+ struct pcicfg_iov *iov;
+ pci_addr_t bar_value, testval;
+ int i, last_64, error;
+
+ iov = dinfo->cfg.iov;
+ dev = dinfo->cfg.dev;
+ last_64 = 0;
+
+ for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
+ /*
+ * If a PCI BAR is a 64-bit wide BAR, then it spans two
+ * consecutive registers. Therefore if the last BAR that
+ * we looked at was a 64-bit BAR, we need to skip this
+ * register as it's the second half of the last BAR.
+ */
+ if (!last_64) {
+ pci_read_bar(dev,
+ iov->iov_pos + PCIR_SRIOV_BAR(i),
+ &bar_value, &testval, &last_64);
+
+ if (testval != 0) {
+ error = pci_iov_alloc_bar(dinfo, i,
+ pci_mapsize(testval));
+ if (error != 0)
+ return (error);
+ }
+ } else
+ last_64 = 0;
+ }
+
+ return (0);
+}
+
static void
-pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const char *driver,
+pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const nvlist_t *config,
uint16_t first_rid, uint16_t rid_stride)
{
+ char device_name[VF_MAX_NAME];
+ const nvlist_t *device, *driver_config, *iov_config;
device_t bus, dev, vf;
struct pcicfg_iov *iov;
struct pci_devinfo *vfinfo;
@@ -255,18 +567,31 @@ pci_iov_enumerate_vfs(struct pci_devinfo
did = IOV_READ(dinfo, PCIR_SRIOV_VF_DID, 2);
for (i = 0; i < iov->iov_num_vfs; i++, next_rid += rid_stride) {
-
+ snprintf(device_name, sizeof(device_name), VF_PREFIX"%d", i);
+ device = nvlist_get_nvlist(config, device_name);
+ iov_config = nvlist_get_nvlist(device, IOV_CONFIG_NAME);
+ driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);
vf = PCI_CREATE_IOV_CHILD(bus, dev, next_rid, vid, did);
if (vf == NULL)
break;
+ /*
+ * If we are creating passthrough devices then force the ppt
+ * driver to attach to prevent a VF driver from claiming the
+ * VFs.
+ */
+ if (nvlist_get_bool(iov_config, "passthrough"))
+ device_set_devclass(vf, "ppt");
+
vfinfo = device_get_ivars(vf);
vfinfo->cfg.iov = iov;
vfinfo->cfg.vf.index = i;
- error = PCI_ADD_VF(dev, i);
+ pci_iov_add_bars(iov, vfinfo);
+
+ error = PCI_ADD_VF(dev, i, driver_config);
if (error != 0) {
device_printf(dev, "Failed to add VF %d\n", i);
pci_delete_child(bus, vf);
@@ -280,14 +605,14 @@ static int
pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
{
device_t bus, dev;
- const char *driver;
struct pci_devinfo *dinfo;
struct pcicfg_iov *iov;
- int error;
+ nvlist_t *config;
+ int i, error;
uint16_t rid_off, rid_stride;
uint16_t first_rid, last_rid;
uint16_t iov_ctl;
- uint16_t total_vfs;
+ uint16_t num_vfs, total_vfs;
int iov_inited;
mtx_lock(&Giant);
@@ -296,28 +621,25 @@ pci_iov_config(struct cdev *cdev, struct
dev = dinfo->cfg.dev;
bus = device_get_parent(dev);
iov_inited = 0;
+ config = NULL;
- if (iov->iov_num_vfs != 0) {
+ if ((iov->iov_flags & IOV_BUSY) || iov->iov_num_vfs != 0) {
mtx_unlock(&Giant);
return (EBUSY);
}
+ iov->iov_flags |= IOV_BUSY;
- total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2);
+ error = pci_iov_parse_config(iov, arg, &config);
+ if (error != 0)
+ goto out;
- if (arg->num_vfs > total_vfs) {
+ num_vfs = pci_iov_config_get_num_vfs(config);
+ total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2);
+ if (num_vfs > total_vfs) {
error = EINVAL;
goto out;
}
- /*
- * If we are creating passthrough devices then force the ppt driver to
- * attach to prevent a VF driver from claming the VFs.
- */
- if (arg->passthrough)
- driver = "ppt";
- else
- driver = NULL;
-
error = pci_iov_config_page_size(dinfo);
if (error != 0)
goto out;
@@ -326,19 +648,18 @@ pci_iov_config(struct cdev *cdev, struct
if (error != 0)
goto out;
- error = PCI_INIT_IOV(dev, arg->num_vfs);
-
+ error = pci_init_iov(dev, num_vfs, config);
if (error != 0)
goto out;
-
iov_inited = 1;
- IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, arg->num_vfs, 2);
+
+ IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, num_vfs, 2);
rid_off = IOV_READ(dinfo, PCIR_SRIOV_VF_OFF, 2);
rid_stride = IOV_READ(dinfo, PCIR_SRIOV_VF_STRIDE, 2);
first_rid = pci_get_rid(dev) + rid_off;
- last_rid = first_rid + (arg->num_vfs - 1) * rid_stride;
+ last_rid = first_rid + (num_vfs - 1) * rid_stride;
/* We don't yet support allocating extra bus numbers for VFs. */
if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) {
@@ -350,26 +671,202 @@ pci_iov_config(struct cdev *cdev, struct
iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
- iov->iov_num_vfs = arg->num_vfs;
+ error = pci_iov_init_rman(dev, iov);
+ if (error != 0)
+ goto out;
+
+ iov->iov_num_vfs = num_vfs;
+
+ error = pci_iov_setup_bars(dinfo);
+ if (error != 0)
+ goto out;
iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
- iov_ctl |= PCIM_SRIOV_VF_EN;
+ iov_ctl |= PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE;
IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
/* Per specification, we must wait 100ms before accessing VFs. */
pause("iov", roundup(hz, 10));
- pci_iov_enumerate_vfs(dinfo, driver, first_rid, rid_stride);
+ pci_iov_enumerate_vfs(dinfo, config, first_rid, rid_stride);
+
+ nvlist_destroy(config);
+ iov->iov_flags &= ~IOV_BUSY;
mtx_unlock(&Giant);
return (0);
out:
if (iov_inited)
PCI_UNINIT_IOV(dev);
+
+ for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
+ if (iov->iov_bar[i].res != NULL) {
+ pci_release_resource(bus, dev, SYS_RES_MEMORY,
+ iov->iov_pos + PCIR_SRIOV_BAR(i),
+ iov->iov_bar[i].res);
+ pci_delete_resource(bus, dev, SYS_RES_MEMORY,
+ iov->iov_pos + PCIR_SRIOV_BAR(i));
+ iov->iov_bar[i].res = NULL;
+ }
+ }
+
+ if (iov->iov_flags & IOV_RMAN_INITED) {
+ rman_fini(&iov->rman);
+ iov->iov_flags &= ~IOV_RMAN_INITED;
+ }
+
+ nvlist_destroy(config);
iov->iov_num_vfs = 0;
+ iov->iov_flags &= ~IOV_BUSY;
mtx_unlock(&Giant);
return (error);
}
+/* Return true if child is a VF of the given PF. */
+static int
+pci_iov_is_child_vf(struct pcicfg_iov *pf, device_t child)
+{
+ struct pci_devinfo *vfinfo;
+
+ vfinfo = device_get_ivars(child);
+
+ if (!(vfinfo->cfg.flags & PCICFG_VF))
+ return (0);
+
+ return (pf == vfinfo->cfg.iov);
+}
+
+static int
+pci_iov_delete(struct cdev *cdev)
+{
+ device_t bus, dev, vf, *devlist;
+ struct pci_devinfo *dinfo;
+ struct pcicfg_iov *iov;
+ int i, error, devcount;
+ uint32_t iov_ctl;
+
+ mtx_lock(&Giant);
+ dinfo = cdev->si_drv1;
+ iov = dinfo->cfg.iov;
+ dev = dinfo->cfg.dev;
+ bus = device_get_parent(dev);
+ devlist = NULL;
+
+ if (iov->iov_flags & IOV_BUSY) {
+ mtx_unlock(&Giant);
+ return (EBUSY);
+ }
+
+ if (iov->iov_num_vfs == 0) {
+ mtx_unlock(&Giant);
+ return (ECHILD);
+ }
+
+ iov->iov_flags |= IOV_BUSY;
+
+ error = device_get_children(bus, &devlist, &devcount);
+
+ if (error != 0)
+ goto out;
+
+ for (i = 0; i < devcount; i++) {
+ vf = devlist[i];
+
+ if (!pci_iov_is_child_vf(iov, vf))
+ continue;
+
+ error = device_detach(vf);
+ if (error != 0) {
+ device_printf(dev,
+ "Could not disable SR-IOV: failed to detach VF %s\n",
+ device_get_nameunit(vf));
+ goto out;
+ }
+ }
+
+ for (i = 0; i < devcount; i++) {
+ vf = devlist[i];
+
+ if (pci_iov_is_child_vf(iov, vf))
+ pci_delete_child(bus, vf);
+ }
+ PCI_UNINIT_IOV(dev);
+
+ iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
+ iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
+ IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
+ IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, 0, 2);
+
+ iov->iov_num_vfs = 0;
+
+ for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
+ if (iov->iov_bar[i].res != NULL) {
+ pci_release_resource(bus, dev, SYS_RES_MEMORY,
+ iov->iov_pos + PCIR_SRIOV_BAR(i),
+ iov->iov_bar[i].res);
+ pci_delete_resource(bus, dev, SYS_RES_MEMORY,
+ iov->iov_pos + PCIR_SRIOV_BAR(i));
+ iov->iov_bar[i].res = NULL;
+ }
+ }
+
+ if (iov->iov_flags & IOV_RMAN_INITED) {
+ rman_fini(&iov->rman);
+ iov->iov_flags &= ~IOV_RMAN_INITED;
+ }
+
+ error = 0;
+out:
+ free(devlist, M_TEMP);
+ iov->iov_flags &= ~IOV_BUSY;
+ mtx_unlock(&Giant);
+ return (error);
+}
+
+static int
+pci_iov_get_schema_ioctl(struct cdev *cdev, struct pci_iov_schema *output)
+{
+ struct pci_devinfo *dinfo;
+ void *packed;
+ size_t output_len, size;
+ int error;
+
+ packed = NULL;
+
+ mtx_lock(&Giant);
+ dinfo = cdev->si_drv1;
+ packed = nvlist_pack(dinfo->cfg.iov->iov_schema, &size);
+ mtx_unlock(&Giant);
+
+ if (packed == NULL) {
+ error = ENOMEM;
+ goto fail;
+ }
+
+ output_len = output->len;
+ output->len = size;
+ if (size <= output_len) {
+ error = copyout(packed, output->schema, size);
+
+ if (error != 0)
+ goto fail;
+
+ output->error = 0;
+ } else
+ /*
+ * If we return an error then the ioctl code won't copyout
+ * output back to userland, so we flag the error in the struct
+ * instead.
+ */
+ output->error = EMSGSIZE;
+
+ error = 0;
+
+fail:
+ free(packed, M_NVLIST);
+
+ return (error);
+}
+
static int
pci_iov_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
struct thread *td)
@@ -378,8 +875,102 @@ pci_iov_ioctl(struct cdev *dev, u_long c
switch (cmd) {
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-user
mailing list