git: 381ef27d7b7f - main - bhyve: use pci_next() to save/restore pci devices

From: Corvin Köhne <corvink_at_FreeBSD.org>
Date: Mon, 19 Jun 2023 05:58:02 UTC
The branch main has been updated by corvink:

URL: https://cgit.FreeBSD.org/src/commit/?id=381ef27d7b7f9d9130b6b308d1d598d093d7cfba

commit 381ef27d7b7f9d9130b6b308d1d598d093d7cfba
Author:     Vitaliy Gusev <gusev.vitaliy@gmail.com>
AuthorDate: 2023-05-15 14:29:04 +0000
Commit:     Corvin Köhne <corvink@FreeBSD.org>
CommitDate: 2023-06-19 05:57:05 +0000

    bhyve: use pci_next() to save/restore pci devices
    
    Current snapshot implementation doesn't support multiple devices with
    similar type. For example, two virtio-blk or two CD-ROM-s, etc.
    
    So the following configuration cannot be restored.
    
    bhyve \
            -s 3,virtio-blk,disk.img \
            -s 4,virtio-blk,disk2.img
    
    In some cases it is restored silently, but doesn't work. In some cases
    it fails during restore stage.
    
    This commit fixes that issue.
    
    Reviewed by:            corvink, rew
    MFC after:              1 week
    Sponsored by:           vStack
    Differential Revision:  https://reviews.freebsd.org/D40109
---
 usr.sbin/bhyve/pci_emul.c | 139 +++++++++++++++-------------------------------
 usr.sbin/bhyve/pci_emul.h |   5 +-
 usr.sbin/bhyve/snapshot.c | 121 +++++++++++++++++-----------------------
 3 files changed, 100 insertions(+), 165 deletions(-)

diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c
index 6eb428e76817..cb92ea9edb09 100644
--- a/usr.sbin/bhyve/pci_emul.c
+++ b/usr.sbin/bhyve/pci_emul.c
@@ -2364,42 +2364,6 @@ done:
 	return (ret);
 }
 
-static int
-pci_find_slotted_dev(const char *dev_name, struct pci_devemu **pde,
-		     struct pci_devinst **pdi)
-{
-	struct businfo *bi;
-	struct slotinfo *si;
-	struct funcinfo *fi;
-	int bus, slot, func;
-
-	assert(dev_name != NULL);
-	assert(pde != NULL);
-	assert(pdi != NULL);
-
-	for (bus = 0; bus < MAXBUSES; bus++) {
-		if ((bi = pci_businfo[bus]) == NULL)
-			continue;
-
-		for (slot = 0; slot < MAXSLOTS; slot++) {
-			si = &bi->slotinfo[slot];
-			for (func = 0; func < MAXFUNCS; func++) {
-				fi = &si->si_funcs[func];
-				if (fi->fi_pde == NULL)
-					continue;
-				if (strcmp(dev_name, fi->fi_pde->pe_emu) != 0)
-					continue;
-
-				*pde = fi->fi_pde;
-				*pdi = fi->fi_devi;
-				return (0);
-			}
-		}
-	}
-
-	return (EINVAL);
-}
-
 int
 pci_snapshot(struct vm_snapshot_meta *meta)
 {
@@ -2409,57 +2373,26 @@ pci_snapshot(struct vm_snapshot_meta *meta)
 
 	assert(meta->dev_name != NULL);
 
-	ret = pci_find_slotted_dev(meta->dev_name, &pde, &pdi);
-	if (ret != 0) {
-		fprintf(stderr, "%s: no such name: %s\r\n",
-			__func__, meta->dev_name);
-		memset(meta->buffer.buf_start, 0, meta->buffer.buf_size);
-		return (0);
-	}
-
-	meta->dev_data = pdi;
+	pdi = meta->dev_data;
+	pde = pdi->pi_d;
 
-	if (pde->pe_snapshot == NULL) {
-		fprintf(stderr, "%s: not implemented yet for: %s\r\n",
-			__func__, meta->dev_name);
-		return (-1);
-	}
+	if (pde->pe_snapshot == NULL)
+		return (ENOTSUP);
 
 	ret = pci_snapshot_pci_dev(meta);
-	if (ret != 0) {
-		fprintf(stderr, "%s: failed to snapshot pci dev\r\n",
-			__func__);
-		return (-1);
-	}
-
-	ret = (*pde->pe_snapshot)(meta);
+	if (ret == 0)
+		ret = (*pde->pe_snapshot)(meta);
 
 	return (ret);
 }
 
 int
-pci_pause(const char *dev_name)
+pci_pause(struct pci_devinst *pdi)
 {
-	struct pci_devemu *pde;
-	struct pci_devinst *pdi;
-	int ret;
-
-	assert(dev_name != NULL);
-
-	ret = pci_find_slotted_dev(dev_name, &pde, &pdi);
-	if (ret != 0) {
-		/*
-		 * It is possible to call this function without
-		 * checking that the device is inserted first.
-		 */
-		fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name);
-		return (0);
-	}
+	struct pci_devemu *pde = pdi->pi_d;
 
 	if (pde->pe_pause == NULL) {
 		/* The pause/resume functionality is optional. */
-		fprintf(stderr, "%s: not implemented for: %s\n",
-			__func__, dev_name);
 		return (0);
 	}
 
@@ -2467,28 +2400,12 @@ pci_pause(const char *dev_name)
 }
 
 int
-pci_resume(const char *dev_name)
+pci_resume(struct pci_devinst *pdi)
 {
-	struct pci_devemu *pde;
-	struct pci_devinst *pdi;
-	int ret;
-
-	assert(dev_name != NULL);
-
-	ret = pci_find_slotted_dev(dev_name, &pde, &pdi);
-	if (ret != 0) {
-		/*
-		 * It is possible to call this function without
-		 * checking that the device is inserted first.
-		 */
-		fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name);
-		return (0);
-	}
+	struct pci_devemu *pde = pdi->pi_d;
 
 	if (pde->pe_resume == NULL) {
 		/* The pause/resume functionality is optional. */
-		fprintf(stderr, "%s: not implemented for: %s\n",
-			__func__, dev_name);
 		return (0);
 	}
 
@@ -2665,6 +2582,42 @@ pci_emul_dior(struct pci_devinst *pi, int baridx, uint64_t offset, int size)
 }
 
 #ifdef BHYVE_SNAPSHOT
+struct pci_devinst *
+pci_next(const struct pci_devinst *cursor)
+{
+	unsigned bus = 0, slot = 0, func = 0;
+	struct businfo *bi;
+	struct slotinfo *si;
+	struct funcinfo *fi;
+
+	bus = cursor ? cursor->pi_bus : 0;
+	slot = cursor ? cursor->pi_slot : 0;
+	func = cursor ? (cursor->pi_func + 1) : 0;
+
+	for (; bus < MAXBUSES; bus++) {
+		if ((bi = pci_businfo[bus]) == NULL)
+			continue;
+
+		if (slot >= MAXSLOTS)
+			slot = 0;
+
+		for (; slot < MAXSLOTS; slot++) {
+			si = &bi->slotinfo[slot];
+			if (func >= MAXFUNCS)
+				func = 0;
+			for (; func < MAXFUNCS; func++) {
+				fi = &si->si_funcs[func];
+				if (fi->fi_devi == NULL)
+					continue;
+
+				return (fi->fi_devi);
+			}
+		}
+	}
+
+	return (NULL);
+}
+
 static int
 pci_emul_snapshot(struct vm_snapshot_meta *meta __unused)
 {
diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h
index ac30c03d9411..d68920524398 100644
--- a/usr.sbin/bhyve/pci_emul.h
+++ b/usr.sbin/bhyve/pci_emul.h
@@ -263,9 +263,10 @@ void	pci_write_dsdt(void);
 uint64_t pci_ecfg_base(void);
 int	pci_bus_configured(int bus);
 #ifdef BHYVE_SNAPSHOT
+struct pci_devinst *pci_next(const struct pci_devinst *cursor);
 int	pci_snapshot(struct vm_snapshot_meta *meta);
-int	pci_pause(const char *dev_name);
-int	pci_resume(const char *dev_name);
+int	pci_pause(struct pci_devinst *pdi);
+int	pci_resume(struct pci_devinst *pdi);
 #endif
 
 static __inline void
diff --git a/usr.sbin/bhyve/snapshot.c b/usr.sbin/bhyve/snapshot.c
index f7bff85e3361..5569ffcb2e24 100644
--- a/usr.sbin/bhyve/snapshot.c
+++ b/usr.sbin/bhyve/snapshot.c
@@ -138,20 +138,6 @@ static sig_t old_winch_handler;
  _a < _b ? _a : _b;       	\
  })
 
-static const struct vm_snapshot_dev_info snapshot_devs[] = {
-	{ "atkbdc",	atkbdc_snapshot,	NULL,		NULL		},
-	{ "virtio-net",	pci_snapshot,		pci_pause,	pci_resume	},
-	{ "virtio-blk",	pci_snapshot,		pci_pause,	pci_resume	},
-	{ "virtio-rnd",	pci_snapshot,		NULL,		NULL		},
-	{ "lpc",	pci_snapshot,		NULL,		NULL		},
-	{ "fbuf",	pci_snapshot,		NULL,		NULL		},
-	{ "xhci",	pci_snapshot,		NULL,		NULL		},
-	{ "e1000",	pci_snapshot,		NULL,		NULL		},
-	{ "ahci",	pci_snapshot,		pci_pause,	pci_resume	},
-	{ "ahci-hd",	pci_snapshot,		pci_pause,	pci_resume	},
-	{ "ahci-cd",	pci_snapshot,		pci_pause,	pci_resume	},
-};
-
 static const struct vm_snapshot_kern_info snapshot_kern_structs[] = {
 	{ "vhpet",	STRUCT_VHPET	},
 	{ "vm",		STRUCT_VM	},
@@ -856,31 +842,29 @@ vm_restore_kern_structs(struct vmctx *ctx, struct restore_state *rstate)
 }
 
 static int
-vm_restore_device(struct restore_state *rstate,
-		    const struct vm_snapshot_dev_info *info)
+vm_restore_device(struct restore_state *rstate, vm_snapshot_dev_cb func,
+    const char *name, void *data)
 {
 	void *dev_ptr;
 	size_t dev_size;
 	int ret;
 	struct vm_snapshot_meta *meta;
 
-	dev_ptr = lookup_dev(info->dev_name, JSON_DEV_ARR_KEY, rstate,
-	    &dev_size);
+	dev_ptr = lookup_dev(name, JSON_DEV_ARR_KEY, rstate, &dev_size);
+
 	if (dev_ptr == NULL) {
-		fprintf(stderr, "Failed to lookup dev: %s\r\n", info->dev_name);
-		fprintf(stderr, "Continuing the restore/migration process\r\n");
-		return (0);
+		EPRINTLN("Failed to lookup dev: %s", name);
+		return (EINVAL);
 	}
 
 	if (dev_size == 0) {
-		fprintf(stderr, "%s: Device size is 0. "
-			"Assuming %s is not used\r\n",
-			__func__, info->dev_name);
-		return (0);
+		EPRINTLN("Restore device size is 0: %s", name);
+		return (EINVAL);
 	}
 
 	meta = &(struct vm_snapshot_meta) {
-		.dev_name = info->dev_name,
+		.dev_name = name,
+		.dev_data = data,
 
 		.buffer.buf_start = dev_ptr,
 		.buffer.buf_size = dev_size,
@@ -891,11 +875,10 @@ vm_restore_device(struct restore_state *rstate,
 		.op = VM_SNAPSHOT_RESTORE,
 	};
 
-	ret = (*info->snapshot_cb)(meta);
+	ret = func(meta);
 	if (ret != 0) {
-		fprintf(stderr, "Failed to restore dev: %s\r\n",
-			info->dev_name);
-		return (-1);
+		EPRINTLN("Failed to restore dev: %s %d", name, ret);
+		return (ret);
 	}
 
 	return (0);
@@ -904,33 +887,30 @@ vm_restore_device(struct restore_state *rstate,
 int
 vm_restore_devices(struct restore_state *rstate)
 {
-	size_t i;
 	int ret;
+	struct pci_devinst *pdi = NULL;
 
-	for (i = 0; i < nitems(snapshot_devs); i++) {
-		ret = vm_restore_device(rstate, &snapshot_devs[i]);
-		if (ret != 0)
+	while ((pdi = pci_next(pdi)) != NULL) {
+		ret = vm_restore_device(rstate, pci_snapshot, pdi->pi_name, pdi);
+		if (ret)
 			return (ret);
 	}
 
-	return 0;
+	return (vm_restore_device(rstate, atkbdc_snapshot, "atkbdc", NULL));
 }
 
 int
 vm_pause_devices(void)
 {
-	const struct vm_snapshot_dev_info *info;
-	size_t i;
 	int ret;
+	struct pci_devinst *pdi = NULL;
 
-	for (i = 0; i < nitems(snapshot_devs); i++) {
-		info = &snapshot_devs[i];
-		if (info->pause_cb == NULL)
-			continue;
-
-		ret = info->pause_cb(info->dev_name);
-		if (ret != 0)
+	while ((pdi = pci_next(pdi)) != NULL) {
+		ret = pci_pause(pdi);
+		if (ret) {
+			EPRINTLN("Cannot pause dev %s: %d", pdi->pi_name, ret);
 			return (ret);
+		}
 	}
 
 	return (0);
@@ -939,18 +919,15 @@ vm_pause_devices(void)
 int
 vm_resume_devices(void)
 {
-	const struct vm_snapshot_dev_info *info;
-	size_t i;
 	int ret;
+	struct pci_devinst *pdi = NULL;
 
-	for (i = 0; i < nitems(snapshot_devs); i++) {
-		info = &snapshot_devs[i];
-		if (info->resume_cb == NULL)
-			continue;
-
-		ret = info->resume_cb(info->dev_name);
-		if (ret != 0)
+	while ((pdi = pci_next(pdi)) != NULL) {
+		ret = pci_resume(pdi);
+		if (ret) {
+			EPRINTLN("Cannot resume '%s': %d", pdi->pi_name, ret);
 			return (ret);
+		}
 	}
 
 	return (0);
@@ -1089,16 +1066,21 @@ vm_snapshot_dev_write_data(int data_fd, xo_handle_t *xop, const char *array_key,
 }
 
 static int
-vm_snapshot_device(const struct vm_snapshot_dev_info *info,
-		     int data_fd, xo_handle_t *xop,
-		     struct vm_snapshot_meta *meta, off_t *offset)
+vm_snapshot_device(vm_snapshot_dev_cb func, const char *dev_name,
+    void *devdata, int data_fd, xo_handle_t *xop,
+    struct vm_snapshot_meta *meta, off_t *offset)
 {
 	int ret;
 
-	ret = (*info->snapshot_cb)(meta);
+	memset(meta->buffer.buf_start, 0, meta->buffer.buf_size);
+	meta->buffer.buf = meta->buffer.buf_start;
+	meta->buffer.buf_rem = meta->buffer.buf_size;
+	meta->dev_name = dev_name;
+	meta->dev_data = devdata;
+
+	ret = func(meta);
 	if (ret != 0) {
-		fprintf(stderr, "Failed to snapshot %s; ret=%d\r\n",
-			meta->dev_name, ret);
+		EPRINTLN("Failed to snapshot %s; ret=%d", dev_name, ret);
 		return (ret);
 	}
 
@@ -1116,8 +1098,9 @@ vm_snapshot_devices(int data_fd, xo_handle_t *xop)
 	int ret;
 	off_t offset;
 	void *buffer;
-	size_t buf_size, i;
+	size_t buf_size;
 	struct vm_snapshot_meta *meta;
+	struct pci_devinst *pdi;
 
 	buf_size = SNAPSHOT_BUFFER_SIZE;
 
@@ -1143,20 +1126,18 @@ vm_snapshot_devices(int data_fd, xo_handle_t *xop)
 
 	xo_open_list_h(xop, JSON_DEV_ARR_KEY);
 
-	/* Restore other devices that support this feature */
-	for (i = 0; i < nitems(snapshot_devs); i++) {
-		meta->dev_name = snapshot_devs[i].dev_name;
-
-		memset(meta->buffer.buf_start, 0, meta->buffer.buf_size);
-		meta->buffer.buf = meta->buffer.buf_start;
-		meta->buffer.buf_rem = meta->buffer.buf_size;
-
-		ret = vm_snapshot_device(&snapshot_devs[i], data_fd, xop,
-					   meta, &offset);
+	/* Save PCI devices */
+	pdi = NULL;
+	while ((pdi = pci_next(pdi)) != NULL) {
+		ret = vm_snapshot_device(pci_snapshot, pdi->pi_name, pdi,
+		    data_fd, xop, meta, &offset);
 		if (ret != 0)
 			goto snapshot_err;
 	}
 
+	ret = vm_snapshot_device(atkbdc_snapshot, "atkbdc", NULL,
+	    data_fd, xop, meta, &offset);
+
 	xo_close_list_h(xop, JSON_DEV_ARR_KEY);
 
 snapshot_err: