Re: ZFS + FreeBSD XEN dom0 panic

From: Roger Pau Monné <roger.pau_at_citrix.com>
Date: Mon, 21 Mar 2022 16:35:15 UTC
On Mon, Mar 21, 2022 at 04:07:48PM +0200, Ze Dupsys wrote:
> On 2022.03.21. 13:14, Roger Pau Monné wrote:
> > I think the problem is not likely with the xenstore implementation
> > (ie: xs_talkv) but rather a race with how the FreeBSD kernel detects
> > and manages addition and removal of devices that hang off xenbus.
> > 
> > I'm afraid there's too much data below for me to parse it.
> 
> Understood. Sounds more tricky than i thought. What could i do to make data
> more useful?

I have another patch for you to try. This will make the system a bit
chatty, let's see what you get.

Thanks, Roger.
---8<---
diff --git a/sys/dev/xen/netback/netback.c b/sys/dev/xen/netback/netback.c
index bf54f3a2f28e..f49b6baa90a6 100644
--- a/sys/dev/xen/netback/netback.c
+++ b/sys/dev/xen/netback/netback.c
@@ -99,7 +99,6 @@ static MALLOC_DEFINE(M_XENNETBACK, "xnb", "Xen Net Back Driver Data");
 #define	XNB_RX_FLIP 0	/* netback driver does not support feature-rx-flip */
 
 #undef XNB_DEBUG
-#define	XNB_DEBUG /* hardcode on during development */
 
 #ifdef XNB_DEBUG
 #define	DPRINTF(fmt, args...) \
diff --git a/sys/xen/xenbus/xenbusb.c b/sys/xen/xenbus/xenbusb.c
index e026f8203ea1..767175d7174f 100644
--- a/sys/xen/xenbus/xenbusb.c
+++ b/sys/xen/xenbus/xenbusb.c
@@ -254,11 +254,15 @@ xenbusb_delete_child(device_t dev, device_t child)
 static void
 xenbusb_verify_device(device_t dev, device_t child)
 {
-	if (xs_exists(XST_NIL, xenbus_get_node(child), "") == 0) {
+	struct xenbus_device_ivars *ivars = device_get_ivars(child);
+
+	printf("Check device %s\n", ivars->xd_node);
+	if (xs_exists(XST_NIL, xenbus_get_node(child), "state") == 0) {
 		/*
 		 * Device tree has been removed from Xenbus.
 		 * Tear down the device.
 		 */
+		printf("Delete %s\n", ivars->xd_node);
 		xenbusb_delete_child(dev, child);
 	}
 }
@@ -454,7 +458,11 @@ xenbusb_probe_children(device_t dev)
 				continue;
 			}
 
-			error = device_probe_and_attach(kids[i]);
+			ivars = device_get_ivars(kids[i]);
+			printf("Trying to add dev %s\n", ivars->xd_node);
+			error = xenbus_read_driver_state(ivars->xd_node);
+			if (error != XenbusStateClosed)
+				error = device_probe_and_attach(kids[i]);
 			if (error == ENXIO) {
 				struct xenbusb_softc *xbs;
 
@@ -509,9 +517,9 @@ xenbusb_probe_children(device_t dev)
 			 * that can receive otherend state change events,
 			 * hook up a watch for them.
 			 */
-			ivars = device_get_ivars(kids[i]);
 			xs_register_watch(&ivars->xd_otherend_watch);
 			xs_register_watch(&ivars->xd_local_watch);
+			printf("Added dev %s\n", ivars->xd_node);
 		}
 		free(kids, M_TEMP);
 	}
@@ -907,6 +915,7 @@ xenbusb_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
 	case XENBUS_IVAR_STATE:
 	{
 		int error;
+		struct xs_transaction xst;
 
 		newstate = (enum xenbus_state)value;
 		sx_xlock(&ivars->xd_lock);
@@ -915,31 +924,37 @@ xenbusb_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
 			goto out;
 		}
 
-		error = xs_scanf(XST_NIL, ivars->xd_node, "state",
-		    NULL, "%d", &currstate);
-		if (error)
-			goto out;
-
 		do {
-			error = xs_printf(XST_NIL, ivars->xd_node, "state",
-			    "%d", newstate);
-		} while (error == EAGAIN);
-		if (error) {
-			/*
-			 * Avoid looping through xenbus_dev_fatal()
-			 * which calls xenbus_write_ivar to set the
-			 * state to closing.
-			 */
-			if (newstate != XenbusStateClosing)
-				xenbus_dev_fatal(dev, error,
-						 "writing new state");
-			goto out;
-		}
+			error = xs_transaction_start(&xst);
+			if (error != 0)
+				goto out;
+
+			error = xs_scanf(xst, ivars->xd_node, "state", NULL,
+			    "%d", &currstate);
+			if (error)
+				goto out;
+
+			do {
+				error = xs_printf(xst, ivars->xd_node, "state",
+				    "%d", newstate);
+			} while (error == EAGAIN);
+			if (error) {
+				/*
+				 * Avoid looping through xenbus_dev_fatal()
+				 * which calls xenbus_write_ivar to set the
+				 * state to closing.
+				 */
+				if (newstate != XenbusStateClosing)
+					xenbus_dev_fatal(dev, error,
+					    "writing new state");
+				goto out;
+			}
+		} while (xs_transaction_end(xst, 0));
 		ivars->xd_state = newstate;
 
-		if ((ivars->xd_flags & XDF_CONNECTING) != 0
-		 && (newstate == XenbusStateClosed
-		  || newstate == XenbusStateConnected)) {
+		if ((ivars->xd_flags & XDF_CONNECTING) != 0 &&
+		    (newstate == XenbusStateClosed ||
+		    newstate == XenbusStateConnected)) {
 			struct xenbusb_softc *xbs;
 
 			ivars->xd_flags &= ~XDF_CONNECTING;
@@ -949,6 +964,8 @@ xenbusb_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
 
 		wakeup(&ivars->xd_state);
 	out:
+		if (error != 0)
+			xs_transaction_end(xst, 1);
 		sx_xunlock(&ivars->xd_lock);
 		return (error);
 	}