namei (via firmware_get(9)) from taskq in 7.x

Kostik Belousov kostikbel at gmail.com
Sun Oct 18 13:17:15 UTC 2009


On Thu, Oct 15, 2009 at 05:16:22PM -0400, Andrew Gallatin wrote:
> Hi,
> 
> I'm trying to re-initialize a NIC which uses firmware(9)
> after a hardware fault.  As part of the process, I need
> to re-load the firmware using firmware_get().  If the
> firmware kld is not resident, then the machine will panic
> like this:
> 
> Fatal trap 12: page fault while in kernel mode
> cpuid = 0; apic id = 00
> fault virtual address   = 0x20
> fault code              = supervisor read data, page not present
> instruction pointer     = 0x8:0xffffffff805b05d4
> stack pointer           = 0x10:0xffffff8000080460
> frame pointer           = 0x10:0xffffff8000080510
> code segment            = base 0x0, limit 0xfffff, type 0x1b
>                         = DPL 0, pres 1, long 1, def32 0, gran 1
> processor eflags        = interrupt enabled, resume, IOPL = 0
> current process         = 21 (swi5: +)
> [thread pid 21 tid 100021 ]
> Stopped at      namei+0x174:    movq    0x20(%rbx),%rax
> db> bt
> Tracing pid 21 tid 100021 td 0xffffff00013c3ae0
> namei() at namei+0x174
> vn_open_cred() at vn_open_cred+0x3a4
> linker_load_module() at linker_load_module+0x1f2
> linker_reference_module() at linker_reference_module+0xae
> firmware_get() at firmware_get+0x136
> mxge_load_firmware() at mxge_load_firmware+0x2d
> mxge_watchdog_task() at mxge_watchdog_task+0x2f6
> taskqueue_run() at taskqueue_run+0x9d
> ithread_loop() at ithread_loop+0x17d
> fork_exit() at fork_exit+0x11f
> fork_trampoline() at fork_trampoline+0xe
> 
> Looking at it in gdb, it seems like the problem is that namei
> is trying to use ndp->ni_cnd.cn_thread->td_proc->p_fd->fd_cdir
> which is null in this context.
> 
> Can somebody tell me what kernel context it is safe to
> call firmware_get() (and hence namei) from?  Is there
> a safe way to do it from a taskq?
> 
> FWIW, this seems to work fine (even from a callout context)
> in 8 and higher.  It is only 7 and earlier where I'm having
> this problem.

It seems that you want a merge of r178042,183614,184842,188057 (one of
which is yours).


Property changes on: .
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /head/sys:r178042,183614,184842,188057

Index: kern/subr_firmware.c
===================================================================
--- kern/subr_firmware.c	(revision 198202)
+++ kern/subr_firmware.c	(working copy)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2005, Sam Leffler <sam at errno.com>
+ * Copyright (c) 2005-2008, Sam Leffler <sam at errno.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -41,7 +41,11 @@
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/module.h>
+#include <sys/eventhandler.h>
 
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+
 /*
  * Loadable firmware support. See sys/sys/firmware.h and firmware(9)
  * form more details on the subsystem.
@@ -89,7 +93,7 @@
 	/*
 	 * 'file' is private info managed by the autoload/unload code.
 	 * Set at the end of firmware_get(), cleared only in the
-	 * firmware_task, so the latter can depend on its value even
+	 * firmware_unload_task, so the latter can depend on its value even
 	 * while the lock is not held.
 	 */
 	linker_file_t   file;	/* module file, if autoloaded */
@@ -121,14 +125,16 @@
 static struct priv_fw firmware_table[FIRMWARE_MAX];
 
 /*
- * module release are handled in a separate task as they might sleep.
+ * Firmware module operations are handled in a separate task as they
+ * might sleep and they require directory context to do i/o.
  */
-struct task firmware_task;
+static struct taskqueue *firmware_tq;
+static struct task firmware_unload_task;
 
 /*
  * This mutex protects accesses to the firmware table.
  */
-struct mtx firmware_mtx;
+static struct mtx firmware_mtx;
 MTX_SYSINIT(firmware, &firmware_mtx, "firmware table", MTX_DEF);
 
 /*
@@ -227,7 +233,7 @@
 	} else if (fp->refcnt != 0) {	/* cannot unregister */
 		err = EBUSY;
 	}  else {
-		linker_file_t   x = fp->file;	/* save value */
+		linker_file_t x = fp->file;	/* save value */
 
 		if (fp->parent != NULL)	/* release parent reference */
 			fp->parent->refcnt--;
@@ -244,6 +250,47 @@
 	return err;
 }
 
+static void
+loadimage(void *arg, int npending)
+{
+	struct thread *td = curthread;
+	char *imagename = arg;
+	struct priv_fw *fp;
+	linker_file_t result;
+	int error;
+
+	/* synchronize with the thread that dispatched us */
+	mtx_lock(&firmware_mtx);
+	mtx_unlock(&firmware_mtx);
+
+	if (td->td_proc->p_fd->fd_rdir == NULL) {
+		printf("%s: root not mounted yet, no way to load image\n",
+		    imagename);
+		goto done;
+	}
+	error = linker_reference_module(imagename, NULL, &result);
+	if (error != 0) {
+		printf("%s: could not load firmware image, error %d\n",
+		    imagename, error);
+		goto done;
+	}
+
+	mtx_lock(&firmware_mtx);
+	fp = lookup(imagename, NULL);
+	if (fp == NULL || fp->file != NULL) {
+		mtx_unlock(&firmware_mtx);
+		if (fp == NULL)
+			printf("%s: firmware image loaded, "
+			    "but did not register\n", imagename);
+		(void) linker_release_module(imagename, NULL, NULL);
+		goto done;
+	}
+	fp->file = result;	/* record the module identity */
+	mtx_unlock(&firmware_mtx);
+done:
+	wakeup_one(imagename);		/* we're done */
+}
+
 /*
  * Lookup and potentially load the specified firmware image.
  * If the firmware is not found in the registry, try to load a kernel
@@ -254,9 +301,9 @@
 const struct firmware *
 firmware_get(const char *imagename)
 {
+	struct task fwload_task;
 	struct thread *td;
 	struct priv_fw *fp;
-	linker_file_t result;
 
 	mtx_lock(&firmware_mtx);
 	fp = lookup(imagename, NULL);
@@ -265,29 +312,34 @@
 	/*
 	 * Image not present, try to load the module holding it.
 	 */
-	mtx_unlock(&firmware_mtx);
 	td = curthread;
 	if (priv_check(td, PRIV_FIRMWARE_LOAD) != 0 ||
 	    securelevel_gt(td->td_ucred, 0) != 0) {
+		mtx_unlock(&firmware_mtx);
 		printf("%s: insufficient privileges to "
 		    "load firmware image %s\n", __func__, imagename);
 		return NULL;
 	}
-	(void) linker_reference_module(imagename, NULL, &result);
+	/* 
+	 * Defer load to a thread with known context.  linker_reference_module
+	 * may do filesystem i/o which requires root & current dirs, etc.
+	 * Also we must not hold any mtx's over this call which is problematic.
+	 */
+	if (!cold) {
+		TASK_INIT(&fwload_task, 0, loadimage, __DECONST(void *,
+		    imagename));
+		taskqueue_enqueue(firmware_tq, &fwload_task);
+		msleep(__DECONST(void *, imagename), &firmware_mtx, 0,
+		    "fwload", 0);
+	}
 	/*
-	 * After loading the module, see if the image is registered now.
+	 * After attempting to load the module, see if the image is registered.
 	 */
-	mtx_lock(&firmware_mtx);
 	fp = lookup(imagename, NULL);
 	if (fp == NULL) {
 		mtx_unlock(&firmware_mtx);
-		printf("%s: failed to load firmware image %s\n",
-			__func__, imagename);
-		(void) linker_release_module(imagename, NULL, NULL);
 		return NULL;
 	}
-	fp->file = result;	/* record the module identity */
-
 found:				/* common exit point on success */
 	fp->refcnt++;
 	mtx_unlock(&firmware_mtx);
@@ -300,8 +352,8 @@
  * to release the resource, but the flag is only advisory.
  *
  * If this is the last reference to the firmware image, and this is an
- * autoloaded module, wake up the firmware_task to figure out what to do
- * with the associated module.
+ * autoloaded module, wake up the firmware_unload_task to figure out
+ * what to do with the associated module.
  */
 void
 firmware_put(const struct firmware *p, int flags)
@@ -314,12 +366,53 @@
 		if (flags & FIRMWARE_UNLOAD)
 			fp->flags |= FW_UNLOAD;
 		if (fp->file)
-			taskqueue_enqueue(taskqueue_thread, &firmware_task);
+			taskqueue_enqueue(firmware_tq, &firmware_unload_task);
 	}
 	mtx_unlock(&firmware_mtx);
 }
 
 /*
+ * Setup directory state for the firmware_tq thread so we can do i/o.
+ */
+static void
+set_rootvnode(void *arg, int npending)
+{
+	struct thread *td = curthread;
+	struct proc *p = td->td_proc;
+
+	FILEDESC_XLOCK(p->p_fd);
+	if (p->p_fd->fd_cdir == NULL) {
+		p->p_fd->fd_cdir = rootvnode;
+		VREF(rootvnode);
+	}
+	if (p->p_fd->fd_rdir == NULL) {
+		p->p_fd->fd_rdir = rootvnode;
+		VREF(rootvnode);
+	}
+	FILEDESC_XUNLOCK(p->p_fd);
+
+	free(arg, M_TEMP);
+}
+
+/*
+ * Event handler called on mounting of /; bounce a task
+ * into the task queue thread to setup it's directories.
+ */
+static void
+firmware_mountroot(void *arg)
+{
+	struct task *setroot_task;
+
+	setroot_task = malloc(sizeof(struct task), M_TEMP, M_NOWAIT);
+	if (setroot_task != NULL) {
+		TASK_INIT(setroot_task, 0, set_rootvnode, setroot_task);
+		taskqueue_enqueue(firmware_tq, setroot_task);
+	} else
+		printf("%s: no memory for task!\n", __func__);
+}
+EVENTHANDLER_DEFINE(mountroot, firmware_mountroot, NULL, 0);
+
+/*
  * The body of the task in charge of unloading autoloaded modules
  * that are not needed anymore.
  * Images can be cross-linked so we may need to make multiple passes,
@@ -383,11 +476,23 @@
 firmware_modevent(module_t mod, int type, void *unused)
 {
 	struct priv_fw *fp;
-	int i, err = EINVAL;
+	int i, err;
 
 	switch (type) {
 	case MOD_LOAD:
-		TASK_INIT(&firmware_task, 0, unloadentry, NULL);
+		TASK_INIT(&firmware_unload_task, 0, unloadentry, NULL);
+		firmware_tq = taskqueue_create("taskqueue_firmware", M_WAITOK,
+		    taskqueue_thread_enqueue, &firmware_tq);
+		/* NB: use our own loop routine that sets up context */
+		(void) taskqueue_start_threads(&firmware_tq, 1, PWAIT,
+		    "firmware taskq");
+		if (rootvnode != NULL) {
+			/* 
+			 * Root is already mounted so we won't get an event;
+			 * simulate one here.
+			 */
+			firmware_mountroot(NULL);
+		}
 		return 0;
 
 	case MOD_UNLOAD:
@@ -398,8 +503,9 @@
 			fp->flags |= FW_UNLOAD;;
 		}
 		mtx_unlock(&firmware_mtx);
-		taskqueue_enqueue(taskqueue_thread, &firmware_task);
-		taskqueue_drain(taskqueue_thread, &firmware_task);
+		taskqueue_enqueue(firmware_tq, &firmware_unload_task);
+		taskqueue_drain(firmware_tq, &firmware_unload_task);
+		err = 0;
 		for (i = 0; i < FIRMWARE_MAX; i++) {
 			fp = &firmware_table[i];
 			if (fp->fw.name != NULL) {
@@ -409,6 +515,8 @@
 				err = EINVAL;
 			}
 		}
+		if (err == 0)
+			taskqueue_free(firmware_tq);
 		return err;
 	}
 	return EINVAL;
@@ -417,7 +525,7 @@
 static moduledata_t firmware_mod = {
 	"firmware",
 	firmware_modevent,
-	0
+	NULL
 };
 DECLARE_MODULE(firmware, firmware_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);
 MODULE_VERSION(firmware, 1);

Property changes on: kern/subr_firmware.c
___________________________________________________________________
Modified: cvs2svn:cvs-rev
   - 1.9
   + 1.10

Index: kern/subr_prf.c
===================================================================
--- kern/subr_prf.c	(revision 198202)
+++ kern/subr_prf.c	(working copy)
@@ -955,7 +955,7 @@
 }
 
 SYSCTL_PROC(_kern, OID_AUTO, msgbuf, CTLTYPE_STRING | CTLFLAG_RD,
-    0, 0, sysctl_kern_msgbuf, "A", "Contents of kernel message buffer");
+    NULL, 0, sysctl_kern_msgbuf, "A", "Contents of kernel message buffer");
 
 static int msgbuf_clearflag;
 

Property changes on: contrib/pf
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /head/sys/contrib/pf:r178042,183614,184842,188057

-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 196 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/freebsd-hackers/attachments/20091018/ce4ea719/attachment.pgp


More information about the freebsd-hackers mailing list