svn commit: r208487 - in stable/8: cddl/contrib/opensolaris/lib/libzpool/common/sys sys/cddl/compat/opensolaris/kern sys/cddl/compat/opensolaris/sys sys/cddl/contrib/opensolaris/uts/common/fs/zfs s...

Pawel Jakub Dawidek pjd at FreeBSD.org
Mon May 24 10:09:36 UTC 2010


Author: pjd
Date: Mon May 24 10:09:36 2010
New Revision: 208487
URL: http://svn.freebsd.org/changeset/base/208487

Log:
  MFC r207920,r207934,r207936,r207937,r207970,r208142,r208147,r208148,r208166,
  r208454,r208455,r208458:
  
  r207920:
  
  Back out r205134. It is not stable.
  
  r207934:
  
  Add missing newline characters to the warnings.
  
  r207936:
  
  Even though r203504 eliminates the taste traffic provoked by vdev_geom.c,
  ZFS still likes to open all vdevs, close them, and open them again, which
  in turn provokes taste traffic anyway.
  
  I don't know of any clean way to fix this, so do it the hard way: if we
  can't open the provider for writing, retry up to 5 times with 0.5 second
  pauses. This should eliminate accidental races caused by other classes
  tasting providers created on top of our vdevs.
  
  Reported by:	James R. Van Artsdalen <james-freebsd-fs2 at jrv.org>
  Reported by:	Yuri Pankov <yuri.pankov at gmail.com>
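
  A condensed sketch of the retry pattern, taken from the vdev_geom_open()
  hunk below: drop the topology lock, pause for half a second, and try
  g_access() again, up to five times before giving up.

      g_topology_lock();
      for (i = 0; i < 5; i++) {
          /* Ask for write access: 0 read, +1 write, 0 exclusive. */
          error = g_access(cp, 0, 1, 0);
          if (error == 0)
              break;
          /* Another class may still be tasting the provider; back off. */
          g_topology_unlock();
          tsleep(vd, 0, "vdev", hz / 2);
          g_topology_lock();
      }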
  
  r207937:
  
  I added the vfs_lowvnodes event, but it was only used for a short while
  and is now totally unused. Remove it.
  
  r207970:
  
  When we run out of memory or KVA, try to help by reclaiming some vnodes.
  This helps with 'kmem_map too small' panics.
  
  No objections from:	kib
  Tested by:		Alexander V. Ribchansky <shurik at zk.informjust.ua>
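
  For reference, the new handler from the vfs_subr.c hunk below: it hooks
  the existing vm_lowmem event and frees a slice of the free vnode list.

      static void
      vfs_lowmem(void *arg __unused)
      {

          /* On a low memory condition, free 1/8th of the free vnodes. */
          mtx_lock(&vnode_free_list_mtx);
          vnlru_free(freevnodes / 8);
          mtx_unlock(&vnode_free_list_mtx);
      }
      EVENTHANDLER_DEFINE(vm_lowmem, vfs_lowmem, NULL, 0);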
  
  r208142:
  
  The whole point of having a dedicated worker thread for each leaf VDEV was
  to avoid calling zio_interrupt() from the geom_up thread context. It turns
  out that when a provider is forcibly removed from the system and we kill
  the worker thread, there can still be some ZIOs pending. To complete those
  pending ZIOs when there is no worker thread anymore, we still have to call
  zio_interrupt() from the geom_up context. To avoid this race, just remove
  the use of worker threads altogether. This should be more or less fine: I
  had thought zio_interrupt() did more work, but it only makes a small UMA
  allocation with M_WAITOK. This also saves one context switch per I/O
  request.
  
  PR:		kern/145339
  Reported by:	Alex Bakhtin <Alex.Bakhtin at gmail.com>
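
  With the worker threads gone, the GEOM completion callback finishes the
  ZIO directly; condensed from the vdev_geom_io_intr() hunk below (the
  BIO_FLUSH/ENOTSUP handling is omitted here).

      static void
      vdev_geom_io_intr(struct bio *bp)
      {
          zio_t *zio = bp->bio_caller1;

          zio->io_error = bp->bio_error;
          if (zio->io_error == 0 && bp->bio_resid != 0)
              zio->io_error = EIO;
          g_destroy_bio(bp);
          /* Runs from the geom_up thread; must not sleep (see r208147). */
          zio_interrupt(zio);
      }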
  
  r208147:
  
  Add a task structure to the zio and use it instead of allocating one.
  This eliminates the only place where we could sleep when calling
  zio_interrupt(). As a side effect this can actually improve performance
  a little, as we allocate one less thing for every I/O.
  
  Prodded by:	kib
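
  A condensed view of the new calling pattern, taken from the zio.h and
  zio.c hunks below: the caller passes a struct ostask embedded in the zio,
  so the dispatch never has to allocate (and therefore never sleeps).

      /* In struct zio (kernel only): preallocated task storage. */
      struct ostask   io_task;

      /* Dispatch using the embedded task instead of a TQ_SLEEP allocation. */
      (void) taskq_dispatch_safe(zio->io_spa->spa_zio_taskq[t][q],
          (task_func_t *)zio_execute, zio, &zio->io_task);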
  
  r208148:
  
  Allow UMA usage for ZIO data to be configured via the loader and turn it
  on by default on amd64. On i386 I saw performance degradation when UMA
  was used, but on amd64 it should help.
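
  The knob is a read-only loader tunable (see the zio.c hunk below), so it
  has to be set at boot time; for example, in /boot/loader.conf:

      # Use uma(9) for ZIO buffer allocations (default 1 on amd64, 0 elsewhere).
      vfs.zfs.zio.use_uma="1"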
  
  r208166:
  
  Fix the userland build by making io_task available only in the kernel and
  by providing a taskq_dispatch_safe() macro.
  
  r208454:
  
  Remove ZIO_USE_UMA from arc.c as well.
  
  r208455:
  
  ZIO_USE_UMA is no longer used.
  
  r208458:
  
  Create UMA zones unconditionally.

Added:
  stable/8/sys/cddl/compat/opensolaris/sys/taskq.h
     - copied unchanged from r208147, head/sys/cddl/compat/opensolaris/sys/taskq.h
Modified:
  stable/8/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
  stable/8/sys/cddl/compat/opensolaris/kern/opensolaris_taskq.c
  stable/8/sys/cddl/compat/opensolaris/sys/dnlc.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
  stable/8/sys/kern/vfs_subr.c
  stable/8/sys/modules/zfs/Makefile
  stable/8/sys/sys/eventhandler.h
Directory Properties:
  stable/8/cddl/contrib/opensolaris/   (props changed)
  stable/8/cddl/contrib/opensolaris/cmd/zdb/   (props changed)
  stable/8/cddl/contrib/opensolaris/cmd/zfs/   (props changed)
  stable/8/cddl/contrib/opensolaris/lib/libzfs/   (props changed)
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/xen/xenpci/   (props changed)
  stable/8/sys/geom/sched/   (props changed)

Modified: stable/8/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
==============================================================================
--- stable/8/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h	Mon May 24 07:04:00 2010	(r208486)
+++ stable/8/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h	Mon May 24 10:09:36 2010	(r208487)
@@ -343,6 +343,9 @@ extern void	taskq_wait(taskq_t *);
 extern int	taskq_member(taskq_t *, void *);
 extern void	system_taskq_init(void);
 
+#define	taskq_dispatch_safe(tq, func, arg, task)			\
+	taskq_dispatch((tq), (func), (arg), TQ_SLEEP)
+
 #define	XVA_MAPSIZE	3
 #define	XVA_MAGIC	0x78766174
 

Modified: stable/8/sys/cddl/compat/opensolaris/kern/opensolaris_taskq.c
==============================================================================
--- stable/8/sys/cddl/compat/opensolaris/kern/opensolaris_taskq.c	Mon May 24 07:04:00 2010	(r208486)
+++ stable/8/sys/cddl/compat/opensolaris/kern/opensolaris_taskq.c	Mon May 24 10:09:36 2010	(r208487)
@@ -40,12 +40,6 @@ __FBSDID("$FreeBSD$");
 
 static uma_zone_t taskq_zone;
 
-struct ostask {
-	struct task	ost_task;
-	task_func_t	*ost_func;
-	void		*ost_arg;
-};
-
 taskq_t *system_taskq = NULL;
 
 static void
@@ -140,3 +134,32 @@ taskq_dispatch(taskq_t *tq, task_func_t 
 
 	return ((taskqid_t)(void *)task);
 }
+
+#define	TASKQ_MAGIC	0x74541c
+
+static void
+taskq_run_safe(void *arg, int pending __unused)
+{
+	struct ostask *task = arg;
+
+	ASSERT(task->ost_magic == TASKQ_MAGIC);
+	task->ost_func(task->ost_arg);
+	task->ost_magic = 0;
+}
+
+taskqid_t
+taskq_dispatch_safe(taskq_t *tq, task_func_t func, void *arg,
+    struct ostask *task)
+{
+
+	ASSERT(task->ost_magic != TASKQ_MAGIC);
+
+	task->ost_magic = TASKQ_MAGIC;
+	task->ost_func = func;
+	task->ost_arg = arg;
+
+	TASK_INIT(&task->ost_task, 0, taskq_run_safe, task);
+	taskqueue_enqueue(tq->tq_queue, &task->ost_task);
+
+	return ((taskqid_t)(void *)task);
+}

Modified: stable/8/sys/cddl/compat/opensolaris/sys/dnlc.h
==============================================================================
--- stable/8/sys/cddl/compat/opensolaris/sys/dnlc.h	Mon May 24 07:04:00 2010	(r208486)
+++ stable/8/sys/cddl/compat/opensolaris/sys/dnlc.h	Mon May 24 10:09:36 2010	(r208487)
@@ -35,6 +35,6 @@
 #define	dnlc_update(dvp, name, vp)	do { } while (0)
 #define	dnlc_remove(dvp, name)		do { } while (0)
 #define	dnlc_purge_vfsp(vfsp, count)	(0)
-#define	dnlc_reduce_cache(percent)	EVENTHANDLER_INVOKE(vfs_lowvnodes, (int)(intptr_t)(percent))
+#define	dnlc_reduce_cache(percent)	do { } while (0)
 
 #endif	/* !_OPENSOLARIS_SYS_DNLC_H_ */

Copied: stable/8/sys/cddl/compat/opensolaris/sys/taskq.h (from r208147, head/sys/cddl/compat/opensolaris/sys/taskq.h)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/8/sys/cddl/compat/opensolaris/sys/taskq.h	Mon May 24 10:09:36 2010	(r208487, copy of r208147, head/sys/cddl/compat/opensolaris/sys/taskq.h)
@@ -0,0 +1,44 @@
+/*-
+ * Copyright (c) 2010 Pawel Jakub Dawidek <pjd at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _OPENSOLARIS_SYS_TASKQ_H_
+#define	_OPENSOLARIS_SYS_TASKQ_H_
+
+#include_next <sys/taskq.h>
+
+struct ostask {
+	struct task	 ost_task;
+	task_func_t	*ost_func;
+	void		*ost_arg;
+	int		 ost_magic;
+};
+
+taskqid_t taskq_dispatch_safe(taskq_t *tq, task_func_t func, void *arg,
+    struct ostask *task);
+
+#endif	/* _OPENSOLARIS_SYS_TASKQ_H_ */

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Mon May 24 07:04:00 2010	(r208486)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c	Mon May 24 10:09:36 2010	(r208487)
@@ -195,11 +195,6 @@ SYSCTL_QUAD(_vfs_zfs, OID_AUTO, arc_min,
 SYSCTL_INT(_vfs_zfs, OID_AUTO, mdcomp_disable, CTLFLAG_RDTUN,
     &zfs_mdcomp_disable, 0, "Disable metadata compression");
 
-#ifdef ZIO_USE_UMA
-extern kmem_cache_t	*zio_buf_cache[];
-extern kmem_cache_t	*zio_data_buf_cache[];
-#endif
-
 /*
  * Note that buffers can be in one of 6 states:
  *	ARC_anon	- anonymous (discussed below)
@@ -620,11 +615,6 @@ static buf_hash_table_t buf_hash_table;
 
 uint64_t zfs_crc64_table[256];
 
-#ifdef ZIO_USE_UMA
-extern kmem_cache_t	*zio_buf_cache[];
-extern kmem_cache_t	*zio_data_buf_cache[];
-#endif
-
 /*
  * Level 2 ARC
  */
@@ -2192,14 +2182,15 @@ arc_reclaim_needed(void)
 	return (0);
 }
 
+extern kmem_cache_t	*zio_buf_cache[];
+extern kmem_cache_t	*zio_data_buf_cache[];
+
 static void
 arc_kmem_reap_now(arc_reclaim_strategy_t strat)
 {
-#ifdef ZIO_USE_UMA
 	size_t			i;
 	kmem_cache_t		*prev_cache = NULL;
 	kmem_cache_t		*prev_data_cache = NULL;
-#endif
 
 #ifdef _KERNEL
 	if (arc_meta_used >= arc_meta_limit) {
@@ -2224,7 +2215,6 @@ arc_kmem_reap_now(arc_reclaim_strategy_t
 	if (strat == ARC_RECLAIM_AGGR)
 		arc_shrink();
 
-#ifdef ZIO_USE_UMA
 	for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) {
 		if (zio_buf_cache[i] != prev_cache) {
 			prev_cache = zio_buf_cache[i];
@@ -2235,7 +2225,6 @@ arc_kmem_reap_now(arc_reclaim_strategy_t
 			kmem_cache_reap_now(zio_data_buf_cache[i]);
 		}
 	}
-#endif
 	kmem_cache_reap_now(buf_cache);
 	kmem_cache_reap_now(hdr_cache);
 }

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h	Mon May 24 07:04:00 2010	(r208486)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h	Mon May 24 10:09:36 2010	(r208487)
@@ -316,6 +316,11 @@ struct zio {
 
 	/* FMA state */
 	uint64_t	io_ena;
+
+#ifdef _KERNEL
+	/* FreeBSD only. */
+	struct ostask	io_task;
+#endif
 };
 
 extern zio_t *zio_null(zio_t *pio, spa_t *spa,

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c	Mon May 24 07:04:00 2010	(r208486)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c	Mon May 24 10:09:36 2010	(r208487)
@@ -47,31 +47,6 @@ struct g_class zfs_vdev_class = {
 
 DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
 
-typedef struct vdev_geom_ctx {
-	struct g_consumer *gc_consumer;
-	int gc_state;
-	struct bio_queue_head gc_queue;
-	struct mtx gc_queue_mtx;
-} vdev_geom_ctx_t;
-
-static void
-vdev_geom_release(vdev_t *vd)
-{
-	vdev_geom_ctx_t *ctx;
-
-	ctx = vd->vdev_tsd;
-	vd->vdev_tsd = NULL;
-
-	mtx_lock(&ctx->gc_queue_mtx);
-	ctx->gc_state = 1;
-	wakeup_one(&ctx->gc_queue);
-	while (ctx->gc_state != 2)
-		msleep(&ctx->gc_state, &ctx->gc_queue_mtx, 0, "vgeom:w", 0);
-	mtx_unlock(&ctx->gc_queue_mtx);
-	mtx_destroy(&ctx->gc_queue_mtx);
-	kmem_free(ctx, sizeof(*ctx));
-}
-
 static void
 vdev_geom_orphan(struct g_consumer *cp)
 {
@@ -96,8 +71,7 @@ vdev_geom_orphan(struct g_consumer *cp)
 		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
 		g_wither_geom(gp, error);
 	}
-	vdev_geom_release(vd);
-
+	vd->vdev_tsd = NULL;
 	vd->vdev_remove_wanted = B_TRUE;
 	spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
 }
@@ -188,52 +162,6 @@ vdev_geom_detach(void *arg, int flag __u
 	}
 }
 
-static void
-vdev_geom_worker(void *arg)
-{
-	vdev_geom_ctx_t *ctx;
-	zio_t *zio;
-	struct bio *bp;
-
-	thread_lock(curthread);
-	sched_prio(curthread, PRIBIO);
-	thread_unlock(curthread);
-
-	ctx = arg;
-	for (;;) {
-		mtx_lock(&ctx->gc_queue_mtx);
-		bp = bioq_takefirst(&ctx->gc_queue);
-		if (bp == NULL) {
-			if (ctx->gc_state == 1) {
-				ctx->gc_state = 2;
-				wakeup_one(&ctx->gc_state);
-				mtx_unlock(&ctx->gc_queue_mtx);
-				kthread_exit();
-			}
-			msleep(&ctx->gc_queue, &ctx->gc_queue_mtx,
-			    PRIBIO | PDROP, "vgeom:io", 0);
-			continue;
-		}
-		mtx_unlock(&ctx->gc_queue_mtx);
-		zio = bp->bio_caller1;
-		zio->io_error = bp->bio_error;
-		if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) {
-			vdev_t *vd;
-
-			/*
-			 * If we get ENOTSUP, we know that no future
-			 * attempts will ever succeed.  In this case we
-			 * set a persistent bit so that we don't bother
-			 * with the ioctl in the future.
-			 */
-			vd = zio->io_vd;
-			vd->vdev_nowritecache = B_TRUE;
-		}
-		g_destroy_bio(bp);
-		zio_interrupt(zio);
-	}
-}
-
 static uint64_t
 nvlist_get_guid(nvlist_t *list)
 {
@@ -396,7 +324,7 @@ vdev_geom_attach_by_guid_event(void *arg
 					continue;
 				ap->cp = vdev_geom_attach(pp);
 				if (ap->cp == NULL) {
-					printf("ZFS WARNING: Unable to attach to %s.",
+					printf("ZFS WARNING: Unable to attach to %s.\n",
 					    pp->name);
 					continue;
 				}
@@ -488,7 +416,6 @@ vdev_geom_open_by_path(vdev_t *vd, int c
 static int
 vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
 {
-	vdev_geom_ctx_t *ctx;
 	struct g_provider *pp;
 	struct g_consumer *cp;
 	int error, owned;
@@ -530,10 +457,19 @@ vdev_geom_open(vdev_t *vd, uint64_t *psi
 		ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
 		error = ENOENT;
 	} else if (cp->acw == 0 && (spa_mode & FWRITE) != 0) {
+		int i;
+
 		g_topology_lock();
-		error = g_access(cp, 0, 1, 0);
+		for (i = 0; i < 5; i++) {
+			error = g_access(cp, 0, 1, 0);
+			if (error == 0)
+				break;
+			g_topology_unlock();
+			tsleep(vd, 0, "vdev", hz / 2);
+			g_topology_lock();
+		}
 		if (error != 0) {
-			printf("ZFS WARNING: Unable to open %s for writing (error=%d).",
+			printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
 			    vd->vdev_path, error);
 			vdev_geom_detach(cp, 0);
 			cp = NULL;
@@ -548,19 +484,9 @@ vdev_geom_open(vdev_t *vd, uint64_t *psi
 	}
 
 	cp->private = vd;
-
-	ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP);
-	bioq_init(&ctx->gc_queue);
-	mtx_init(&ctx->gc_queue_mtx, "zfs:vdev:geom:queue", NULL, MTX_DEF);
-	ctx->gc_consumer = cp;
-	ctx->gc_state = 0;
-
-	vd->vdev_tsd = ctx;
+	vd->vdev_tsd = cp;
 	pp = cp->provider;
 
-	kproc_kthread_add(vdev_geom_worker, ctx, &zfsproc, NULL, 0, 0,
-	    "zfskern", "vdev %s", pp->name);
-
 	/*
 	 * Determine the actual size of the device.
 	 */
@@ -583,50 +509,49 @@ vdev_geom_open(vdev_t *vd, uint64_t *psi
 static void
 vdev_geom_close(vdev_t *vd)
 {
-	vdev_geom_ctx_t *ctx;
 	struct g_consumer *cp;
 
-	if ((ctx = vd->vdev_tsd) == NULL)
+	cp = vd->vdev_tsd;
+	if (cp == NULL)
 		return;
-	if ((cp = ctx->gc_consumer) == NULL)
-		return;
-	vdev_geom_release(vd);
+	vd->vdev_tsd = NULL;
 	g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL);
 }
 
 static void
 vdev_geom_io_intr(struct bio *bp)
 {
-	vdev_geom_ctx_t *ctx;
 	zio_t *zio;
 
 	zio = bp->bio_caller1;
-	ctx = zio->io_vd->vdev_tsd;
-
-	if ((zio->io_error = bp->bio_error) == 0 && bp->bio_resid != 0)
+	zio->io_error = bp->bio_error;
+	if (zio->io_error == 0 && bp->bio_resid != 0)
 		zio->io_error = EIO;
+	if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) {
+		vdev_t *vd;
 
-	mtx_lock(&ctx->gc_queue_mtx);
-	bioq_insert_tail(&ctx->gc_queue, bp);
-	wakeup_one(&ctx->gc_queue);
-	mtx_unlock(&ctx->gc_queue_mtx);
+		/*
+		 * If we get ENOTSUP, we know that no future
+		 * attempts will ever succeed.  In this case we
+		 * set a persistent bit so that we don't bother
+		 * with the ioctl in the future.
+		 */
+		vd = zio->io_vd;
+		vd->vdev_nowritecache = B_TRUE;
+	}
+	g_destroy_bio(bp);
+	zio_interrupt(zio);
 }
 
 static int
 vdev_geom_io_start(zio_t *zio)
 {
 	vdev_t *vd;
-	vdev_geom_ctx_t *ctx;
 	struct g_consumer *cp;
 	struct bio *bp;
 	int error;
 
-	cp = NULL;
-
 	vd = zio->io_vd;
-	ctx = vd->vdev_tsd;
-	if (ctx != NULL)
-		cp = ctx->gc_consumer;
 
 	if (zio->io_type == ZIO_TYPE_IOCTL) {
 		/* XXPOLICY */
@@ -655,6 +580,7 @@ vdev_geom_io_start(zio_t *zio)
 		return (ZIO_PIPELINE_CONTINUE);
 	}
 sendreq:
+	cp = vd->vdev_tsd;
 	if (cp == NULL) {
 		zio->io_error = ENXIO;
 		return (ZIO_PIPELINE_CONTINUE);

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	Mon May 24 07:04:00 2010	(r208486)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	Mon May 24 10:09:36 2010	(r208487)
@@ -33,6 +33,17 @@
 #include <sys/zio_compress.h>
 #include <sys/zio_checksum.h>
 
+#if defined(__amd64__)
+static int zio_use_uma = 1;
+#else
+static int zio_use_uma = 0;
+#endif
+SYSCTL_DECL(_vfs_zfs);
+SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
+TUNABLE_INT("vfs.zfs.zio.use_uma", &zio_use_uma);
+SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, use_uma, CTLFLAG_RDTUN, &zio_use_uma, 0,
+    "Use uma(9) for ZIO allocations");
+
 /*
  * ==========================================================================
  * I/O priority table
@@ -69,10 +80,8 @@ char *zio_type_name[ZIO_TYPES] = {
  * ==========================================================================
  */
 kmem_cache_t *zio_cache;
-#ifdef ZIO_USE_UMA
 kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
 kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
-#endif
 
 #ifdef _KERNEL
 extern vmem_t *zio_alloc_arena;
@@ -88,13 +97,10 @@ extern vmem_t *zio_alloc_arena;
 void
 zio_init(void)
 {
-#ifdef ZIO_USE_UMA
 	size_t c;
-#endif
 	zio_cache = kmem_cache_create("zio_cache", sizeof (zio_t), 0,
 	    NULL, NULL, NULL, NULL, NULL, 0);
 
-#ifdef ZIO_USE_UMA
 	/*
 	 * For small buffers, we want a cache for each multiple of
 	 * SPA_MINBLOCKSIZE.  For medium-size buffers, we want a cache
@@ -138,7 +144,6 @@ zio_init(void)
 		if (zio_data_buf_cache[c - 1] == NULL)
 			zio_data_buf_cache[c - 1] = zio_data_buf_cache[c];
 	}
-#endif
 
 	zio_inject_init();
 }
@@ -146,7 +151,6 @@ zio_init(void)
 void
 zio_fini(void)
 {
-#ifdef ZIO_USE_UMA
 	size_t c;
 	kmem_cache_t *last_cache = NULL;
 	kmem_cache_t *last_data_cache = NULL;
@@ -164,7 +168,6 @@ zio_fini(void)
 		}
 		zio_data_buf_cache[c] = NULL;
 	}
-#endif
 
 	kmem_cache_destroy(zio_cache);
 
@@ -186,15 +189,14 @@ zio_fini(void)
 void *
 zio_buf_alloc(size_t size)
 {
-#ifdef ZIO_USE_UMA
 	size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
 
 	ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
 
-	return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE));
-#else
-	return (kmem_alloc(size, KM_SLEEP));
-#endif
+	if (zio_use_uma)
+		return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE));
+	else
+		return (kmem_alloc(size, KM_SLEEP));
 }
 
 /*
@@ -206,43 +208,40 @@ zio_buf_alloc(size_t size)
 void *
 zio_data_buf_alloc(size_t size)
 {
-#ifdef ZIO_USE_UMA
 	size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
 
 	ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
 
-	return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE));
-#else
-	return (kmem_alloc(size, KM_SLEEP));
-#endif
+	if (zio_use_uma)
+		return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE));
+	else
+		return (kmem_alloc(size, KM_SLEEP));
 }
 
 void
 zio_buf_free(void *buf, size_t size)
 {
-#ifdef ZIO_USE_UMA
 	size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
 
 	ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
 
-	kmem_cache_free(zio_buf_cache[c], buf);
-#else
-	kmem_free(buf, size);
-#endif
+	if (zio_use_uma)
+		kmem_cache_free(zio_buf_cache[c], buf);
+	else
+		kmem_free(buf, size);
 }
 
 void
 zio_data_buf_free(void *buf, size_t size)
 {
-#ifdef ZIO_USE_UMA
 	size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
 
 	ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
 
-	kmem_cache_free(zio_data_buf_cache[c], buf);
-#else
-	kmem_free(buf, size);
-#endif
+	if (zio_use_uma)
+		kmem_cache_free(zio_data_buf_cache[c], buf);
+	else
+		kmem_free(buf, size);
 }
 
 /*
@@ -908,8 +907,8 @@ zio_taskq_dispatch(zio_t *zio, enum zio_
 	if (t == ZIO_TYPE_WRITE && zio->io_vd && zio->io_vd->vdev_aux)
 		t = ZIO_TYPE_NULL;
 
-	(void) taskq_dispatch(zio->io_spa->spa_zio_taskq[t][q],
-	    (task_func_t *)zio_execute, zio, TQ_SLEEP);
+	(void) taskq_dispatch_safe(zio->io_spa->spa_zio_taskq[t][q],
+	    (task_func_t *)zio_execute, zio, &zio->io_task);
 }
 
 static boolean_t
@@ -2220,9 +2219,9 @@ zio_done(zio_t *zio)
 			 * Reexecution is potentially a huge amount of work.
 			 * Hand it off to the otherwise-unused claim taskq.
 			 */
-			(void) taskq_dispatch(
+			(void) taskq_dispatch_safe(
 			    spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE],
-			    (task_func_t *)zio_reexecute, zio, TQ_SLEEP);
+			    (task_func_t *)zio_reexecute, zio, &zio->io_task);
 		}
 		return (ZIO_PIPELINE_STOP);
 	}

Modified: stable/8/sys/kern/vfs_subr.c
==============================================================================
--- stable/8/sys/kern/vfs_subr.c	Mon May 24 07:04:00 2010	(r208486)
+++ stable/8/sys/kern/vfs_subr.c	Mon May 24 10:09:36 2010	(r208487)
@@ -800,7 +800,6 @@ vnlru_proc(void)
 		}
 		mtx_unlock(&mountlist_mtx);
 		if (done == 0) {
-			EVENTHANDLER_INVOKE(vfs_lowvnodes, desiredvnodes / 10);
 #if 0
 			/* These messages are temporary debugging aids */
 			if (vnlru_nowhere < 5)
@@ -822,6 +821,19 @@ static struct kproc_desc vnlru_kp = {
 };
 SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start,
     &vnlru_kp);
+ 
+static void
+vfs_lowmem(void *arg __unused)
+{
+
+	/*
+	 * On low memory condition free 1/8th of the free vnodes.
+	 */
+	mtx_lock(&vnode_free_list_mtx);
+	vnlru_free(freevnodes / 8);
+	mtx_unlock(&vnode_free_list_mtx);
+}
+EVENTHANDLER_DEFINE(vm_lowmem, vfs_lowmem, NULL, 0);
 
 /*
  * Routines having to do with the management of the vnode table.

Modified: stable/8/sys/modules/zfs/Makefile
==============================================================================
--- stable/8/sys/modules/zfs/Makefile	Mon May 24 07:04:00 2010	(r208486)
+++ stable/8/sys/modules/zfs/Makefile	Mon May 24 10:09:36 2010	(r208487)
@@ -63,9 +63,6 @@ ZFS_SRCS=	${ZFS_OBJS:C/.o$/.c/}
 SRCS+=	${ZFS_SRCS}
 SRCS+=	vdev_geom.c
 
-# Use UMA for ZIO allocation.
-CFLAGS+=-DZIO_USE_UMA
-
 # Use FreeBSD's namecache.
 CFLAGS+=-DFREEBSD_NAMECACHE
 

Modified: stable/8/sys/sys/eventhandler.h
==============================================================================
--- stable/8/sys/sys/eventhandler.h	Mon May 24 07:04:00 2010	(r208486)
+++ stable/8/sys/sys/eventhandler.h	Mon May 24 10:09:36 2010	(r208487)
@@ -183,10 +183,6 @@ typedef void (*vm_lowmem_handler_t)(void
 #define	LOWMEM_PRI_DEFAULT	EVENTHANDLER_PRI_FIRST
 EVENTHANDLER_DECLARE(vm_lowmem, vm_lowmem_handler_t);
 
-/* Low vnodes event */
-typedef void (*vfs_lowvnodes_handler_t)(void *, int);
-EVENTHANDLER_DECLARE(vfs_lowvnodes, vfs_lowvnodes_handler_t);
-
 /* Root mounted event */
 typedef void (*mountroot_handler_t)(void *);
 EVENTHANDLER_DECLARE(mountroot, mountroot_handler_t);

