svn commit: r252765 - in stable/8: cddl/contrib/opensolaris/cmd/zfs cddl/contrib/opensolaris/cmd/zhack cddl/contrib/opensolaris/cmd/ztest cddl/contrib/opensolaris/lib/libzfs/common cddl/contrib/ope...

Xin LI delphij at FreeBSD.org
Fri Jul 5 04:11:37 UTC 2013


Author: delphij
Date: Fri Jul  5 04:11:34 2013
New Revision: 252765
URL: http://svnweb.freebsd.org/changeset/base/252765

Log:
  MFC r251646 + r252219:
  
  MFV r251644:
  
  Poor ZFS send / receive performance due to snapshot
  hold / release processing (by smh@)
  
  Illumos ZFS issues:
  3740 Poor ZFS send / receive performance due to snapshot
       hold / release processing
  
  MFV r252215:
  
  Restore a previous behavior before r251646, where when destructing
  ZFS snapshot, the ioctl would return ENOENT when it hit any of
  them in the errlist (the new behavior was only return ENOENT when
  all returns error).
  
  Illumos ZFS issues:
    3829 fix for 3740 changed behavior of zfs destroy/hold/release ioctl

Modified:
  stable/8/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
  stable/8/cddl/contrib/opensolaris/cmd/zhack/zhack.c
  stable/8/cddl/contrib/opensolaris/cmd/ztest/ztest.c
  stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
  stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
  stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
  stable/8/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_userhold.h
  stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
Directory Properties:
  stable/8/cddl/contrib/opensolaris/   (props changed)
  stable/8/cddl/contrib/opensolaris/cmd/zfs/   (props changed)
  stable/8/cddl/contrib/opensolaris/lib/libzfs/   (props changed)
  stable/8/sys/   (props changed)
  stable/8/sys/cddl/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)

Modified: stable/8/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
==============================================================================
--- stable/8/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c	Fri Jul  5 04:08:45 2013	(r252764)
+++ stable/8/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c	Fri Jul  5 04:11:34 2013	(r252765)
@@ -28,6 +28,7 @@
  * Copyright (c) 2011-2012 Pawel Jakub Dawidek <pawel at dawidek.net>.
  * All rights reserved.
  * Copyright (c) 2012 Martin Matuska <mm at FreeBSD.org>. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland.  All rights reserved.
  */
 
 #include <assert.h>
@@ -5222,8 +5223,7 @@ zfs_do_hold_rele_impl(int argc, char **a
 			continue;
 		}
 		if (holding) {
-			if (zfs_hold(zhp, delim+1, tag, recursive,
-			    B_FALSE, -1) != 0)
+			if (zfs_hold(zhp, delim+1, tag, recursive, -1) != 0)
 				++errors;
 		} else {
 			if (zfs_release(zhp, delim+1, tag, recursive) != 0)

Modified: stable/8/cddl/contrib/opensolaris/cmd/zhack/zhack.c
==============================================================================
--- stable/8/cddl/contrib/opensolaris/cmd/zhack/zhack.c	Fri Jul  5 04:08:45 2013	(r252764)
+++ stable/8/cddl/contrib/opensolaris/cmd/zhack/zhack.c	Fri Jul  5 04:11:34 2013	(r252765)
@@ -21,6 +21,7 @@
 
 /*
  * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 /*
@@ -153,7 +154,7 @@ import_pool(const char *target, boolean_
 	g_importargs.poolname = g_pool;
 	pools = zpool_search_import(g_zfs, &g_importargs);
 
-	if (pools == NULL || nvlist_next_nvpair(pools, NULL) == NULL) {
+	if (nvlist_empty(pools)) {
 		if (!g_importargs.can_be_active) {
 			g_importargs.can_be_active = B_TRUE;
 			if (zpool_search_import(g_zfs, &g_importargs) != NULL ||

Modified: stable/8/cddl/contrib/opensolaris/cmd/ztest/ztest.c
==============================================================================
--- stable/8/cddl/contrib/opensolaris/cmd/ztest/ztest.c	Fri Jul  5 04:08:45 2013	(r252764)
+++ stable/8/cddl/contrib/opensolaris/cmd/ztest/ztest.c	Fri Jul  5 04:11:34 2013	(r252765)
@@ -23,6 +23,7 @@
  * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2012 Martin Matuska <mm at FreeBSD.org>.  All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 /*
@@ -4713,7 +4714,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, 
 
 	error = user_release_one(fullname, tag);
 	if (error)
-		fatal(0, "user_release_one(%s)", fullname, tag);
+		fatal(0, "user_release_one(%s, %s) = %d", fullname, tag, error);
 
 	VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT);
 

Modified: stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
==============================================================================
--- stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h	Fri Jul  5 04:08:45 2013	(r252764)
+++ stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h	Fri Jul  5 04:11:34 2013	(r252765)
@@ -27,6 +27,7 @@
  * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  * Copyright (c) 2012 Martin Matuska <mm at FreeBSD.org>. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 #ifndef	_LIBZFS_H
@@ -611,7 +612,8 @@ extern int zfs_send(zfs_handle_t *, cons
 
 extern int zfs_promote(zfs_handle_t *);
 extern int zfs_hold(zfs_handle_t *, const char *, const char *,
-    boolean_t, boolean_t, int);
+    boolean_t, int);
+extern int zfs_hold_nvl(zfs_handle_t *, int, nvlist_t *);
 extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
 extern int zfs_get_holds(zfs_handle_t *, nvlist_t **);
 extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *);

Modified: stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c
==============================================================================
--- stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c	Fri Jul  5 04:08:45 2013	(r252764)
+++ stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_dataset.c	Fri Jul  5 04:11:34 2013	(r252765)
@@ -21,12 +21,13 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
  * Copyright (c) 2012 DEY Storage Systems, Inc.  All rights reserved.
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2011-2012 Pawel Jakub Dawidek <pawel at dawidek.net>.
  * All rights reserved.
  * Copyright (c) 2012 Martin Matuska <mm at FreeBSD.org>. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 #include <ctype.h>
@@ -3158,18 +3159,14 @@ static int
 zfs_check_snap_cb(zfs_handle_t *zhp, void *arg)
 {
 	struct destroydata *dd = arg;
-	zfs_handle_t *szhp;
 	char name[ZFS_MAXNAMELEN];
 	int rv = 0;
 
 	(void) snprintf(name, sizeof (name),
 	    "%s@%s", zhp->zfs_name, dd->snapname);
 
-	szhp = make_dataset_handle(zhp->zfs_hdl, name);
-	if (szhp) {
+	if (lzc_exists(name))
 		verify(nvlist_add_boolean(dd->nvl, name) == 0);
-		zfs_close(szhp);
-	}
 
 	rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, dd);
 	zfs_close(zhp);
@@ -3189,7 +3186,7 @@ zfs_destroy_snaps(zfs_handle_t *zhp, cha
 	verify(nvlist_alloc(&dd.nvl, NV_UNIQUE_NAME, 0) == 0);
 	(void) zfs_check_snap_cb(zfs_handle_dup(zhp), &dd);
 
-	if (nvlist_next_nvpair(dd.nvl, NULL) == NULL) {
+	if (nvlist_empty(dd.nvl)) {
 		ret = zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT,
 		    dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"),
 		    zhp->zfs_name, snapname);
@@ -3214,7 +3211,7 @@ zfs_destroy_snaps_nvl(libzfs_handle_t *h
 	if (ret == 0)
 		return (0);
 
-	if (nvlist_next_nvpair(errlist, NULL) == NULL) {
+	if (nvlist_empty(errlist)) {
 		char errbuf[1024];
 		(void) snprintf(errbuf, sizeof (errbuf),
 		    dgettext(TEXT_DOMAIN, "cannot destroy snapshots"));
@@ -4162,24 +4159,21 @@ struct holdarg {
 	const char *snapname;
 	const char *tag;
 	boolean_t recursive;
+	int error;
 };
 
 static int
 zfs_hold_one(zfs_handle_t *zhp, void *arg)
 {
 	struct holdarg *ha = arg;
-	zfs_handle_t *szhp;
 	char name[ZFS_MAXNAMELEN];
 	int rv = 0;
 
 	(void) snprintf(name, sizeof (name),
 	    "%s@%s", zhp->zfs_name, ha->snapname);
 
-	szhp = make_dataset_handle(zhp->zfs_hdl, name);
-	if (szhp) {
+	if (lzc_exists(name))
 		fnvlist_add_string(ha->nvl, name, ha->tag);
-		zfs_close(szhp);
-	}
 
 	if (ha->recursive)
 		rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha);
@@ -4189,14 +4183,10 @@ zfs_hold_one(zfs_handle_t *zhp, void *ar
 
 int
 zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
-    boolean_t recursive, boolean_t enoent_ok, int cleanup_fd)
+    boolean_t recursive, int cleanup_fd)
 {
 	int ret;
 	struct holdarg ha;
-	nvlist_t *errors;
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-	char errbuf[1024];
-	nvpair_t *elem;
 
 	ha.nvl = fnvlist_alloc();
 	ha.snapname = snapname;
@@ -4204,26 +4194,44 @@ zfs_hold(zfs_handle_t *zhp, const char *
 	ha.recursive = recursive;
 	(void) zfs_hold_one(zfs_handle_dup(zhp), &ha);
 
-	if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) {
+	if (nvlist_empty(ha.nvl)) {
+		char errbuf[1024];
+
 		fnvlist_free(ha.nvl);
 		ret = ENOENT;
-		if (!enoent_ok) {
-			(void) snprintf(errbuf, sizeof (errbuf),
-			    dgettext(TEXT_DOMAIN,
-			    "cannot hold snapshot '%s@%s'"),
-			    zhp->zfs_name, snapname);
-			(void) zfs_standard_error(hdl, ret, errbuf);
-		}
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN,
+		    "cannot hold snapshot '%s@%s'"),
+		    zhp->zfs_name, snapname);
+		(void) zfs_standard_error(zhp->zfs_hdl, ret, errbuf);
 		return (ret);
 	}
 
-	ret = lzc_hold(ha.nvl, cleanup_fd, &errors);
+	ret = zfs_hold_nvl(zhp, cleanup_fd, ha.nvl);
 	fnvlist_free(ha.nvl);
 
-	if (ret == 0)
+	return (ret);
+}
+
+int
+zfs_hold_nvl(zfs_handle_t *zhp, int cleanup_fd, nvlist_t *holds)
+{
+	int ret;
+	nvlist_t *errors;
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char errbuf[1024];
+	nvpair_t *elem;
+
+	errors = NULL;
+	ret = lzc_hold(holds, cleanup_fd, &errors);
+
+	if (ret == 0) {
+		/* There may be errors even in the success case. */
+		fnvlist_free(errors);
 		return (0);
+	}
 
-	if (nvlist_next_nvpair(errors, NULL) == NULL) {
+	if (nvlist_empty(errors)) {
 		/* no hold-specific errors */
 		(void) snprintf(errbuf, sizeof (errbuf),
 		    dgettext(TEXT_DOMAIN, "cannot hold"));
@@ -4263,10 +4271,6 @@ zfs_hold(zfs_handle_t *zhp, const char *
 		case EEXIST:
 			(void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf);
 			break;
-		case ENOENT:
-			if (enoent_ok)
-				return (ENOENT);
-			/* FALLTHROUGH */
 		default:
 			(void) zfs_standard_error(hdl,
 			    fnvpair_value_int32(elem), errbuf);
@@ -4277,30 +4281,26 @@ zfs_hold(zfs_handle_t *zhp, const char *
 	return (ret);
 }
 
-struct releasearg {
-	nvlist_t *nvl;
-	const char *snapname;
-	const char *tag;
-	boolean_t recursive;
-};
-
 static int
 zfs_release_one(zfs_handle_t *zhp, void *arg)
 {
 	struct holdarg *ha = arg;
-	zfs_handle_t *szhp;
 	char name[ZFS_MAXNAMELEN];
 	int rv = 0;
+	nvlist_t *existing_holds;
 
 	(void) snprintf(name, sizeof (name),
 	    "%s@%s", zhp->zfs_name, ha->snapname);
 
-	szhp = make_dataset_handle(zhp->zfs_hdl, name);
-	if (szhp) {
-		nvlist_t *holds = fnvlist_alloc();
-		fnvlist_add_boolean(holds, ha->tag);
-		fnvlist_add_nvlist(ha->nvl, name, holds);
-		zfs_close(szhp);
+	if (lzc_get_holds(name, &existing_holds) != 0) {
+		ha->error = ENOENT;
+	} else if (!nvlist_exists(existing_holds, ha->tag)) {
+		ha->error = ESRCH;
+	} else {
+		nvlist_t *torelease = fnvlist_alloc();
+		fnvlist_add_boolean(torelease, ha->tag);
+		fnvlist_add_nvlist(ha->nvl, name, torelease);
+		fnvlist_free(torelease);
 	}
 
 	if (ha->recursive)
@@ -4315,7 +4315,7 @@ zfs_release(zfs_handle_t *zhp, const cha
 {
 	int ret;
 	struct holdarg ha;
-	nvlist_t *errors;
+	nvlist_t *errors = NULL;
 	nvpair_t *elem;
 	libzfs_handle_t *hdl = zhp->zfs_hdl;
 	char errbuf[1024];
@@ -4324,26 +4324,34 @@ zfs_release(zfs_handle_t *zhp, const cha
 	ha.snapname = snapname;
 	ha.tag = tag;
 	ha.recursive = recursive;
+	ha.error = 0;
 	(void) zfs_release_one(zfs_handle_dup(zhp), &ha);
 
-	if (nvlist_next_nvpair(ha.nvl, NULL) == NULL) {
+	if (nvlist_empty(ha.nvl)) {
 		fnvlist_free(ha.nvl);
-		ret = ENOENT;
+		ret = ha.error;
 		(void) snprintf(errbuf, sizeof (errbuf),
 		    dgettext(TEXT_DOMAIN,
 		    "cannot release hold from snapshot '%s@%s'"),
 		    zhp->zfs_name, snapname);
-		(void) zfs_standard_error(hdl, ret, errbuf);
+		if (ret == ESRCH) {
+			(void) zfs_error(hdl, EZFS_REFTAG_RELE, errbuf);
+		} else {
+			(void) zfs_standard_error(hdl, ret, errbuf);
+		}
 		return (ret);
 	}
 
 	ret = lzc_release(ha.nvl, &errors);
 	fnvlist_free(ha.nvl);
 
-	if (ret == 0)
+	if (ret == 0) {
+		/* There may be errors even in the success case. */
+		fnvlist_free(errors);
 		return (0);
+	}
 
-	if (nvlist_next_nvpair(errors, NULL) == NULL) {
+	if (nvlist_empty(errors)) {
 		/* no hold-specific errors */
 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 		    "cannot release"));

Modified: stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
==============================================================================
--- stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c	Fri Jul  5 04:08:45 2013	(r252764)
+++ stable/8/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c	Fri Jul  5 04:11:34 2013	(r252765)
@@ -25,6 +25,7 @@
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  * Copyright (c) 2012 Pawel Jakub Dawidek <pawel at dawidek.net>.
  * All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 #include <assert.h>
@@ -802,6 +803,7 @@ typedef struct send_dump_data {
 	int outfd;
 	boolean_t err;
 	nvlist_t *fss;
+	nvlist_t *snapholds;
 	avl_tree_t *fsavl;
 	snapfilter_cb_t *filter_cb;
 	void *filter_cb_arg;
@@ -952,41 +954,19 @@ dump_ioctl(zfs_handle_t *zhp, const char
 	return (0);
 }
 
-static int
-hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
+static void
+gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
 {
-	zfs_handle_t *pzhp;
-	int error = 0;
-	char *thissnap;
-
 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 
-	if (sdd->dryrun)
-		return (0);
-
 	/*
-	 * zfs_send() only opens a cleanup_fd for sends that need it,
+	 * zfs_send() only sets snapholds for sends that need them,
 	 * e.g. replication and doall.
 	 */
-	if (sdd->cleanup_fd == -1)
-		return (0);
-
-	thissnap = strchr(zhp->zfs_name, '@') + 1;
-	*(thissnap - 1) = '\0';
-	pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
-	*(thissnap - 1) = '@';
-
-	/*
-	 * It's OK if the parent no longer exists.  The send code will
-	 * handle that error.
-	 */
-	if (pzhp) {
-		error = zfs_hold(pzhp, thissnap, sdd->holdtag,
-		    B_FALSE, B_TRUE, sdd->cleanup_fd);
-		zfs_close(pzhp);
-	}
+	if (sdd->snapholds == NULL)
+		return;
 
-	return (error);
+	fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
 }
 
 static void *
@@ -1042,28 +1022,23 @@ dump_snapshot(zfs_handle_t *zhp, void *a
 	send_dump_data_t *sdd = arg;
 	progress_arg_t pa = { 0 };
 	pthread_t tid;
-
 	char *thissnap;
 	int err;
 	boolean_t isfromsnap, istosnap, fromorigin;
 	boolean_t exclude = B_FALSE;
 
+	err = 0;
 	thissnap = strchr(zhp->zfs_name, '@') + 1;
 	isfromsnap = (sdd->fromsnap != NULL &&
 	    strcmp(sdd->fromsnap, thissnap) == 0);
 
 	if (!sdd->seenfrom && isfromsnap) {
-		err = hold_for_send(zhp, sdd);
-		if (err == 0) {
-			sdd->seenfrom = B_TRUE;
-			(void) strcpy(sdd->prevsnap, thissnap);
-			sdd->prevsnap_obj = zfs_prop_get_int(zhp,
-			    ZFS_PROP_OBJSETID);
-		} else if (err == ENOENT) {
-			err = 0;
-		}
+		gather_holds(zhp, sdd);
+		sdd->seenfrom = B_TRUE;
+		(void) strcpy(sdd->prevsnap, thissnap);
+		sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
 		zfs_close(zhp);
-		return (err);
+		return (0);
 	}
 
 	if (sdd->seento || !sdd->seenfrom) {
@@ -1114,14 +1089,7 @@ dump_snapshot(zfs_handle_t *zhp, void *a
 		return (0);
 	}
 
-	err = hold_for_send(zhp, sdd);
-	if (err) {
-		if (err == ENOENT)
-			err = 0;
-		zfs_close(zhp);
-		return (err);
-	}
-
+	gather_holds(zhp, sdd);
 	fromorigin = sdd->prevsnap[0] == '\0' &&
 	    (sdd->fromorigin || sdd->replicate);
 
@@ -1389,7 +1357,7 @@ zfs_send(zfs_handle_t *zhp, const char *
 	avl_tree_t *fsavl = NULL;
 	static uint64_t holdseq;
 	int spa_version;
-	pthread_t tid;
+	pthread_t tid = 0;
 	int pipefd[2];
 	dedup_arg_t dda = { 0 };
 	int featureflags = 0;
@@ -1462,11 +1430,8 @@ zfs_send(zfs_handle_t *zhp, const char *
 				*debugnvp = hdrnv;
 			else
 				nvlist_free(hdrnv);
-			if (err) {
-				fsavl_destroy(fsavl);
-				nvlist_free(fss);
+			if (err)
 				goto stderr_out;
-			}
 		}
 
 		if (!flags->dryrun) {
@@ -1490,8 +1455,6 @@ zfs_send(zfs_handle_t *zhp, const char *
 			}
 			free(packbuf);
 			if (err == -1) {
-				fsavl_destroy(fsavl);
-				nvlist_free(fss);
 				err = errno;
 				goto stderr_out;
 			}
@@ -1502,8 +1465,6 @@ zfs_send(zfs_handle_t *zhp, const char *
 			drr.drr_u.drr_end.drr_checksum = zc;
 			err = write(outfd, &drr, sizeof (drr));
 			if (err == -1) {
-				fsavl_destroy(fsavl);
-				nvlist_free(fss);
 				err = errno;
 				goto stderr_out;
 			}
@@ -1515,7 +1476,7 @@ zfs_send(zfs_handle_t *zhp, const char *
 	/* dump each stream */
 	sdd.fromsnap = fromsnap;
 	sdd.tosnap = tosnap;
-	if (flags->dedup)
+	if (tid != 0)
 		sdd.outfd = pipefd[0];
 	else
 		sdd.outfd = outfd;
@@ -1552,36 +1513,71 @@ zfs_send(zfs_handle_t *zhp, const char *
 			err = errno;
 			goto stderr_out;
 		}
+		sdd.snapholds = fnvlist_alloc();
 	} else {
 		sdd.cleanup_fd = -1;
+		sdd.snapholds = NULL;
 	}
-	if (flags->verbose) {
+	if (flags->verbose || sdd.snapholds != NULL) {
 		/*
 		 * Do a verbose no-op dry run to get all the verbose output
-		 * before generating any data.  Then do a non-verbose real
-		 * run to generate the streams.
+		 * or to gather snapshot hold's before generating any data,
+		 * then do a non-verbose real run to generate the streams.
 		 */
 		sdd.dryrun = B_TRUE;
 		err = dump_filesystems(zhp, &sdd);
-		sdd.dryrun = flags->dryrun;
-		sdd.verbose = B_FALSE;
-		if (flags->parsable) {
-			(void) fprintf(stderr, "size\t%llu\n",
-			    (longlong_t)sdd.size);
-		} else {
-			char buf[16];
-			zfs_nicenum(sdd.size, buf, sizeof (buf));
-			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
-			    "total estimated size is %s\n"), buf);
+
+		if (err != 0)
+			goto stderr_out;
+
+		if (flags->verbose) {
+			if (flags->parsable) {
+				(void) fprintf(stderr, "size\t%llu\n",
+				    (longlong_t)sdd.size);
+			} else {
+				char buf[16];
+				zfs_nicenum(sdd.size, buf, sizeof (buf));
+				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+				    "total estimated size is %s\n"), buf);
+			}
+		}
+
+		/* Ensure no snaps found is treated as an error. */
+		if (!sdd.seento) {
+			err = ENOENT;
+			goto err_out;
 		}
+
+		/* Skip the second run if dryrun was requested. */
+		if (flags->dryrun)
+			goto err_out;
+
+		if (sdd.snapholds != NULL) {
+			err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
+			if (err != 0)
+				goto stderr_out;
+
+			fnvlist_free(sdd.snapholds);
+			sdd.snapholds = NULL;
+		}
+
+		sdd.dryrun = B_FALSE;
+		sdd.verbose = B_FALSE;
 	}
+
 	err = dump_filesystems(zhp, &sdd);
 	fsavl_destroy(fsavl);
 	nvlist_free(fss);
 
-	if (flags->dedup) {
-		(void) close(pipefd[0]);
+	/* Ensure no snaps found is treated as an error. */
+	if (err == 0 && !sdd.seento)
+		err = ENOENT;
+
+	if (tid != 0) {
+		if (err != 0)
+			(void) pthread_cancel(tid);
 		(void) pthread_join(tid, NULL);
+		(void) close(pipefd[0]);
 	}
 
 	if (sdd.cleanup_fd != -1) {
@@ -1609,9 +1605,13 @@ zfs_send(zfs_handle_t *zhp, const char *
 stderr_out:
 	err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
 err_out:
+	fsavl_destroy(fsavl);
+	nvlist_free(fss);
+	fnvlist_free(sdd.snapholds);
+
 	if (sdd.cleanup_fd != -1)
 		VERIFY(0 == close(sdd.cleanup_fd));
-	if (flags->dedup) {
+	if (tid != 0) {
 		(void) pthread_cancel(tid);
 		(void) pthread_join(tid, NULL);
 		(void) close(pipefd[0]);

Modified: stable/8/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c
==============================================================================
--- stable/8/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c	Fri Jul  5 04:08:45 2013	(r252764)
+++ stable/8/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c	Fri Jul  5 04:11:34 2013	(r252765)
@@ -21,6 +21,7 @@
 
 /*
  * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 /*
@@ -333,7 +334,6 @@ lzc_destroy_snaps(nvlist_t *snaps, boole
 	nvlist_free(args);
 
 	return (error);
-
 }
 
 int
@@ -393,11 +393,17 @@ lzc_exists(const char *dataset)
  * uncleanly, the holds will be released when the pool is next opened
  * or imported.
  *
- * The return value will be 0 if all holds were created. Otherwise the return
- * value will be the errno of a (unspecified) hold that failed, no holds will
- * be created, and the errlist will have an entry for each hold that
- * failed (name = snapshot).  The value in the errlist will be the error
- * code (int32).
+ * Holds for snapshots which don't exist will be skipped and have an entry
+ * added to errlist, but will not cause an overall failure.
+ *
+ * The return value will be 0 if all holds, for snapshots that existed,
+ * were succesfully created.
+ *
+ * Otherwise the return value will be the errno of a (unspecified) hold that
+ * failed and no holds will be created.
+ *
+ * In all cases the errlist will have an entry for each hold that failed
+ * (name = snapshot), with its value being the error code (int32).
  */
 int
 lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
@@ -434,11 +440,17 @@ lzc_hold(nvlist_t *holds, int cleanup_fd
  * The snapshots must all be in the same pool.
  * The value is a nvlist whose keys are the holds to remove.
  *
- * The return value will be 0 if all holds were removed.
- * Otherwise the return value will be the errno of a (unspecified) release
- * that failed, no holds will be released, and the errlist will have an
- * entry for each snapshot that has failed releases (name = snapshot).
- * The value in the errlist will be the error code (int32) of a failed release.
+ * Holds which failed to release because they didn't exist will have an entry
+ * added to errlist, but will not cause an overall failure.
+ *
+ * The return value will be 0 if the nvl holds was empty or all holds that
+ * existed, were successfully removed.
+ *
+ * Otherwise the return value will be the errno of a (unspecified) hold that
+ * failed to release and no holds will be released.
+ *
+ * In all cases the errlist will have an entry for each hold that failed to
+ * to release.
  */
 int
 lzc_release(nvlist_t *holds, nvlist_t **errlist)

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c	Fri Jul  5 04:08:45 2013	(r252764)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c	Fri Jul  5 04:11:34 2013	(r252765)
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -127,6 +128,7 @@ dsl_destroy_snapshot_check(void *arg, dm
 	pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL);
 	if (pair != NULL)
 		return (fnvpair_value_int32(pair));
+
 	return (0);
 }
 

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c	Fri Jul  5 04:08:45 2013	(r252764)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c	Fri Jul  5 04:11:34 2013	(r252765)
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 #include <sys/dsl_pool.h>
@@ -856,23 +857,34 @@ dsl_pool_clean_tmp_userrefs(dsl_pool_t *
 	zap_cursor_t zc;
 	objset_t *mos = dp->dp_meta_objset;
 	uint64_t zapobj = dp->dp_tmp_userrefs_obj;
+	nvlist_t *holds;
 
 	if (zapobj == 0)
 		return;
 	ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
 
+	holds = fnvlist_alloc();
+
 	for (zap_cursor_init(&zc, mos, zapobj);
 	    zap_cursor_retrieve(&zc, &za) == 0;
 	    zap_cursor_advance(&zc)) {
 		char *htag;
-		uint64_t dsobj;
+		nvlist_t *tags;
 
 		htag = strchr(za.za_name, '-');
 		*htag = '\0';
 		++htag;
-		dsobj = strtonum(za.za_name, NULL);
-		dsl_dataset_user_release_tmp(dp, dsobj, htag);
+		if (nvlist_lookup_nvlist(holds, za.za_name, &tags) != 0) {
+			tags = fnvlist_alloc();
+			fnvlist_add_boolean(tags, htag);
+			fnvlist_add_nvlist(holds, za.za_name, tags);
+			fnvlist_free(tags);
+		} else {
+			fnvlist_add_boolean(tags, htag);
+		}
 	}
+	dsl_dataset_user_release_tmp(dp, holds);
+	fnvlist_free(holds);
 	zap_cursor_fini(&zc);
 }
 

Modified: stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c
==============================================================================
--- stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c	Fri Jul  5 04:08:45 2013	(r252764)
+++ stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c	Fri Jul  5 04:11:34 2013	(r252765)
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -37,6 +38,7 @@
 
 typedef struct dsl_dataset_user_hold_arg {
 	nvlist_t *dduha_holds;
+	nvlist_t *dduha_chkholds;
 	nvlist_t *dduha_errlist;
 	minor_t dduha_minor;
 } dsl_dataset_user_hold_arg_t;
@@ -53,25 +55,24 @@ dsl_dataset_user_hold_check_one(dsl_data
 	objset_t *mos = dp->dp_meta_objset;
 	int error = 0;
 
+	ASSERT(dsl_pool_config_held(dp));
+
 	if (strlen(htag) > MAXNAMELEN)
-		return (E2BIG);
+		return (SET_ERROR(E2BIG));
 	/* Tempholds have a more restricted length */
 	if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
-		return (E2BIG);
+		return (SET_ERROR(E2BIG));
 
 	/* tags must be unique (if ds already exists) */
-	if (ds != NULL) {
-		mutex_enter(&ds->ds_lock);
-		if (ds->ds_phys->ds_userrefs_obj != 0) {
-			uint64_t value;
-			error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
-			    htag, 8, 1, &value);
-			if (error == 0)
-				error = SET_ERROR(EEXIST);
-			else if (error == ENOENT)
-				error = 0;
-		}
-		mutex_exit(&ds->ds_lock);
+	if (ds != NULL && ds->ds_phys->ds_userrefs_obj != 0) {
+		uint64_t value;
+
+		error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
+		    htag, 8, 1, &value);
+		if (error == 0)
+			error = SET_ERROR(EEXIST);
+		else if (error == ENOENT)
+			error = 0;
 	}
 
 	return (error);
@@ -82,52 +83,63 @@ dsl_dataset_user_hold_check(void *arg, d
 {
 	dsl_dataset_user_hold_arg_t *dduha = arg;
 	dsl_pool_t *dp = dmu_tx_pool(tx);
-	nvpair_t *pair;
-	int rv = 0;
 
 	if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
 		return (SET_ERROR(ENOTSUP));
 
-	for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
-	    pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
-		int error = 0;
+	if (!dmu_tx_is_syncing(tx))
+		return (0);
+
+	for (nvpair_t *pair = nvlist_next_nvpair(dduha->dduha_holds, NULL);
+	    pair != NULL; pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
 		dsl_dataset_t *ds;
-		char *htag;
+		int error = 0;
+		char *htag, *name;
 
 		/* must be a snapshot */
-		if (strchr(nvpair_name(pair), '@') == NULL)
+		name = nvpair_name(pair);
+		if (strchr(name, '@') == NULL)
 			error = SET_ERROR(EINVAL);
 
 		if (error == 0)
 			error = nvpair_value_string(pair, &htag);
-		if (error == 0) {
-			error = dsl_dataset_hold(dp,
-			    nvpair_name(pair), FTAG, &ds);
-		}
+
+		if (error == 0)
+			error = dsl_dataset_hold(dp, name, FTAG, &ds);
+
 		if (error == 0) {
 			error = dsl_dataset_user_hold_check_one(ds, htag,
 			    dduha->dduha_minor != 0, tx);
 			dsl_dataset_rele(ds, FTAG);
 		}
 
-		if (error != 0) {
-			rv = error;
-			fnvlist_add_int32(dduha->dduha_errlist,
-			    nvpair_name(pair), error);
+		if (error == 0) {
+			fnvlist_add_string(dduha->dduha_chkholds, name, htag);
+		} else {
+			/*
+			 * We register ENOENT errors so they can be correctly
+			 * reported if needed, such as when all holds fail.
+			 */
+			fnvlist_add_int32(dduha->dduha_errlist, name, error);
+			if (error != ENOENT)
+				return (error);
 		}
 	}
-	return (rv);
+
+	return (0);
 }
 
-void
-dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
-    minor_t minor, uint64_t now, dmu_tx_t *tx)
+
+static void
+dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds,
+    const char *htag, minor_t minor, uint64_t now, dmu_tx_t *tx)
 {
 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
 	objset_t *mos = dp->dp_meta_objset;
 	uint64_t zapobj;
 
-	mutex_enter(&ds->ds_lock);
+	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
+
 	if (ds->ds_phys->ds_userrefs_obj == 0) {
 		/*
 		 * This is the first user hold for this dataset.  Create
@@ -140,14 +152,26 @@ dsl_dataset_user_hold_sync_one(dsl_datas
 		zapobj = ds->ds_phys->ds_userrefs_obj;
 	}
 	ds->ds_userrefs++;
-	mutex_exit(&ds->ds_lock);
 
 	VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
 
 	if (minor != 0) {
+		char name[MAXNAMELEN];
+		nvlist_t *tags;
+
 		VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
 		    htag, now, tx));
-		dsl_register_onexit_hold_cleanup(ds, htag, minor);
+		(void) snprintf(name, sizeof (name), "%llx",
+		    (u_longlong_t)ds->ds_object);
+
+		if (nvlist_lookup_nvlist(tmpholds, name, &tags) != 0) {
+			tags = fnvlist_alloc();
+			fnvlist_add_boolean(tags, htag);
+			fnvlist_add_nvlist(tmpholds, name, tags);
+			fnvlist_free(tags);
+		} else {
+			fnvlist_add_boolean(tags, htag);
+		}
 	}
 
 	spa_history_log_internal_ds(ds, "hold", tx,
@@ -155,140 +179,292 @@ dsl_dataset_user_hold_sync_one(dsl_datas
 	    htag, minor != 0, ds->ds_userrefs);
 }
 
+typedef struct zfs_hold_cleanup_arg {
+	char zhca_spaname[MAXNAMELEN];
+	uint64_t zhca_spa_load_guid;
+	nvlist_t *zhca_holds;
+} zfs_hold_cleanup_arg_t;
+
+static void
+dsl_dataset_user_release_onexit(void *arg)
+{
+	zfs_hold_cleanup_arg_t *ca = arg;
+	spa_t *spa;
+	int error;
+
+	error = spa_open(ca->zhca_spaname, &spa, FTAG);
+	if (error != 0) {
+		zfs_dbgmsg("couldn't release holds on pool=%s "
+		    "because pool is no longer loaded",
+		    ca->zhca_spaname);
+		return;
+	}
+	if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
+		zfs_dbgmsg("couldn't release holds on pool=%s "
+		    "because pool is no longer loaded (guid doesn't match)",
+		    ca->zhca_spaname);
+		spa_close(spa, FTAG);
+		return;
+	}
+
+	(void) dsl_dataset_user_release_tmp(spa_get_dsl(spa), ca->zhca_holds);
+	fnvlist_free(ca->zhca_holds);
+	kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
+	spa_close(spa, FTAG);
+}
+
+static void
+dsl_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor)
+{
+	zfs_hold_cleanup_arg_t *ca;
+
+	if (minor == 0 || nvlist_empty(holds)) {
+		fnvlist_free(holds);
+		return;
+	}
+
+	ASSERT(spa != NULL);
+	ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
+
+	(void) strlcpy(ca->zhca_spaname, spa_name(spa),
+	    sizeof (ca->zhca_spaname));
+	ca->zhca_spa_load_guid = spa_load_guid(spa);
+	ca->zhca_holds = holds;
+	VERIFY0(zfs_onexit_add_cb(minor,
+	    dsl_dataset_user_release_onexit, ca, NULL));
+}
+
+void
+dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
+    minor_t minor, uint64_t now, dmu_tx_t *tx)
+{
+	nvlist_t *tmpholds;
+
+	if (minor != 0)
+		tmpholds = fnvlist_alloc();
+	else
+		tmpholds = NULL;
+	dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, htag, minor, now, tx);
+	dsl_onexit_hold_cleanup(dsl_dataset_get_spa(ds), tmpholds, minor);
+}
+
 static void
 dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
 {
 	dsl_dataset_user_hold_arg_t *dduha = arg;
 	dsl_pool_t *dp = dmu_tx_pool(tx);
-	nvpair_t *pair;
+	nvlist_t *tmpholds;
 	uint64_t now = gethrestime_sec();
 
-	for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
-	    pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
+	if (dduha->dduha_minor != 0)
+		tmpholds = fnvlist_alloc();
+	else
+		tmpholds = NULL;
+	for (nvpair_t *pair = nvlist_next_nvpair(dduha->dduha_chkholds, NULL);
+	    pair != NULL;
+	    pair = nvlist_next_nvpair(dduha->dduha_chkholds, pair)) {
 		dsl_dataset_t *ds;
+
 		VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
-		dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
-		    dduha->dduha_minor, now, tx);
+		dsl_dataset_user_hold_sync_one_impl(tmpholds, ds,
+		    fnvpair_value_string(pair), dduha->dduha_minor, now, tx);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-stable-8 mailing list