kern/170912: [zfs] unnecessarily setting DS_FLAG_INCONSISTENT on async destroyed datasets

Marcelo Araujo <araujo@FreeBSD.org>
Thu Aug 23 06:40:02 UTC 2012


>Number:         170912
>Category:       kern
>Synopsis:       [zfs] unnecessarily setting DS_FLAG_INCONSISTENT on async destroyed datasets
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Thu Aug 23 06:40:01 UTC 2012
>Closed-Date:
>Last-Modified:
>Originator:     Marcelo Araujo
>Release:        9.1-BETA1
>Organization:
FreeBSD
>Environment:
FreeBSD QnapAraujo 9.1-BETA1 FreeBSD 9.1-BETA1 #15: Wed Jul 11 08:36:49 PDT 2012     root@build9x64.pcbsd.org:/usr/obj/builds/amd64/pcbsd-build90/fbsd-source/9.0/sys/GENERIC  amd64

>Description:
Import the upstream source change to fix the issue described at: https://www.illumos.org/issues/3086

Code obtained from commit cd512c80fd75.
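
For context, the heart of the change is that the pool-wide dp_synced_datasets list is replaced by a per-txg dp_dirty_zilogs list (plus a local synced_datasets list inside dsl_pool_sync()): zilog_dirty() links a zilog onto that list, taking a hold on the dataset's dbuf, whenever an in-memory log record is created; zilog_is_dirty() reports whether any txg still holds it; and dsl_pool_sync_done() drains the list and drops the hold once the txg is on disk. The stand-alone sketch below only illustrates that bookkeeping; the types and function names are simplified stand-ins, not the real ZFS interfaces (see the attached patch for those).

/*
 * Toy model of the per-txg dirty-zilog tracking added by the patch.
 * Names with a "_model" suffix are illustrative only.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TXG_SIZE 4
#define TXG_MASK (TXG_SIZE - 1)

struct zilog_model {
	const char *name;
	int ds_holds;                        /* models dmu_buf_add_ref()/dmu_buf_rele() */
	struct zilog_model *next[TXG_SIZE];  /* models txg_node_t zl_dirty_link */
	bool on_list[TXG_SIZE];
};

static struct zilog_model *dirty_zilogs[TXG_SIZE];  /* models dp_dirty_zilogs */

/* Models zilog_dirty(): link at most once per txg and take a hold. */
static void
zilog_model_dirty(struct zilog_model *zl, uint64_t txg)
{
	int t = txg & TXG_MASK;

	if (!zl->on_list[t]) {
		zl->on_list[t] = true;
		zl->next[t] = dirty_zilogs[t];
		dirty_zilogs[t] = zl;
		zl->ds_holds++;		/* keep the dataset buffer held until synced */
	}
}

/* Models zilog_is_dirty(): membership on any txg's list means dirty. */
static bool
zilog_model_is_dirty(const struct zilog_model *zl)
{
	for (int t = 0; t < TXG_SIZE; t++)
		if (zl->on_list[t])
			return (true);
	return (false);
}

/* Models dsl_pool_sync_done(): drain the list for the txg that just synced. */
static void
pool_model_sync_done(uint64_t txg)
{
	int t = txg & TXG_MASK;

	while (dirty_zilogs[t] != NULL) {
		struct zilog_model *zl = dirty_zilogs[t];
		dirty_zilogs[t] = zl->next[t];
		zl->on_list[t] = false;
		zl->ds_holds--;		/* release the hold taken in zilog_model_dirty() */
		printf("cleaned %s for txg %llu\n", zl->name, (unsigned long long)txg);
	}
}

int
main(void)
{
	struct zilog_model zl = { .name = "tank/fs" };

	zilog_model_dirty(&zl, 42);
	zilog_model_dirty(&zl, 42);	/* second call in the same txg is a no-op */
	printf("dirty=%d holds=%d\n", zilog_model_is_dirty(&zl), zl.ds_holds);
	pool_model_sync_done(42);
	printf("dirty=%d holds=%d\n", zilog_model_is_dirty(&zl), zl.ds_holds);
	return (0);
}

The attached patch implements the same idea with txg_list_add()/txg_list_remove() on dp_dirty_zilogs and dmu_buf_add_ref()/dmu_buf_rele() on the dataset's dbuf.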
>How-To-Repeat:

>Fix:


Patch attached with submission follows:

Index: cddl/contrib/opensolaris/cmd/ztest/ztest.c
===================================================================
--- cddl/contrib/opensolaris/cmd/ztest/ztest.c	(revision 239602)
+++ cddl/contrib/opensolaris/cmd/ztest/ztest.c	(working copy)
@@ -2225,6 +2225,7 @@
 {
 	objset_t *os = zd->zd_os;
 
+    VERIFY(mutex_lock(&zd->zd_dirobj_lock) == 0);
 	(void) rw_wrlock(&zd->zd_zilog_lock);
 
 	/* zfsvfs_teardown() */
@@ -2235,6 +2236,7 @@
 	zil_replay(os, zd, ztest_replay_vector);
 
 	(void) rw_unlock(&zd->zd_zilog_lock);
+    VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0);
 }
 
 /*
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c	(revision 239602)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c	(working copy)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -462,6 +462,39 @@
 }
 
 /*
+ * Called when we create in-memory log transactions so that we know
+ * to cleanup the itxs at the end of spa_sync().
+ */
+
+void
+zilog_dirty(zilog_t *zilog, uint64_t txg)
+{
+	dsl_pool_t *dp = zilog->zl_dmu_pool;
+	dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os);
+
+	if (dsl_dataset_is_snapshot(ds))
+		panic("dirtying snapshot!");
+
+	if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg) == 0) {
+		/* up the hold count until we can be written out */
+		dmu_buf_add_ref(ds->ds_dbuf, zilog);
+	}
+}
+
+
+boolean_t
+zilog_is_dirty(zilog_t *zilog)
+{
+	dsl_pool_t *dp = zilog->zl_dmu_pool;
+
+	for (int t = 0; t < TXG_SIZE; t++) {
+		if (txg_list_member(&dp->dp_dirty_zilogs, zilog, t))
+			return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
+/*
  * Create an on-disk intent log.
  */
 static lwb_t *
@@ -577,14 +610,21 @@
 			kmem_cache_free(zil_lwb_cache, lwb);
 		}
 	} else if (!keep_first) {
-		(void) zil_parse(zilog, zil_free_log_block,
-		    zil_free_log_record, tx, zh->zh_claim_txg);
+		zil_destroy_sync(zilog, tx);
 	}
 	mutex_exit(&zilog->zl_lock);
 
 	dmu_tx_commit(tx);
 }
 
+void
+zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx)
+{
+	ASSERT(list_is_empty(&zilog->zl_lwb_list));
+	(void) zil_parse(zilog, zil_free_log_block,
+	    zil_free_log_record, tx, zilog->zl_header->zh_claim_txg);
+}
+
 int
 zil_claim(const char *osname, void *txarg)
 {
@@ -998,6 +1038,8 @@
 		return (NULL);
 
 	ASSERT(lwb->lwb_buf != NULL);
+	ASSERT(zilog_is_dirty(zilog) ||
+		spa_freeze_txg(zilog->zl_spa) != UINT64_MAX);
 
 	if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY)
 		dlen = P2ROUNDUP_TYPED(
@@ -1218,7 +1260,7 @@
 	if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_RENAME)
 		zil_async_to_sync(zilog, itx->itx_oid);
 
-	if (spa_freeze_txg(zilog->zl_spa) !=  UINT64_MAX)
+	if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX)
 		txg = ZILTEST_TXG;
 	else
 		txg = dmu_tx_get_txg(tx);
@@ -1269,6 +1311,7 @@
 	}
 
 	itx->itx_lr.lrc_txg = dmu_tx_get_txg(tx);
+	zilog_dirty(zilog, txg);
 	mutex_exit(&itxg->itxg_lock);
 
 	/* Release the old itxs now we've dropped the lock */
@@ -1278,7 +1321,10 @@
 
 /*
  * If there are any in-memory intent log transactions which have now been
- * synced then start up a taskq to free them.
+ * synced then start up a taskq to free them. We should only do this after we
+ * have written out the uberblocks (i.e. txg has been committed) so that we
+ * don't inadvertently clean out in-memory log records that would be required
+ * by zil_commit().
  */
 void
 zil_clean(zilog_t *zilog, uint64_t synced_txg)
@@ -1746,6 +1792,7 @@
 	mutex_exit(&zilog->zl_lock);
 	if (txg)
 		txg_wait_synced(zilog->zl_dmu_pool, txg);
+	ASSERT(!zilog_is_dirty(zilog));
 
 	taskq_destroy(zilog->zl_clean_taskq);
 	zilog->zl_clean_taskq = NULL;
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c	(revision 239602)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c	(working copy)
@@ -42,6 +42,7 @@
 #include <sys/dsl_deadlist.h>
 #include <sys/bptree.h>
 #include <sys/zfeature.h>
+#include <sys/zil_impl.h>
 
 int zfs_no_write_throttle = 0;
 int zfs_write_limit_shift = 3;			/* 1/8th of physical memory */
@@ -111,12 +112,12 @@
 
 	txg_list_create(&dp->dp_dirty_datasets,
 	    offsetof(dsl_dataset_t, ds_dirty_link));
+    txg_list_create(&dp->dp_dirty_zilogs,
+            offsetof(zilog_t, zl_dirty_link));
 	txg_list_create(&dp->dp_dirty_dirs,
 	    offsetof(dsl_dir_t, dd_dirty_link));
 	txg_list_create(&dp->dp_sync_tasks,
 	    offsetof(dsl_sync_task_group_t, dstg_node));
-	list_create(&dp->dp_synced_datasets, sizeof (dsl_dataset_t),
-	    offsetof(dsl_dataset_t, ds_synced_link));
 
 	mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
 
@@ -249,9 +250,9 @@
 		dmu_objset_evict(dp->dp_meta_objset);
 
 	txg_list_destroy(&dp->dp_dirty_datasets);
+    txg_list_destroy(&dp->dp_dirty_zilogs);
 	txg_list_destroy(&dp->dp_sync_tasks);
 	txg_list_destroy(&dp->dp_dirty_dirs);
-	list_destroy(&dp->dp_synced_datasets);
 
 	arc_flush(dp->dp_spa);
 	txg_fini(dp);
@@ -331,6 +332,21 @@
 	return (dp);
 }
 
+/*
+ * Account for the meta-objset space in its placeholder dsl_dir.
+ */
+void
+dsl_pool_mos_diduse_space(dsl_pool_t *dp,
+        int64_t used, int64_t comp, int64_t uncomp)
+{
+    ASSERT3U(comp, ==, uncomp); /* It's all metadata */
+    mutex_enter(&dp->dp_lock);
+    dp->dp_mos_used_delta += used;
+    dp->dp_mos_compressed_delta += comp;
+    dp->dp_mos_uncompressed_delta += uncomp;
+    mutex_exit(&dp->dp_lock);
+}
+
 static int
 deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
 {
@@ -349,12 +365,15 @@
 	dmu_tx_t *tx;
 	dsl_dir_t *dd;
 	dsl_dataset_t *ds;
-	dsl_sync_task_group_t *dstg;
 	objset_t *mos = dp->dp_meta_objset;
 	hrtime_t start, write_time;
 	uint64_t data_written;
 	int err;
+    list_t synced_datasets;
 
+    list_create(&synced_datasets, sizeof(dsl_dataset_t),
+            offsetof(dsl_dataset_t, ds_synced_link));
+
 	/*
 	 * We need to copy dp_space_towrite() before doing
 	 * dsl_sync_task_group_sync(), because
@@ -376,7 +395,7 @@
 		 * may sync newly-created datasets on pass 2.
 		 */
 		ASSERT(!list_link_active(&ds->ds_synced_link));
-		list_insert_tail(&dp->dp_synced_datasets, ds);
+        list_insert_tail(&synced_datasets, ds);
 		dsl_dataset_sync(ds, zio, tx);
 	}
 	DTRACE_PROBE(pool_sync__1setup);
@@ -386,15 +405,20 @@
 	ASSERT(err == 0);
 	DTRACE_PROBE(pool_sync__2rootzio);
 
-	for (ds = list_head(&dp->dp_synced_datasets); ds;
-	    ds = list_next(&dp->dp_synced_datasets, ds))
+    /*
+     * After the data blocks have been written (ensured by the zio_wait()
+     * above), update the user/group space accounting.
+     */
+    for (ds = list_head(&synced_datasets); ds;
+            ds = list_next(&synced_datasets, ds))
 		dmu_objset_do_userquota_updates(ds->ds_objset, tx);
 
 	/*
 	 * Sync the datasets again to push out the changes due to
 	 * userspace updates.  This must be done before we process the
-	 * sync tasks, because that could cause a snapshot of a dataset
-	 * whose ds_bp will be rewritten when we do this 2nd sync.
+     * sync tasks, so that any snapshots will have the correct
+     * user accounting information (and we won't get confused
+     * about which blocks are part of the snapshot).
 	 */
 	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
 	while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) {
@@ -405,30 +429,42 @@
 	err = zio_wait(zio);
 
 	/*
-	 * Move dead blocks from the pending deadlist to the on-disk
-	 * deadlist.
+     * Now that the datasets have been completely synced, we can
+     * clean up our in-memory structures accumulated while syncing:
+     *
+     * - move dead blocks from the pending deadlist to the on-disk deadlist
+     * - clean up zil records
+     * - release hold from dsl_dataset_dirty()
 	 */
-	for (ds = list_head(&dp->dp_synced_datasets); ds;
-	    ds = list_next(&dp->dp_synced_datasets, ds)) {
+    while (ds = list_remove_head(&synced_datasets)) {
+        objset_t *os = ds->ds_objset;
 		bplist_iterate(&ds->ds_pending_deadlist,
 		    deadlist_enqueue_cb, &ds->ds_deadlist, tx);
+        ASSERT(!dmu_objset_is_dirty(os, txg));
+        dmu_buf_rele(ds->ds_dbuf, ds);
 	}
 
-	while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg)) {
-		/*
-		 * No more sync tasks should have been added while we
-		 * were syncing.
-		 */
-		ASSERT(spa_sync_pass(dp->dp_spa) == 1);
-		dsl_sync_task_group_sync(dstg, tx);
-	}
-	DTRACE_PROBE(pool_sync__3task);
-
 	start = gethrtime();
 	while (dd = txg_list_remove(&dp->dp_dirty_dirs, txg))
 		dsl_dir_sync(dd, tx);
 	write_time += gethrtime() - start;
 
+    /*
+     * The MOS's space is accounted for in the pool/$MOS
+     * (dp_mos_dir). We can't modify the mos while we're syncing
+     * it, so we remember the deltas and apply them here.
+     */
+    if (dp->dp_mos_used_delta != 0 || dp->dp_mos_compressed_delta != 0 ||
+            dp->dp_mos_uncompressed_delta != 0) {
+        dsl_dir_diduse_space(dp->dp_mos_dir, DD_USED_HEAD,
+                dp->dp_mos_used_delta,
+                dp->dp_mos_compressed_delta,
+                dp->dp_mos_uncompressed_delta, tx);
+        dp->dp_mos_used_delta = 0;
+        dp->dp_mos_compressed_delta = 0;
+        dp->dp_mos_uncompressed_delta = 0;
+    }
+
 	start = gethrtime();
 	if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
 	    list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) {
@@ -444,6 +480,29 @@
 	    hrtime_t, dp->dp_read_overhead);
 	write_time -= dp->dp_read_overhead;
 
+        /*
+         * If we modify a dataset in the same txg that we want to destroy it,
+         * its dsl_dir's dd_dbuf will be dirty, and thus have a hold on it.
+         * dsl_dir_destroy_check() will fail if there are unexpected holds.
+         * Therefore, we want to sync the MOS (thus syncing the dd_dbuf
+         * and clearing the hold on it) before we process the sync_tasks.
+         * The MOS data dirtied by the sync_tasks will be synced on the next
+         * pass.
+         */
+
+        DTRACE_PROBE(pool_sync__3task);
+        if (!txg_list_empty(&dp->dp_sync_tasks, txg)) {
+            dsl_sync_task_group_t *dstg;
+            /*
+             * No more sync tasks should have been added while we
+             * were syncing.
+             */
+             ASSERT(spa_sync_pass(dp->dp_spa) == 1);
+             while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg))
+                 dsl_sync_task_group_sync(dstg, tx);
+
+         }
+
 	dmu_tx_commit(tx);
 
 	dp->dp_space_towrite[txg & TXG_MASK] = 0;
@@ -492,15 +551,14 @@
 void
 dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg)
 {
+    zilog_t *zilog;
 	dsl_dataset_t *ds;
-	objset_t *os;
 
-	while (ds = list_head(&dp->dp_synced_datasets)) {
-		list_remove(&dp->dp_synced_datasets, ds);
-		os = ds->ds_objset;
-		zil_clean(os->os_zil, txg);
-		ASSERT(!dmu_objset_is_dirty(os, txg));
-		dmu_buf_rele(ds->ds_dbuf, ds);
+    while (zilog = txg_list_remove(&dp->dp_dirty_zilogs, txg)) {
+        ds = dmu_objset_ds(zilog->zl_os);
+        zil_clean(zilog, txg);
+        ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg));
+        dmu_buf_rele(ds->ds_dbuf, zilog);
 	}
 	ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg));
 }
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c	(revision 239602)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c	(working copy)
@@ -229,7 +229,7 @@
 	}
 }
 
-/* Calculate name legnth, avoiding all the strcat calls of dsl_dir_name */
+/* Calculate name length, avoiding all the strcat calls of dsl_dir_name */
 int
 dsl_dir_namelen(dsl_dir_t *dd)
 {
@@ -593,8 +593,6 @@
 {
 	ASSERT(dmu_tx_is_syncing(tx));
 
-	dmu_buf_will_dirty(dd->dd_dbuf, tx);
-
 	mutex_enter(&dd->dd_lock);
 	ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0);
 	dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
@@ -951,8 +949,6 @@
 	ASSERT(dmu_tx_is_syncing(tx));
 	ASSERT(type < DD_USED_NUM);
 
-	dsl_dir_dirty(dd, tx);
-
 	if (needlock)
 		mutex_enter(&dd->dd_lock);
 	accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used);
@@ -961,6 +957,7 @@
 	    dd->dd_phys->dd_compressed_bytes >= -compressed);
 	ASSERT(uncompressed >= 0 ||
 	    dd->dd_phys->dd_uncompressed_bytes >= -uncompressed);
+    dmu_buf_will_dirty(dd->dd_dbuf, tx);
 	dd->dd_phys->dd_used_bytes += used;
 	dd->dd_phys->dd_uncompressed_bytes += uncompressed;
 	dd->dd_phys->dd_compressed_bytes += compressed;
@@ -1002,7 +999,6 @@
 	if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN))
 		return;
 
-	dsl_dir_dirty(dd, tx);
 	if (needlock)
 		mutex_enter(&dd->dd_lock);
 	ASSERT(delta > 0 ?
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h	(revision 239602)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h	(working copy)
@@ -21,6 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
+/* Copyright (c) 2012 by Delphix. All rights reserved. */
 
 /* Portions Copyright 2010 Robert Milkowski */
 
@@ -395,6 +396,7 @@
     zil_replay_func_t *replay_func[TX_MAX_TYPE]);
 extern boolean_t zil_replaying(zilog_t *zilog, dmu_tx_t *tx);
 extern void	zil_destroy(zilog_t *zilog, boolean_t keep_first);
+extern void zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx);
 extern void	zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx);
 
 extern itx_t	*zil_itx_create(uint64_t txtype, size_t lrsize);
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h	(revision 239602)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_pool.h	(working copy)
@@ -82,7 +82,6 @@
 
 	/* No lock needed - sync context only */
 	blkptr_t dp_meta_rootbp;
-	list_t dp_synced_datasets;
 	hrtime_t dp_read_overhead;
 	uint64_t dp_throughput; /* bytes per millisec */
 	uint64_t dp_write_limit;
@@ -96,10 +95,14 @@
 	kmutex_t dp_lock;
 	uint64_t dp_space_towrite[TXG_SIZE];
 	uint64_t dp_tempreserved[TXG_SIZE];
+    uint64_t dp_mos_used_delta;
+    uint64_t dp_mos_compressed_delta;
+    uint64_t dp_mos_uncompressed_delta;
 
 	/* Has its own locking */
 	tx_state_t dp_tx;
 	txg_list_t dp_dirty_datasets;
+    txg_list_t dp_dirty_zilogs;
 	txg_list_t dp_dirty_dirs;
 	txg_list_t dp_sync_tasks;
 
@@ -139,6 +142,8 @@
 void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx);
 void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx);
 void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx);
+void dsl_pool_mos_diduse_space(dsl_pool_t *dp,
+        int64_t used, int64_t comp, int64_t uncomp);
 
 taskq_t *dsl_pool_vnrele_taskq(dsl_pool_t *dp);
 
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h	(revision 239602)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h	(working copy)
@@ -22,6 +22,9 @@
  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
 
 #ifndef _SYS_TXG_H
 #define	_SYS_TXG_H
@@ -115,7 +118,7 @@
 
 extern void txg_list_create(txg_list_t *tl, size_t offset);
 extern void txg_list_destroy(txg_list_t *tl);
-extern int txg_list_empty(txg_list_t *tl, uint64_t txg);
+extern boolean_t txg_list_empty(txg_list_t *tl, uint64_t txg);
 extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
 extern int txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg);
 extern void *txg_list_remove(txg_list_t *tl, uint64_t txg);
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h	(revision 239602)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil_impl.h	(working copy)
@@ -21,6 +21,9 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
+/*
+ * Copyright (c) 2012, Delphix. All rights reserved.
+ */
 
 /* Portions Copyright 2010 Robert Milkowski */
 
@@ -130,6 +133,7 @@
 	zil_header_t	zl_old_header;	/* debugging aid */
 	uint_t		zl_prev_blks[ZIL_PREV_BLKS]; /* size - sector rounded */
 	uint_t		zl_prev_rotor;	/* rotor for zl_prev[] */
+    txg_node_t  zl_dirty_link;  /* protected by dp_dirty_zilogs list */
 };
 
 typedef struct zil_bp_node {
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c	(revision 239602)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c	(working copy)
@@ -1649,13 +1649,6 @@
 	dsl_dataset_t *ds = drc->drc_logical_ds;
 	int err, myerr;
 
-	/*
-	 * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean()
-	 * expects it to have a ds_user_ptr (and zil), but clone_swap()
-	 * can close it.
-	 */
-	txg_wait_synced(ds->ds_dir->dd_pool, 0);
-
 	if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) {
 		err = dsl_dataset_clone_swap(drc->drc_real_ds, ds,
 		    drc->drc_force);
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c	(revision 239602)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c	(working copy)
@@ -22,6 +22,9 @@
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Portions Copyright 2011 Martin Matuska <mm at FreeBSD.org>
  */
+/*
+ * Copyright (c) 2012, Delphix. All rights reserved.
+ */
 
 #include <sys/zfs_context.h>
 #include <sys/txg_impl.h>
@@ -596,7 +599,7 @@
 	mutex_destroy(&tl->tl_lock);
 }
 
-int
+boolean_t
 txg_list_empty(txg_list_t *tl, uint64_t txg)
 {
 	return (tl->tl_head[txg & TXG_MASK] == NULL);
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c	(revision 239602)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c	(working copy)
@@ -1769,15 +1769,15 @@
 	dnode_init();
 	dbuf_init();
 	zfetch_init();
+	l2arc_init();
 	arc_init();
-	l2arc_init();
 }
 
 void
 dmu_fini(void)
 {
+	arc_fini();
 	l2arc_fini();
-	arc_fini();
 	zfetch_fini();
 	dbuf_fini();
 	dnode_fini();
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c	(revision 239602)
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c	(working copy)
@@ -106,14 +106,8 @@
 	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
 	ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
 	if (ds == NULL) {
-		/*
-		 * Account for the meta-objset space in its placeholder
-		 * dsl_dir.
-		 */
-		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
-		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
-		    used, compressed, uncompressed, tx);
-		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
+        dsl_pool_mos_diduse_space(tx->tx_pool,
+                used, compressed, uncompressed);
 		return;
 	}
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
@@ -155,9 +149,8 @@
 		 */
 		dsl_free(tx->tx_pool, tx->tx_txg, bp);
 
-		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
-		    -used, -compressed, -uncompressed, tx);
-		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
+        dsl_pool_mos_diduse_space(tx->tx_pool,
+                -used, -compressed, -uncompressed);
 		return (used);
 	}
 	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
@@ -1116,26 +1109,26 @@
 	dummy_ds.ds_dir = dd;
 	dummy_ds.ds_object = ds->ds_object;
 
-	/*
-	 * Check for errors and mark this ds as inconsistent, in
-	 * case we crash while freeing the objects.
-	 */
-	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
-	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
-	if (err)
-		goto out;
-
-	err = dmu_objset_from_ds(ds, &os);
-	if (err)
-		goto out;
-
-	/*
-	 * If async destruction is not enabled try to remove all objects
-	 * while in the open context so that there is less work to do in
-	 * the syncing context.
-	 */
 	if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
 	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
+        /*
+         * If async destruction is not enabled try to remove all objects
+         * while in the open context so that there is less work to do in
+         * the syncing context.
+         */
+        err = dsl_sync_task_do(dd->dd_pool,
+                dsl_dataset_destroy_begin_check,
+                dsl_dataset_destroy_begin_sync, ds, NULL, 0);
+        if (err)
+            goto out;
+
+        err = dmu_objset_from_ds(ds, &os);
+        if (err)
+            goto out;
+        /*
+         * Remove all objects while in the open context so that
+         * there is less work to do in the syncing context.
+         */
 		for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
 		    ds->ds_phys->ds_prev_snap_txg)) {
 			/*
@@ -1146,30 +1139,25 @@
 		}
 		if (err != ESRCH)
 			goto out;
-	}
 
-	/*
-	 * Only the ZIL knows how to free log blocks.
-	 */
-	zil_destroy(dmu_objset_zil(os), B_FALSE);
+        /*
+         * Sync out all in-flight IO.
+         */
+        txg_wait_synced(dd->dd_pool, 0);
 
-	/*
-	 * Sync out all in-flight IO.
-	 */
-	txg_wait_synced(dd->dd_pool, 0);
+        /*
+         * If we managed to free all the objects in open
+         * context, the user space accounting should be zero.
+         */
+        if (ds->ds_phys->ds_bp.blk_fill == 0 &&
+                dmu_objset_userused_enabled(os)) {
+                    uint64_t count;
 
-	/*
-	 * If we managed to free all the objects in open
-	 * context, the user space accounting should be zero.
-	 */
-	if (ds->ds_phys->ds_bp.blk_fill == 0 &&
-	    dmu_objset_userused_enabled(os)) {
-		uint64_t count;
-
-		ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
-		    count == 0);
-		ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 ||
-		    count == 0);
+                    ASSERT(zap_count(os, DMU_USERUSED_OBJECT,
+                                &count) != 0 || count == 0);
+                    ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT,
+                                &count) != 0 || count == 0);
+        }
 	}
 
 	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
@@ -1906,6 +1894,7 @@
 	} else {
 		zfeature_info_t *async_destroy =
 		    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
+        objset_t *os;
 
 		/*
 		 * There's no next snapshot, so this is a head dataset.
@@ -1917,6 +1906,8 @@
 		dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
 		ds->ds_phys->ds_deadlist_obj = 0;
 
+        VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os));
+
 		if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
 			err = old_synchronous_dataset_destroy(ds, tx);
 		} else {
@@ -1926,12 +1917,12 @@
 			 */
 			uint64_t used, comp, uncomp;
 
-			ASSERT(err == 0 || err == EBUSY);
+            zil_destroy_sync(dmu_objset_zil(os), tx);
+
 			if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
 				spa_feature_incr(dp->dp_spa, async_destroy, tx);
-				dp->dp_bptree_obj = bptree_alloc(
-				    dp->dp_meta_objset, tx);
-				VERIFY(zap_add(dp->dp_meta_objset,
+                    dp->dp_bptree_obj = bptree_alloc(mos, tx);
+                    VERIFY(zap_add(mos,
 				    DMU_POOL_DIRECTORY_OBJECT,
 				    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
 				    &dp->dp_bptree_obj, tx) == 0);
@@ -1944,7 +1935,7 @@
 			ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
 			    ds->ds_phys->ds_unique_bytes == used);
 
-			bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj,
+            bptree_add(mos, dp->dp_bptree_obj,
 			    &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
 			    used, comp, uncomp, tx);
 			dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
@@ -2233,7 +2224,6 @@
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
 	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;
 
-	dsl_dir_dirty(ds->ds_dir, tx);
 	dmu_objset_sync(ds->ds_objset, zio, tx);
 }
 


>Release-Note:
>Audit-Trail:
>Unformatted:

