git: 9898f936aa69 - stable/13 - Fix scn_queue races on very old pools

From: Alexander Motin <mav_at_FreeBSD.org>
Date: Thu, 23 May 2024 17:45:40 UTC
The branch stable/13 has been updated by mav:

URL: https://cgit.FreeBSD.org/src/commit/?id=9898f936aa69d1b67bcd83d189acb6013f76bd43

commit 9898f936aa69d1b67bcd83d189acb6013f76bd43
Author:     Alexander Motin <mav@FreeBSD.org>
AuthorDate: 2024-05-23 16:20:37 +0000
Commit:     Alexander Motin <mav@FreeBSD.org>
CommitDate: 2024-05-23 17:43:02 +0000

    Fix scn_queue races on very old pools
    
    Code for pools before version 11 uses dmu_objset_find_dp() to scan
    for child datasets/clones.  It calls the enqueue_clones_cb() and
    enqueue_cb() callbacks in parallel from multiple taskq threads.
    This is unsafe for scan_ds_queue_insert(), and the concurrent
    inserts corrupt the scn_queue AVL tree.  Fix it by introducing a
    mutex to protect those two scan_ds_queue_insert() calls.  All other
    calls are made from the sync thread and so are already serialized.
    
    Reviewed-by:    Brian Behlendorf <behlendorf1@llnl.gov>
    Reviewed-by:    Brian Atkinson <batkinson@lanl.gov>
    Signed-off-by:  Alexander Motin <mav@FreeBSD.org>
    Sponsored by:   iXsystems, Inc.
    Closes  #16162
    PR:     278414
    
    (cherry picked from commit 49086aa35d987b78dbc3c9ec94814fe338e07164)
---
 sys/contrib/openzfs/include/sys/dsl_scan.h | 1 +
 sys/contrib/openzfs/module/zfs/dsl_scan.c  | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/sys/contrib/openzfs/include/sys/dsl_scan.h b/sys/contrib/openzfs/include/sys/dsl_scan.h
index d716510f879d..f2647a936c94 100644
--- a/sys/contrib/openzfs/include/sys/dsl_scan.h
+++ b/sys/contrib/openzfs/include/sys/dsl_scan.h
@@ -155,6 +155,7 @@ typedef struct dsl_scan {
 	dsl_scan_phys_t scn_phys;	/* on disk representation of scan */
 	dsl_scan_phys_t scn_phys_cached;
 	avl_tree_t scn_queue;		/* queue of datasets to scan */
+	kmutex_t scn_queue_lock;	/* serializes scn_queue inserts */
 	uint64_t scn_queues_pending;	/* outstanding data to issue */
 } dsl_scan_t;
 
diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c
index f0a851ff53a9..6979f08cc9ff 100644
--- a/sys/contrib/openzfs/module/zfs/dsl_scan.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c
@@ -469,6 +469,7 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
 
 	avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t),
 	    offsetof(scan_ds_t, sds_node));
+	mutex_init(&scn->scn_queue_lock, NULL, MUTEX_DEFAULT, NULL);
 	avl_create(&scn->scn_prefetch_queue, scan_prefetch_queue_compare,
 	    sizeof (scan_prefetch_issue_ctx_t),
 	    offsetof(scan_prefetch_issue_ctx_t, spic_avl_node));
@@ -611,6 +612,7 @@ dsl_scan_fini(dsl_pool_t *dp)
 
 		scan_ds_queue_clear(scn);
 		avl_destroy(&scn->scn_queue);
+		mutex_destroy(&scn->scn_queue_lock);
 		scan_ds_prefetch_queue_clear(scn);
 		avl_destroy(&scn->scn_prefetch_queue);
 
@@ -2402,8 +2404,10 @@ enqueue_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
 			return (err);
 		ds = prev;
 	}
+	mutex_enter(&scn->scn_queue_lock);
 	scan_ds_queue_insert(scn, ds->ds_object,
 	    dsl_dataset_phys(ds)->ds_prev_snap_txg);
+	mutex_exit(&scn->scn_queue_lock);
 	dsl_dataset_rele(ds, FTAG);
 	return (0);
 }
@@ -2593,8 +2597,10 @@ enqueue_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
 		ds = prev;
 	}
 
+	mutex_enter(&scn->scn_queue_lock);
 	scan_ds_queue_insert(scn, ds->ds_object,
 	    dsl_dataset_phys(ds)->ds_prev_snap_txg);
+	mutex_exit(&scn->scn_queue_lock);
 	dsl_dataset_rele(ds, FTAG);
 	return (0);
 }