git: 6b04e2bedd33 - releng/14.0 - zfs: merge openzfs/zfs@2407f30bd (zfs-2.2-release) into stable/14
Date: Sat, 14 Oct 2023 22:35:40 UTC
The branch releng/14.0 has been updated by mm:

URL: https://cgit.FreeBSD.org/src/commit/?id=6b04e2bedd33f96037f4a44594d129608ad5e87c

commit 6b04e2bedd33f96037f4a44594d129608ad5e87c
Author:     Martin Matuska <mm@FreeBSD.org>
AuthorDate: 2023-10-08 07:58:51 +0000
Commit:     Martin Matuska <mm@FreeBSD.org>
CommitDate: 2023-10-14 20:52:07 +0000

    zfs: merge openzfs/zfs@2407f30bd (zfs-2.2-release) into stable/14

    Notable upstream pull request merges:
      #15290 33d7c2d16 import: require force when cachefile hostid doesn't match on-disk
      #15319 bcd010d3a Reduce number of metaslab preload taskq threads
      #15339 1611b8e56 Add BTI landing pads to the AArch64 SHA2 assembly
      #15340 bc77a0c85 ARC: Remove b_cv from struct l1arc_buf_hdr
      #15347 3158b5d71 ARC: Drop different size headers for crypto
      #15350 ba7797c8d ARC: Remove b_bufcnt/b_ebufcnt from ARC headers
      #15353 9be8ddfb3 ZIL: Reduce maximum size of WR_COPIED to 7.5K
      #15362 8495536f7 zfsconcepts: add description of block cloning

    Obtained from:  OpenZFS
    OpenZFS commit: 2407f30bda96f7d61a32fc38c638b3eb5b216284
    OpenZFS tag:    zfs-2.2.0-rc5
    Approved by:    re (gjb)

    (cherry picked from commit fdc38bc6cd28a56fbc82d6ca1d99f47569070b3a)
---
 sys/contrib/openzfs/.cirrus.yml                    |  21 ++
 sys/contrib/openzfs/.gitignore                     |   2 +-
 sys/contrib/openzfs/META                           |   2 +-
 sys/contrib/openzfs/cmd/zpool/zpool_main.c         |  23 +-
 sys/contrib/openzfs/config/zfs-build.m4            |   3 +
 .../openzfs/include/os/linux/zfs/sys/trace_arc.h   |  12 +-
 sys/contrib/openzfs/include/sys/arc_impl.h         |  10 +-
 sys/contrib/openzfs/include/sys/metaslab_impl.h    |   1 -
 sys/contrib/openzfs/include/sys/spa_impl.h         |   4 +-
 sys/contrib/openzfs/man/man4/zfs.4                 |  11 +
 sys/contrib/openzfs/man/man7/zfsconcepts.7         |  40 ++-
 .../module/icp/asm-aarch64/sha2/sha256-armv8.S     |   3 +
 .../module/icp/asm-aarch64/sha2/sha512-armv8.S     |   2 +
 .../openzfs/module/os/freebsd/zfs/sysctl_os.c      |  22 --
 sys/contrib/openzfs/module/zfs/arc.c               | 322 +++++++++----------
 sys/contrib/openzfs/module/zfs/metaslab.c          |  23 +-
 sys/contrib/openzfs/module/zfs/spa.c               |  46 ++-
 sys/contrib/openzfs/module/zfs/zil.c               |  17 +-
 sys/contrib/openzfs/rpm/generic/zfs.spec.in        |   2 +-
 sys/contrib/openzfs/tests/runfiles/common.run      |   4 +
 .../openzfs/tests/zfs-tests/tests/Makefile.am      |   4 +
 ...ock_cloning_copyfilerange_fallback_same_txg.ksh |   2 +
 .../cli_root/zpool_import/zpool_import.cfg         |   5 +
 .../cli_root/zpool_import/zpool_import.kshlib      |   1 +
 .../zpool_import/zpool_import_hostid_changed.ksh   |  59 ++++
 .../zpool_import_hostid_changed_cachefile.ksh      |  65 +++++
 ...ort_hostid_changed_cachefile_unclean_export.ksh |  75 +++++
 .../zpool_import_hostid_changed_unclean_export.ksh |  70 +++++
 sys/modules/zfs/zfs_config.h                       |   4 +-
 sys/modules/zfs/zfs_gitrev.h                       |   2 +-
 30 files changed, 521 insertions(+), 336 deletions(-)

diff --git a/sys/contrib/openzfs/.cirrus.yml b/sys/contrib/openzfs/.cirrus.yml
new file mode 100644
index 000000000000..18b292289e20
--- /dev/null
+++ b/sys/contrib/openzfs/.cirrus.yml
@@ -0,0 +1,21 @@
+env:
+  CIRRUS_CLONE_DEPTH: 1
+  ARCH: amd64
+
+build_task:
+  matrix:
+    freebsd_instance:
+      image_family: freebsd-12-4
+    freebsd_instance:
+      image_family: freebsd-13-2
+    freebsd_instance:
+      image_family: freebsd-14-0-snap
+  prepare_script:
+    - pkg install -y autoconf automake libtool gettext-runtime gmake ksh93 py39-packaging py39-cffi py39-sysctl
+  configure_script:
+    - env MAKE=gmake ./autogen.sh
+    - env MAKE=gmake ./configure --with-config="user" --with-python=3.9
+  build_script:
+    - gmake -j `sysctl -n kern.smp.cpus`
+  install_script:
+    - gmake install
diff --git a/sys/contrib/openzfs/.gitignore
b/sys/contrib/openzfs/.gitignore index 8d91dd9466c5..1ef47d921c28 100644 --- a/sys/contrib/openzfs/.gitignore +++ b/sys/contrib/openzfs/.gitignore @@ -42,6 +42,7 @@ !udev/** !.editorconfig +!.cirrus.yml !.gitignore !.gitmodules !AUTHORS @@ -60,7 +61,6 @@ !TEST !zfs.release.in - # # Normal rules # diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META index 9ffe90458dbd..4178f1b5daa4 100644 --- a/sys/contrib/openzfs/META +++ b/sys/contrib/openzfs/META @@ -2,7 +2,7 @@ Meta: 1 Name: zfs Branch: 1.0 Version: 2.2.0 -Release: rc4 +Release: rc5 Release-Tags: relext License: CDDL Author: OpenZFS diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c index d64fdfa5ba4c..5507f9d3fd67 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c @@ -3122,12 +3122,21 @@ zfs_force_import_required(nvlist_t *config) nvlist_t *nvinfo; state = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE); - (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); + nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); + + /* + * The hostid on LOAD_INFO comes from the MOS label via + * spa_tryimport(). If its not there then we're likely talking to an + * older kernel, so use the top one, which will be from the label + * discovered in zpool_find_import(), or if a cachefile is in use, the + * local hostid. + */ + if (nvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_HOSTID, &hostid) != 0) + nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); if (state != POOL_STATE_EXPORTED && hostid != get_system_hostid()) return (B_TRUE); - nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_STATE)) { mmp_state_t mmp_state = fnvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_STATE); @@ -3198,7 +3207,10 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, time_t timestamp = 0; uint64_t hostid = 0; - if (nvlist_exists(config, ZPOOL_CONFIG_HOSTNAME)) + if (nvlist_exists(nvinfo, ZPOOL_CONFIG_HOSTNAME)) + hostname = fnvlist_lookup_string(nvinfo, + ZPOOL_CONFIG_HOSTNAME); + else if (nvlist_exists(config, ZPOOL_CONFIG_HOSTNAME)) hostname = fnvlist_lookup_string(config, ZPOOL_CONFIG_HOSTNAME); @@ -3206,7 +3218,10 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, timestamp = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_TIMESTAMP); - if (nvlist_exists(config, ZPOOL_CONFIG_HOSTID)) + if (nvlist_exists(nvinfo, ZPOOL_CONFIG_HOSTID)) + hostid = fnvlist_lookup_uint64(nvinfo, + ZPOOL_CONFIG_HOSTID); + else if (nvlist_exists(config, ZPOOL_CONFIG_HOSTID)) hostid = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID); diff --git a/sys/contrib/openzfs/config/zfs-build.m4 b/sys/contrib/openzfs/config/zfs-build.m4 index 5ea6aa29a3de..e4197dc1424e 100644 --- a/sys/contrib/openzfs/config/zfs-build.m4 +++ b/sys/contrib/openzfs/config/zfs-build.m4 @@ -358,6 +358,9 @@ AC_DEFUN([ZFS_AC_RPM], [ AS_IF([test -n "$udevruledir" ], [ RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_udevruledir $(udevruledir)"' ]) + AS_IF([test -n "$bashcompletiondir" ], [ + RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_bashcompletiondir $(bashcompletiondir)"' + ]) RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_SYSTEMD)' RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PYZFS)' RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PAM)' diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h index 
c494f48bb48b..f749223daa72 100644 --- a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h +++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h @@ -51,7 +51,6 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class, __array(uint64_t, hdr_dva_word, 2) __field(uint64_t, hdr_birth) __field(uint32_t, hdr_flags) - __field(uint32_t, hdr_bufcnt) __field(arc_buf_contents_t, hdr_type) __field(uint16_t, hdr_psize) __field(uint16_t, hdr_lsize) @@ -70,7 +69,6 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class, __entry->hdr_dva_word[1] = ab->b_dva.dva_word[1]; __entry->hdr_birth = ab->b_birth; __entry->hdr_flags = ab->b_flags; - __entry->hdr_bufcnt = ab->b_l1hdr.b_bufcnt; __entry->hdr_psize = ab->b_psize; __entry->hdr_lsize = ab->b_lsize; __entry->hdr_spa = ab->b_spa; @@ -84,12 +82,12 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class, __entry->hdr_refcount = ab->b_l1hdr.b_refcnt.rc_count; ), TP_printk("hdr { dva 0x%llx:0x%llx birth %llu " - "flags 0x%x bufcnt %u type %u psize %u lsize %u spa %llu " + "flags 0x%x type %u psize %u lsize %u spa %llu " "state_type %u access %lu mru_hits %u mru_ghost_hits %u " "mfu_hits %u mfu_ghost_hits %u l2_hits %u refcount %lli }", __entry->hdr_dva_word[0], __entry->hdr_dva_word[1], __entry->hdr_birth, __entry->hdr_flags, - __entry->hdr_bufcnt, __entry->hdr_type, __entry->hdr_psize, + __entry->hdr_type, __entry->hdr_psize, __entry->hdr_lsize, __entry->hdr_spa, __entry->hdr_state_type, __entry->hdr_access, __entry->hdr_mru_hits, __entry->hdr_mru_ghost_hits, __entry->hdr_mfu_hits, @@ -192,7 +190,6 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class, __array(uint64_t, hdr_dva_word, 2) __field(uint64_t, hdr_birth) __field(uint32_t, hdr_flags) - __field(uint32_t, hdr_bufcnt) __field(arc_buf_contents_t, hdr_type) __field(uint16_t, hdr_psize) __field(uint16_t, hdr_lsize) @@ -223,7 +220,6 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class, __entry->hdr_dva_word[1] = hdr->b_dva.dva_word[1]; __entry->hdr_birth = hdr->b_birth; __entry->hdr_flags = hdr->b_flags; - __entry->hdr_bufcnt = hdr->b_l1hdr.b_bufcnt; __entry->hdr_psize = hdr->b_psize; __entry->hdr_lsize = hdr->b_lsize; __entry->hdr_spa = hdr->b_spa; @@ -255,7 +251,7 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class, __entry->zb_blkid = zb->zb_blkid; ), TP_printk("hdr { dva 0x%llx:0x%llx birth %llu " - "flags 0x%x bufcnt %u psize %u lsize %u spa %llu state_type %u " + "flags 0x%x psize %u lsize %u spa %llu state_type %u " "access %lu mru_hits %u mru_ghost_hits %u mfu_hits %u " "mfu_ghost_hits %u l2_hits %u refcount %lli } " "bp { dva0 0x%llx:0x%llx dva1 0x%llx:0x%llx dva2 " @@ -264,7 +260,7 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class, "blkid %llu }", __entry->hdr_dva_word[0], __entry->hdr_dva_word[1], __entry->hdr_birth, __entry->hdr_flags, - __entry->hdr_bufcnt, __entry->hdr_psize, __entry->hdr_lsize, + __entry->hdr_psize, __entry->hdr_lsize, __entry->hdr_spa, __entry->hdr_state_type, __entry->hdr_access, __entry->hdr_mru_hits, __entry->hdr_mru_ghost_hits, __entry->hdr_mfu_hits, __entry->hdr_mfu_ghost_hits, diff --git a/sys/contrib/openzfs/include/sys/arc_impl.h b/sys/contrib/openzfs/include/sys/arc_impl.h index 78774792f367..adff42c55d05 100644 --- a/sys/contrib/openzfs/include/sys/arc_impl.h +++ b/sys/contrib/openzfs/include/sys/arc_impl.h @@ -159,10 +159,6 @@ struct arc_write_callback { * these two allocation states. 
*/ typedef struct l1arc_buf_hdr { - /* for waiting on reads to complete */ - kcondvar_t b_cv; - uint8_t b_byteswap; - /* protected by arc state mutex */ arc_state_t *b_state; multilist_node_t b_arc_node; @@ -173,7 +169,7 @@ typedef struct l1arc_buf_hdr { uint32_t b_mru_ghost_hits; uint32_t b_mfu_hits; uint32_t b_mfu_ghost_hits; - uint32_t b_bufcnt; + uint8_t b_byteswap; arc_buf_t *b_buf; /* self protecting */ @@ -436,12 +432,12 @@ typedef struct l2arc_dev { */ typedef struct arc_buf_hdr_crypt { abd_t *b_rabd; /* raw encrypted data */ - dmu_object_type_t b_ot; /* object type */ - uint32_t b_ebufcnt; /* count of encrypted buffers */ /* dsobj for looking up encryption key for l2arc encryption */ uint64_t b_dsobj; + dmu_object_type_t b_ot; /* object type */ + /* encryption parameters */ uint8_t b_salt[ZIO_DATA_SALT_LEN]; uint8_t b_iv[ZIO_DATA_IV_LEN]; diff --git a/sys/contrib/openzfs/include/sys/metaslab_impl.h b/sys/contrib/openzfs/include/sys/metaslab_impl.h index d328068890cc..4f434291ddbf 100644 --- a/sys/contrib/openzfs/include/sys/metaslab_impl.h +++ b/sys/contrib/openzfs/include/sys/metaslab_impl.h @@ -250,7 +250,6 @@ struct metaslab_group { int64_t mg_activation_count; metaslab_class_t *mg_class; vdev_t *mg_vd; - taskq_t *mg_taskq; metaslab_group_t *mg_prev; metaslab_group_t *mg_next; diff --git a/sys/contrib/openzfs/include/sys/spa_impl.h b/sys/contrib/openzfs/include/sys/spa_impl.h index 588c72f6e4fa..cdf65c371337 100644 --- a/sys/contrib/openzfs/include/sys/spa_impl.h +++ b/sys/contrib/openzfs/include/sys/spa_impl.h @@ -423,7 +423,9 @@ struct spa { hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */ taskq_t *spa_zvol_taskq; /* Taskq for minor management */ + taskq_t *spa_metaslab_taskq; /* Taskq for metaslab preload */ taskq_t *spa_prefetch_taskq; /* Taskq for prefetch threads */ + taskq_t *spa_upgrade_taskq; /* Taskq for upgrade jobs */ uint64_t spa_multihost; /* multihost aware (mmp) */ mmp_thread_t spa_mmp; /* multihost mmp thread */ list_t spa_leaf_list; /* list of leaf vdevs */ @@ -447,8 +449,6 @@ struct spa { */ spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */ zfs_refcount_t spa_refcount; /* number of opens */ - - taskq_t *spa_upgrade_taskq; /* taskq for upgrade jobs */ }; extern char *spa_config_path; diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4 index 3843419731b8..cfadd79d87f3 100644 --- a/sys/contrib/openzfs/man/man4/zfs.4 +++ b/sys/contrib/openzfs/man/man4/zfs.4 @@ -402,6 +402,12 @@ Practical upper limit of total metaslabs per top-level vdev. .It Sy metaslab_preload_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int Enable metaslab group preloading. . +.It Sy metaslab_preload_limit Ns = Ns Sy 10 Pq uint +Maximum number of metaslabs per group to preload +. +.It Sy metaslab_preload_pct Ns = Ns Sy 50 Pq uint +Percentage of CPUs to run a metaslab preload taskq +. .It Sy metaslab_lba_weighting_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int Give more weight to metaslabs with lower LBAs, assuming they have greater bandwidth, @@ -2144,6 +2150,11 @@ On very fragmented pools, lowering this .Pq typically to Sy 36 KiB can improve performance. . +.It Sy zil_maxcopied Ns = Ns Sy 7680 Ns B Po 7.5 KiB Pc Pq uint +This sets the maximum number of write bytes logged via WR_COPIED. +It tunes a tradeoff between additional memory copy and possibly worse log +space efficiency vs additional range lock/unlock. +. 
.It Sy zil_min_commit_timeout Ns = Ns Sy 5000 Pq u64 This sets the minimum delay in nanoseconds ZIL care to delay block commit, waiting for more records. diff --git a/sys/contrib/openzfs/man/man7/zfsconcepts.7 b/sys/contrib/openzfs/man/man7/zfsconcepts.7 index 18a9e9b5cafe..1be3d961c3d7 100644 --- a/sys/contrib/openzfs/man/man7/zfsconcepts.7 +++ b/sys/contrib/openzfs/man/man7/zfsconcepts.7 @@ -28,8 +28,9 @@ .\" Copyright 2019 Richard Laager. All rights reserved. .\" Copyright 2018 Nexenta Systems, Inc. .\" Copyright 2019 Joyent, Inc. +.\" Copyright 2023 Klara, Inc. .\" -.Dd June 30, 2019 +.Dd October 6, 2023 .Dt ZFSCONCEPTS 7 .Os . @@ -205,3 +206,40 @@ practices, such as regular backups. Consider using the .Sy compression property as a less resource-intensive alternative. +.Ss Block cloning +Block cloning is a facility that allows a file (or parts of a file) to be +.Qq cloned , +that is, a shallow copy made where the existing data blocks are referenced +rather than copied. +Later modifications to the data will cause a copy of the data block to be taken +and that copy modified. +This facility is used to implement +.Qq reflinks +or +.Qq file-level copy-on-write . +.Pp +Cloned blocks are tracked in a special on-disk structure called the Block +Reference Table +.Po BRT +.Pc . +Unlike deduplication, this table has minimal overhead, so can be enabled at all +times. +.Pp +Also unlike deduplication, cloning must be requested by a user program. +Many common file copying programs, including newer versions of +.Nm /bin/cp , +will try to create clones automatically. +Look for +.Qq clone , +.Qq dedupe +or +.Qq reflink +in the documentation for more information. +.Pp +There are some limitations to block cloning. +Only whole blocks can be cloned, and blocks can not be cloned if they are not +yet written to disk, or if they are encrypted, or the source and destination +.Sy recordsize +properties differ. +The OS may add additional restrictions; +for example, most versions of Linux will not allow clones across datasets. diff --git a/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S b/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S index fa50c4e74d59..7ae486e4e229 100644 --- a/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S +++ b/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S @@ -49,6 +49,7 @@ .type zfs_sha256_block_armv7,%function .align 6 zfs_sha256_block_armv7: + hint #34 // bti c stp x29,x30,[sp,#-128]! add x29,sp,#0 @@ -1015,6 +1016,7 @@ zfs_sha256_block_armv7: .type zfs_sha256_block_armv8,%function .align 6 zfs_sha256_block_armv8: + hint #34 // bti c .Lv8_entry: stp x29,x30,[sp,#-16]! add x29,sp,#0 @@ -1155,6 +1157,7 @@ zfs_sha256_block_armv8: .type zfs_sha256_block_neon,%function .align 4 zfs_sha256_block_neon: + hint #34 // bti c .Lneon_entry: stp x29, x30, [sp, #-16]! mov x29, sp diff --git a/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S b/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S index 1683fc1ca53c..9c61eeee4d7b 100644 --- a/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S +++ b/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S @@ -73,6 +73,7 @@ .type zfs_sha512_block_armv7,%function .align 6 zfs_sha512_block_armv7: + hint #34 // bti c stp x29,x30,[sp,#-128]! 
add x29,sp,#0 @@ -1040,6 +1041,7 @@ zfs_sha512_block_armv7: .type zfs_sha512_block_armv8,%function .align 6 zfs_sha512_block_armv8: + hint #34 // bti c .Lv8_entry: // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later stp x29,x30,[sp,#-16]! diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c index 8ae2f23c3ecf..38ef590702cb 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c @@ -596,28 +596,6 @@ SYSCTL_UINT(_vfs_zfs_metaslab, OID_AUTO, df_free_pct, " space map to continue allocations in a first-fit fashion"); /* END CSTYLED */ -/* - * Percentage of all cpus that can be used by the metaslab taskq. - */ -extern int metaslab_load_pct; - -/* BEGIN CSTYLED */ -SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, load_pct, - CTLFLAG_RWTUN, &metaslab_load_pct, 0, - "Percentage of cpus that can be used by the metaslab taskq"); -/* END CSTYLED */ - -/* - * Max number of metaslabs per group to preload. - */ -extern uint_t metaslab_preload_limit; - -/* BEGIN CSTYLED */ -SYSCTL_UINT(_vfs_zfs_metaslab, OID_AUTO, preload_limit, - CTLFLAG_RWTUN, &metaslab_preload_limit, 0, - "Max number of metaslabs per group to preload"); -/* END CSTYLED */ - /* mmp.c */ int diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index 22dc0ed5e3b6..b5946e7604c0 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -748,8 +748,7 @@ taskq_t *arc_prune_taskq; * Other sizes */ -#define HDR_FULL_CRYPT_SIZE ((int64_t)sizeof (arc_buf_hdr_t)) -#define HDR_FULL_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_crypt_hdr)) +#define HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t)) #define HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr)) /* @@ -1113,7 +1112,6 @@ buf_hash_remove(arc_buf_hdr_t *hdr) */ static kmem_cache_t *hdr_full_cache; -static kmem_cache_t *hdr_full_crypt_cache; static kmem_cache_t *hdr_l2only_cache; static kmem_cache_t *buf_cache; @@ -1134,7 +1132,6 @@ buf_fini(void) for (int i = 0; i < BUF_LOCKS; i++) mutex_destroy(BUF_HASH_LOCK(i)); kmem_cache_destroy(hdr_full_cache); - kmem_cache_destroy(hdr_full_crypt_cache); kmem_cache_destroy(hdr_l2only_cache); kmem_cache_destroy(buf_cache); } @@ -1151,7 +1148,6 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag) memset(hdr, 0, HDR_FULL_SIZE); hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; - cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL); zfs_refcount_create(&hdr->b_l1hdr.b_refcnt); #ifdef ZFS_DEBUG mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL); @@ -1163,19 +1159,6 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag) return (0); } -static int -hdr_full_crypt_cons(void *vbuf, void *unused, int kmflag) -{ - (void) unused; - arc_buf_hdr_t *hdr = vbuf; - - hdr_full_cons(vbuf, unused, kmflag); - memset(&hdr->b_crypt_hdr, 0, sizeof (hdr->b_crypt_hdr)); - arc_space_consume(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS); - - return (0); -} - static int hdr_l2only_cons(void *vbuf, void *unused, int kmflag) { @@ -1211,7 +1194,6 @@ hdr_full_dest(void *vbuf, void *unused) arc_buf_hdr_t *hdr = vbuf; ASSERT(HDR_EMPTY(hdr)); - cv_destroy(&hdr->b_l1hdr.b_cv); zfs_refcount_destroy(&hdr->b_l1hdr.b_refcnt); #ifdef ZFS_DEBUG mutex_destroy(&hdr->b_l1hdr.b_freeze_lock); @@ -1220,16 +1202,6 @@ hdr_full_dest(void *vbuf, void *unused) arc_space_return(HDR_FULL_SIZE, ARC_SPACE_HDRS); } -static void -hdr_full_crypt_dest(void *vbuf, void *unused) 
-{ - (void) vbuf, (void) unused; - - hdr_full_dest(vbuf, unused); - arc_space_return(sizeof (((arc_buf_hdr_t *)NULL)->b_crypt_hdr), - ARC_SPACE_HDRS); -} - static void hdr_l2only_dest(void *vbuf, void *unused) { @@ -1285,9 +1257,6 @@ retry: hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE, 0, hdr_full_cons, hdr_full_dest, NULL, NULL, NULL, 0); - hdr_full_crypt_cache = kmem_cache_create("arc_buf_hdr_t_full_crypt", - HDR_FULL_CRYPT_SIZE, 0, hdr_full_crypt_cons, hdr_full_crypt_dest, - NULL, NULL, NULL, 0); hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only", HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, NULL, NULL, NULL, 0); @@ -1995,7 +1964,6 @@ arc_buf_untransform_in_place(arc_buf_t *buf) arc_buf_size(buf)); buf->b_flags &= ~ARC_BUF_FLAG_ENCRYPTED; buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED; - hdr->b_crypt_hdr.b_ebufcnt -= 1; } /* @@ -2230,7 +2198,6 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state) ASSERT(HDR_HAS_L1HDR(hdr)); if (GHOST_STATE(state)) { - ASSERT0(hdr->b_l1hdr.b_bufcnt); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); ASSERT(!HDR_HAS_RABD(hdr)); @@ -2270,7 +2237,6 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state) ASSERT(HDR_HAS_L1HDR(hdr)); if (GHOST_STATE(state)) { - ASSERT0(hdr->b_l1hdr.b_bufcnt); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); ASSERT(!HDR_HAS_RABD(hdr)); @@ -2386,7 +2352,9 @@ arc_buf_info(arc_buf_t *ab, arc_buf_info_t *abi, int state_index) l2hdr = &hdr->b_l2hdr; if (l1hdr) { - abi->abi_bufcnt = l1hdr->b_bufcnt; + abi->abi_bufcnt = 0; + for (arc_buf_t *buf = l1hdr->b_buf; buf; buf = buf->b_next) + abi->abi_bufcnt++; abi->abi_access = l1hdr->b_arc_access; abi->abi_mru_hits = l1hdr->b_mru_hits; abi->abi_mru_ghost_hits = l1hdr->b_mru_ghost_hits; @@ -2414,7 +2382,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr) { arc_state_t *old_state; int64_t refcnt; - uint32_t bufcnt; boolean_t update_old, update_new; arc_buf_contents_t type = arc_buf_type(hdr); @@ -2428,19 +2395,16 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr) if (HDR_HAS_L1HDR(hdr)) { old_state = hdr->b_l1hdr.b_state; refcnt = zfs_refcount_count(&hdr->b_l1hdr.b_refcnt); - bufcnt = hdr->b_l1hdr.b_bufcnt; - update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL || - HDR_HAS_RABD(hdr)); + update_old = (hdr->b_l1hdr.b_buf != NULL || + hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr)); - IMPLY(GHOST_STATE(old_state), bufcnt == 0); - IMPLY(GHOST_STATE(new_state), bufcnt == 0); IMPLY(GHOST_STATE(old_state), hdr->b_l1hdr.b_buf == NULL); IMPLY(GHOST_STATE(new_state), hdr->b_l1hdr.b_buf == NULL); - IMPLY(old_state == arc_anon, bufcnt <= 1); + IMPLY(old_state == arc_anon, hdr->b_l1hdr.b_buf == NULL || + ARC_BUF_LAST(hdr->b_l1hdr.b_buf)); } else { old_state = arc_l2c_only; refcnt = 0; - bufcnt = 0; update_old = B_FALSE; } update_new = update_old; @@ -2488,14 +2452,12 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr) if (update_new && new_state != arc_l2c_only) { ASSERT(HDR_HAS_L1HDR(hdr)); if (GHOST_STATE(new_state)) { - ASSERT0(bufcnt); /* * When moving a header to a ghost state, we first - * remove all arc buffers. Thus, we'll have a - * bufcnt of zero, and no arc buffer to use for - * the reference. As a result, we use the arc - * header pointer for the reference. + * remove all arc buffers. Thus, we'll have no arc + * buffer to use for the reference. As a result, we + * use the arc header pointer for the reference. 
*/ (void) zfs_refcount_add_many( &new_state->arcs_size[type], @@ -2503,7 +2465,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr) ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); ASSERT(!HDR_HAS_RABD(hdr)); } else { - uint32_t buffers = 0; /* * Each individual buffer holds a unique reference, @@ -2512,8 +2473,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr) */ for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) { - ASSERT3U(bufcnt, !=, 0); - buffers++; /* * When the arc_buf_t is sharing the data @@ -2529,7 +2488,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr) &new_state->arcs_size[type], arc_buf_size(buf), buf); } - ASSERT3U(bufcnt, ==, buffers); if (hdr->b_l1hdr.b_pabd != NULL) { (void) zfs_refcount_add_many( @@ -2548,7 +2506,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr) if (update_old && old_state != arc_l2c_only) { ASSERT(HDR_HAS_L1HDR(hdr)); if (GHOST_STATE(old_state)) { - ASSERT0(bufcnt); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); ASSERT(!HDR_HAS_RABD(hdr)); @@ -2564,7 +2521,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr) &old_state->arcs_size[type], HDR_GET_LSIZE(hdr), hdr); } else { - uint32_t buffers = 0; /* * Each individual buffer holds a unique reference, @@ -2573,8 +2529,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr) */ for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) { - ASSERT3U(bufcnt, !=, 0); - buffers++; /* * When the arc_buf_t is sharing the data @@ -2590,7 +2544,6 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr) &old_state->arcs_size[type], arc_buf_size(buf), buf); } - ASSERT3U(bufcnt, ==, buffers); ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr)); @@ -2838,9 +2791,6 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb, VERIFY3P(buf->b_data, !=, NULL); hdr->b_l1hdr.b_buf = buf; - hdr->b_l1hdr.b_bufcnt += 1; - if (encrypted) - hdr->b_crypt_hdr.b_ebufcnt += 1; /* * If the user wants the data from the hdr, we need to either copy or @@ -3082,8 +3032,6 @@ arc_buf_remove(arc_buf_hdr_t *hdr, arc_buf_t *buf) } buf->b_next = NULL; ASSERT3P(lastbuf, !=, buf); - IMPLY(hdr->b_l1hdr.b_bufcnt > 0, lastbuf != NULL); - IMPLY(hdr->b_l1hdr.b_bufcnt > 0, hdr->b_l1hdr.b_buf != NULL); IMPLY(lastbuf != NULL, ARC_BUF_LAST(lastbuf)); return (lastbuf); @@ -3122,22 +3070,20 @@ arc_buf_destroy_impl(arc_buf_t *buf) } buf->b_data = NULL; - ASSERT(hdr->b_l1hdr.b_bufcnt > 0); - hdr->b_l1hdr.b_bufcnt -= 1; - - if (ARC_BUF_ENCRYPTED(buf)) { - hdr->b_crypt_hdr.b_ebufcnt -= 1; - - /* - * If we have no more encrypted buffers and we've - * already gotten a copy of the decrypted data we can - * free b_rabd to save some space. - */ - if (hdr->b_crypt_hdr.b_ebufcnt == 0 && - HDR_HAS_RABD(hdr) && hdr->b_l1hdr.b_pabd != NULL && - !HDR_IO_IN_PROGRESS(hdr)) { - arc_hdr_free_abd(hdr, B_TRUE); + /* + * If we have no more encrypted buffers and we've already + * gotten a copy of the decrypted data we can free b_rabd + * to save some space. 
+ */ + if (ARC_BUF_ENCRYPTED(buf) && HDR_HAS_RABD(hdr) && + hdr->b_l1hdr.b_pabd != NULL && !HDR_IO_IN_PROGRESS(hdr)) { + arc_buf_t *b; + for (b = hdr->b_l1hdr.b_buf; b; b = b->b_next) { + if (b != buf && ARC_BUF_ENCRYPTED(b)) + break; } + if (b == NULL) + arc_hdr_free_abd(hdr, B_TRUE); } } @@ -3298,11 +3244,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, arc_buf_hdr_t *hdr; VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA); - if (protected) { - hdr = kmem_cache_alloc(hdr_full_crypt_cache, KM_PUSHPAGE); - } else { - hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE); - } + hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE); ASSERT(HDR_EMPTY(hdr)); #ifdef ZFS_DEBUG @@ -3325,7 +3267,6 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, hdr->b_l1hdr.b_mru_ghost_hits = 0; hdr->b_l1hdr.b_mfu_hits = 0; hdr->b_l1hdr.b_mfu_ghost_hits = 0; - hdr->b_l1hdr.b_bufcnt = 0; hdr->b_l1hdr.b_buf = NULL; ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); @@ -3351,16 +3292,6 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) || (old == hdr_l2only_cache && new == hdr_full_cache)); - /* - * if the caller wanted a new full header and the header is to be - * encrypted we will actually allocate the header from the full crypt - * cache instead. The same applies to freeing from the old cache. - */ - if (HDR_PROTECTED(hdr) && new == hdr_full_cache) - new = hdr_full_crypt_cache; - if (HDR_PROTECTED(hdr) && old == hdr_full_cache) - old = hdr_full_crypt_cache; - nhdr = kmem_cache_alloc(new, KM_PUSHPAGE); ASSERT(MUTEX_HELD(HDR_LOCK(hdr))); @@ -3368,7 +3299,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) memcpy(nhdr, hdr, HDR_L2ONLY_SIZE); - if (new == hdr_full_cache || new == hdr_full_crypt_cache) { + if (new == hdr_full_cache) { arc_hdr_set_flags(nhdr, ARC_FLAG_HAS_L1HDR); /* * arc_access and arc_change_state need to be aware that a @@ -3382,7 +3313,6 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) ASSERT(!HDR_HAS_RABD(hdr)); } else { ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); - ASSERT0(hdr->b_l1hdr.b_bufcnt); #ifdef ZFS_DEBUG ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); #endif @@ -3448,126 +3378,6 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) return (nhdr); } -/* - * This function allows an L1 header to be reallocated as a crypt - * header and vice versa. If we are going to a crypt header, the - * new fields will be zeroed out. - */ -static arc_buf_hdr_t * -arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt) -{ - arc_buf_hdr_t *nhdr; - arc_buf_t *buf; - kmem_cache_t *ncache, *ocache; - - /* - * This function requires that hdr is in the arc_anon state. - * Therefore it won't have any L2ARC data for us to worry - * about copying. - */ - ASSERT(HDR_HAS_L1HDR(hdr)); - ASSERT(!HDR_HAS_L2HDR(hdr)); - ASSERT3U(!!HDR_PROTECTED(hdr), !=, need_crypt); - ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon); - ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); - ASSERT(!list_link_active(&hdr->b_l2hdr.b_l2node)); - ASSERT3P(hdr->b_hash_next, ==, NULL); - - if (need_crypt) { - ncache = hdr_full_crypt_cache; - ocache = hdr_full_cache; - } else { - ncache = hdr_full_cache; - ocache = hdr_full_crypt_cache; - } - - nhdr = kmem_cache_alloc(ncache, KM_PUSHPAGE); - - /* - * Copy all members that aren't locks or condvars to the new header. 
- * No lists are pointing to us (as we asserted above), so we don't - * need to worry about the list nodes. - */ - nhdr->b_dva = hdr->b_dva; - nhdr->b_birth = hdr->b_birth; - nhdr->b_type = hdr->b_type; - nhdr->b_flags = hdr->b_flags; - nhdr->b_psize = hdr->b_psize; - nhdr->b_lsize = hdr->b_lsize; - nhdr->b_spa = hdr->b_spa; -#ifdef ZFS_DEBUG - nhdr->b_l1hdr.b_freeze_cksum = hdr->b_l1hdr.b_freeze_cksum; -#endif - nhdr->b_l1hdr.b_bufcnt = hdr->b_l1hdr.b_bufcnt; - nhdr->b_l1hdr.b_byteswap = hdr->b_l1hdr.b_byteswap; - nhdr->b_l1hdr.b_state = hdr->b_l1hdr.b_state; - nhdr->b_l1hdr.b_arc_access = hdr->b_l1hdr.b_arc_access; - nhdr->b_l1hdr.b_mru_hits = hdr->b_l1hdr.b_mru_hits; - nhdr->b_l1hdr.b_mru_ghost_hits = hdr->b_l1hdr.b_mru_ghost_hits; - nhdr->b_l1hdr.b_mfu_hits = hdr->b_l1hdr.b_mfu_hits; - nhdr->b_l1hdr.b_mfu_ghost_hits = hdr->b_l1hdr.b_mfu_ghost_hits; - nhdr->b_l1hdr.b_acb = hdr->b_l1hdr.b_acb; - nhdr->b_l1hdr.b_pabd = hdr->b_l1hdr.b_pabd; - - /* - * This zfs_refcount_add() exists only to ensure that the individual - * arc buffers always point to a header that is referenced, avoiding - * a small race condition that could trigger ASSERTs. - */ - (void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, FTAG); - nhdr->b_l1hdr.b_buf = hdr->b_l1hdr.b_buf; - for (buf = nhdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) - buf->b_hdr = nhdr; - - zfs_refcount_transfer(&nhdr->b_l1hdr.b_refcnt, &hdr->b_l1hdr.b_refcnt); - (void) zfs_refcount_remove(&nhdr->b_l1hdr.b_refcnt, FTAG); - ASSERT0(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt)); - - if (need_crypt) { - arc_hdr_set_flags(nhdr, ARC_FLAG_PROTECTED); - } else { - arc_hdr_clear_flags(nhdr, ARC_FLAG_PROTECTED); - } - - /* unset all members of the original hdr */ - memset(&hdr->b_dva, 0, sizeof (dva_t)); - hdr->b_birth = 0; - hdr->b_type = 0; - hdr->b_flags = 0; - hdr->b_psize = 0; - hdr->b_lsize = 0; - hdr->b_spa = 0; -#ifdef ZFS_DEBUG - hdr->b_l1hdr.b_freeze_cksum = NULL; -#endif - hdr->b_l1hdr.b_buf = NULL; - hdr->b_l1hdr.b_bufcnt = 0; - hdr->b_l1hdr.b_byteswap = 0; - hdr->b_l1hdr.b_state = NULL; - hdr->b_l1hdr.b_arc_access = 0; - hdr->b_l1hdr.b_mru_hits = 0; - hdr->b_l1hdr.b_mru_ghost_hits = 0; - hdr->b_l1hdr.b_mfu_hits = 0; - hdr->b_l1hdr.b_mfu_ghost_hits = 0; - hdr->b_l1hdr.b_acb = NULL; - hdr->b_l1hdr.b_pabd = NULL; - - if (ocache == hdr_full_crypt_cache) { - ASSERT(!HDR_HAS_RABD(hdr)); - hdr->b_crypt_hdr.b_ot = DMU_OT_NONE; - hdr->b_crypt_hdr.b_ebufcnt = 0; - hdr->b_crypt_hdr.b_dsobj = 0; - memset(hdr->b_crypt_hdr.b_salt, 0, ZIO_DATA_SALT_LEN); - memset(hdr->b_crypt_hdr.b_iv, 0, ZIO_DATA_IV_LEN); - memset(hdr->b_crypt_hdr.b_mac, 0, ZIO_DATA_MAC_LEN); - } - - buf_discard_identity(hdr); - kmem_cache_free(ocache, hdr); - - return (nhdr); -} - /* * This function is used by the send / receive code to convert a newly * allocated arc_buf_t to one that is suitable for a raw encrypted write. It @@ -3587,8 +3397,7 @@ arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder, ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon); buf->b_flags |= (ARC_BUF_FLAG_COMPRESSED | ARC_BUF_FLAG_ENCRYPTED); - if (!HDR_PROTECTED(hdr)) - hdr = arc_hdr_realloc_crypt(hdr, B_TRUE); + arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED); hdr->b_crypt_hdr.b_dsobj = dsobj; hdr->b_crypt_hdr.b_ot = ot; hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ? @@ -3789,8 +3598,6 @@ static void arc_hdr_destroy(arc_buf_hdr_t *hdr) { if (HDR_HAS_L1HDR(hdr)) { - ASSERT(hdr->b_l1hdr.b_buf == NULL || - hdr->b_l1hdr.b_bufcnt > 0); *** 836 LINES SKIPPED ***
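A note on the #15290 change to cmd/zpool/zpool_main.c above: zfs_force_import_required() now prefers the hostid that spa_tryimport() places in the ZPOOL_CONFIG_LOAD_INFO nvlist (read from the MOS) and only falls back to the top-level config value, so a stale cachefile hostid can no longer hide the fact that a pool was last imported on another system. A simplified restatement of that lookup is sketched below; the standalone function name is illustrative, and the MMP checks of the real function are omitted.

/*
 * Simplified sketch of the hostid check from openzfs/zfs#15290
 * (cmd/zpool/zpool_main.c, zfs_force_import_required()).
 */
#include <libnvpair.h>
#include <libzutil.h>
#include <sys/fs/zfs.h>

static boolean_t
import_needs_force(nvlist_t *config)
{
	uint64_t state, hostid = 0;
	nvlist_t *nvinfo;

	state = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE);
	nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);

	/*
	 * Prefer the hostid copied out of the MOS by spa_tryimport();
	 * fall back to the label/cachefile value only when it is
	 * missing, e.g. when talking to an older kernel.
	 */
	if (nvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_HOSTID, &hostid) != 0)
		(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID,
		    &hostid);

	return (state != POOL_STATE_EXPORTED &&
	    hostid != get_system_hostid());
}

Requiring force in that case is exactly what the new zpool_import_hostid_changed* tests listed in the diffstat verify.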
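The zil_maxcopied tunable documented in zfs.4 above (#15353) caps how much write data the ZIL embeds directly in a WR_COPIED log record; anything larger is logged by reference and copied or read back at commit time, trading an extra memory copy against log space and range-lock traffic. The snippet below is only an illustrative restatement of that size cutoff, not the actual zfs_log_write() logic, which also considers zfs_immediate_write_sz and the presence of a separate log device; the enum and helper are stand-ins, not OpenZFS definitions.

#include <stdint.h>
#include <stdio.h>

typedef enum { WR_INDIRECT, WR_COPIED, WR_NEED_COPY } wr_state_t;

static const uint32_t zil_maxcopied = 7680;	/* 7.5 KiB default */

static wr_state_t
pick_wr_state(uint64_t resid)
{
	/* Small writes are embedded in the record, large ones referenced. */
	return (resid <= zil_maxcopied ? WR_COPIED : WR_NEED_COPY);
}

int
main(void)
{
	printf("4 KiB  -> %s\n", pick_wr_state(4096) == WR_COPIED ?
	    "WR_COPIED" : "WR_NEED_COPY");
	printf("16 KiB -> %s\n", pick_wr_state(16384) == WR_COPIED ?
	    "WR_COPIED" : "WR_NEED_COPY");
	return (0);
}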
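The zfsconcepts.7 text added by #15362 notes that block cloning must be requested by a user program; one common entry point is copy_file_range(2), which the new block_cloning_copyfilerange test in the diffstat exercises. A minimal user-space sketch follows, assuming a pool with the block_cloning feature enabled: the file names are placeholders, FreeBSD 14 declares the call in <unistd.h> (Linux needs _GNU_SOURCE), and whether the kernel actually clones the blocks or falls back to an ordinary copy depends on the conditions listed in the man page text above.

/* Minimal copy_file_range(2) sketch; "src.dat"/"dst.dat" are placeholders. */
#include <err.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>

int
main(void)
{
	int in = open("src.dat", O_RDONLY);
	int out = open("dst.dat", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	struct stat st;

	if (in < 0 || out < 0 || fstat(in, &st) == -1)
		err(1, "open/fstat");

	off_t off_in = 0, off_out = 0;
	while (off_in < st.st_size) {
		/* The kernel may clone, partially copy, or fall back. */
		ssize_t n = copy_file_range(in, &off_in, out, &off_out,
		    (size_t)(st.st_size - off_in), 0);
		if (n <= 0)
			err(1, "copy_file_range");
	}
	close(in);
	close(out);
	return (0);
}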