svn commit: r307112 - in stable/11/sys/cddl/contrib/opensolaris/uts/common: fs/zfs fs/zfs/sys sys/fs

Alexander Motin mav at FreeBSD.org
Wed Oct 12 05:19:10 UTC 2016


Author: mav
Date: Wed Oct 12 05:19:08 2016
New Revision: 307112
URL: https://svnweb.freebsd.org/changeset/base/307112

Log:
  MFC r305222: MFV r302993: 7104 increase indirect block size
  
  illumos/illumos-gate at 4b5c8e93cab28d3c65ba9d407fd8f46e3be1db1c
  https://github.com/illumos/illumos-gate/commit/4b5c8e93cab28d3c65ba9d407fd8f46e3
  be1db1c
  
  https://www.illumos.org/issues/7104
    The current default indirect block size is 16KB. We can improve
    performance by increasing it to 128KB. This is especially helpful for
    any workload that needs to read most of the metadata, e.g.
    scrub/resilver, file deletion, filesystem deletion, and zfs send.
    We also need to fix a few space estimation errors to make the tests
    pass.
  
  Reviewed by: George Wilson <george.wilson at delphix.com>
  Reviewed by: Paul Dagnelie <pcd at delphix.com>
  Reviewed by: Dan McDonald <danmcd at omniti.com>
  Approved by: Robert Mustacchi <rm at joyent.com>
  Author: Matthew Ahrens <mahrens at delphix.com>

Modified:
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
  stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c	Wed Oct 12 05:17:17 2016	(r307111)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c	Wed Oct 12 05:19:08 2016	(r307112)
@@ -791,11 +791,17 @@ dmu_objset_create_impl(spa_t *spa, dsl_d
 
 		/*
 		 * Determine the number of levels necessary for the meta-dnode
-		 * to contain DN_MAX_OBJECT dnodes.
+		 * to contain DN_MAX_OBJECT dnodes.  Note that in order to
+		 * ensure that we do not overflow 64 bits, there has to be
+		 * a nlevels that gives us a number of blocks > DN_MAX_OBJECT
+		 * but < 2^64.  Therefore,
+		 * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT) (10) must be
+		 * less than (64 - log2(DN_MAX_OBJECT)) (16).
 		 */
-		while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift +
+		while ((uint64_t)mdn->dn_nblkptr <<
+		    (mdn->dn_datablkshift - DNODE_SHIFT +
 		    (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
-		    DN_MAX_OBJECT * sizeof (dnode_phys_t))
+		    DN_MAX_OBJECT)
 			levels++;
 
 		mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c	Wed Oct 12 05:17:17 2016	(r307111)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c	Wed Oct 12 05:19:08 2016	(r307112)
@@ -411,12 +411,20 @@ zfs_deadman_init()
  * it is possible to run the pool completely out of space, causing it to
  * be permanently read-only.
  *
+ * Note that on very small pools, the slop space will be larger than
+ * 3.2%, in an effort to have it be at least spa_min_slop (128MB),
+ * but we never allow it to be more than half the pool size.
+ *
  * See also the comments in zfs_space_check_t.
  */
 int spa_slop_shift = 5;
 SYSCTL_INT(_vfs_zfs, OID_AUTO, spa_slop_shift, CTLFLAG_RWTUN,
     &spa_slop_shift, 0,
     "Shift value of reserved space (1/(2^spa_slop_shift)).");
+uint64_t spa_min_slop = 128 * 1024 * 1024;
+SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, spa_min_slop, CTLFLAG_RWTUN,
+    &spa_min_slop, 0,
+    "Minimal value of reserved space");
 
 /*
  * ==========================================================================
@@ -1723,14 +1731,16 @@ spa_get_asize(spa_t *spa, uint64_t lsize
 
 /*
  * Return the amount of slop space in bytes.  It is 1/32 of the pool (3.2%),
- * or at least 32MB.
+ * or at least 128MB, unless that would cause it to be more than half the
+ * pool size.
  *
  * See the comment above spa_slop_shift for details.
  */
 uint64_t
-spa_get_slop_space(spa_t *spa) {
+spa_get_slop_space(spa_t *spa)
+{
 	uint64_t space = spa_get_dspace(spa);
-	return (MAX(space >> spa_slop_shift, SPA_MINDEVSIZE >> 1));
+	return (MAX(space >> spa_slop_shift, MIN(space >> 1, spa_min_slop)));
 }
 
 uint64_t

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h	Wed Oct 12 05:17:17 2016	(r307111)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h	Wed Oct 12 05:19:08 2016	(r307112)
@@ -58,7 +58,7 @@ extern "C" {
  */
 #define	DNODE_SHIFT		9	/* 512 bytes */
 #define	DN_MIN_INDBLKSHIFT	12	/* 4k */
-#define	DN_MAX_INDBLKSHIFT	14	/* 16k */
+#define	DN_MAX_INDBLKSHIFT	17	/* 128k */
 #define	DNODE_BLOCK_SHIFT	14	/* 16k */
 #define	DNODE_CORE_SIZE		64	/* 64 bytes for dnode sans blkptrs */
 #define	DN_MAX_OBJECT_SHIFT	48	/* 256 trillion (zfs_fid_t limit) */
@@ -88,6 +88,11 @@ extern "C" {
 
 #define	DNODES_PER_BLOCK_SHIFT	(DNODE_BLOCK_SHIFT - DNODE_SHIFT)
 #define	DNODES_PER_BLOCK	(1ULL << DNODES_PER_BLOCK_SHIFT)
+
+/*
+ * This is inaccurate if the indblkshift of the particular object is not the
+ * max.  But it's only used by userland to calculate the zvol reservation.
+ */
 #define	DNODES_PER_LEVEL_SHIFT	(DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT)
 #define	DNODES_PER_LEVEL	(1ULL << DNODES_PER_LEVEL_SHIFT)
 

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h	Wed Oct 12 05:17:17 2016	(r307111)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h	Wed Oct 12 05:19:08 2016	(r307112)
@@ -610,6 +610,8 @@ typedef struct zpool_rewind_policy {
 
 /*
  * This is needed in userland to report the minimum necessary device size.
+ *
+ * Note that the zfs test suite uses 64MB vdevs.
  */
 #define	SPA_MINDEVSIZE		(64ULL << 20)
 


More information about the svn-src-all mailing list