svn commit: r321541 - in stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys

Alexander Motin mav at FreeBSD.org
Wed Jul 26 16:27:21 UTC 2017


Author: mav
Date: Wed Jul 26 16:27:20 2017
New Revision: 321541
URL: https://svnweb.freebsd.org/changeset/base/321541

Log:
  MFC r317541: MFV 316905
  
  7740 fix for 6513 only works in hole punching case, not truncation
  
  illumos/illumos-gate at 7de35a3ed0c2e6d4256bd2fb05b48b3798aaf553
  https://github.com/illumos/illumos-gate/commit/7de35a3ed0c2e6d4256bd2fb05b48b3798aaf553
  
  https://www.illumos.org/issues/7740
    The problem is that dbuf_findbp will return ENOENT if the block it's
    trying to find is beyond the end of the file. If that happens, we assume
    there is no birth time, and so we lose that information when we write
    out new blkptrs. We should teach dbuf_findbp to look for things that are
    beyond the current end, but not beyond the absolute end of the file.
    To verify, create a large file, truncate it to a short length, and then
    write beyond the end. Check with zdb to make sure that there are no
    holes with birth time zero (will appear as gaps).
  
  Reviewed by: Steve Gonczi <steve.gonczi at delphix.com>
  Reviewed by: Matthew Ahrens <mahrens at delphix.com>
  Approved by: Dan McDonald <danmcd at omniti.com>
  Author: Paul Dagnelie <pcd at delphix.com>

Modified:
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	Wed Jul 26 16:26:34 2017	(r321540)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c	Wed Jul 26 16:27:20 2017	(r321541)
@@ -2161,8 +2161,6 @@ static int
 dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
     dmu_buf_impl_t **parentp, blkptr_t **bpp)
 {
-	int nlevels, epbs;
-
 	*parentp = NULL;
 	*bpp = NULL;
 
@@ -2181,17 +2179,35 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, in
 		return (0);
 	}
 
-	if (dn->dn_phys->dn_nlevels == 0)
-		nlevels = 1;
-	else
-		nlevels = dn->dn_phys->dn_nlevels;
+	int nlevels =
+	    (dn->dn_phys->dn_nlevels == 0) ? 1 : dn->dn_phys->dn_nlevels;
+	int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
 
-	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
-
 	ASSERT3U(level * epbs, <, 64);
 	ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
+	/*
+	 * This assertion shouldn't trip as long as the max indirect block size
+	 * is less than 1M.  The reason for this is that up to that point,
+	 * the number of levels required to address an entire object with blocks
+	 * of size SPA_MINBLOCKSIZE satisfies nlevels * epbs + 1 <= 64.  In
+	 * other words, if N * epbs + 1 > 64, then if (N-1) * epbs + 1 > 55
+	 * (i.e. we can address the entire object), objects will all use at most
+	 * N-1 levels and the assertion won't overflow.  However, once epbs is
+	 * 13, 4 * 13 + 1 = 53, but 5 * 13 + 1 = 66.  Then, 4 levels will not be
+	 * enough to address an entire object, so objects will have 5 levels,
+	 * but then this assertion will overflow.
+	 *
+	 * All this is to say that if we ever increase DN_MAX_INDBLKSHIFT, we
+	 * need to redo this logic to handle overflows.
+	 */
+	ASSERT(level >= nlevels ||
+	    ((nlevels - level - 1) * epbs) +
+	    highbit64(dn->dn_phys->dn_nblkptr) <= 64);
 	if (level >= nlevels ||
-	    (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) {
+	    blkid >= ((uint64_t)dn->dn_phys->dn_nblkptr <<
+	    ((nlevels - level - 1) * epbs)) ||
+	    (fail_sparse &&
+	    blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) {
 		/* the buffer has no parent yet */
 		return (SET_ERROR(ENOENT));
 	} else if (level < nlevels-1) {
@@ -2209,6 +2225,8 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, in
 		}
 		*bpp = ((blkptr_t *)(*parentp)->db.db_data) +
 		    (blkid & ((1ULL << epbs) - 1));
+		if (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))
+			ASSERT(BP_IS_HOLE(*bpp));
 		return (0);
 	} else {
 		/* the block is referenced from the dnode */

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h	Wed Jul 26 16:26:34 2017	(r321540)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h	Wed Jul 26 16:27:20 2017	(r321541)
@@ -58,6 +58,12 @@ extern "C" {
  */
 #define	DNODE_SHIFT		9	/* 512 bytes */
 #define	DN_MIN_INDBLKSHIFT	12	/* 4k */
+/*
+ * If we ever increase this value beyond 20, we need to revisit all logic that
+ * does x << level * ebps to handle overflow.  With a 1M indirect block size,
+ * 4 levels of indirect blocks would not be able to guarantee addressing an
+ * entire object, so 5 levels will be used, but 5 * (20 - 7) = 65.
+ */
 #define	DN_MAX_INDBLKSHIFT	17	/* 128k */
 #define	DNODE_BLOCK_SHIFT	14	/* 16k */
 #define	DNODE_CORE_SIZE		64	/* 64 bytes for dnode sans blkptrs */


More information about the svn-src-all mailing list