git: 4ee16246f978 - main - vfs_vnops.c: Fix blksize for ZFS

From: Rick Macklem <rmacklem_at_FreeBSD.org>
Date: Thu, 17 Nov 2022 01:38:45 UTC
The branch main has been updated by rmacklem:

URL: https://cgit.FreeBSD.org/src/commit/?id=4ee16246f97825e893e0a4a4499d1bbe5bc07b6a

commit 4ee16246f97825e893e0a4a4499d1bbe5bc07b6a
Author:     Rick Macklem <rmacklem@FreeBSD.org>
AuthorDate: 2022-11-17 01:37:22 +0000
Commit:     Rick Macklem <rmacklem@FreeBSD.org>
CommitDate: 2022-11-17 01:37:22 +0000

    vfs_vnops.c: Fix blksize for ZFS
    
    Since ZFS reports _PC_MIN_HOLE_SIZE as 512 (although it
    appears that an unwritten region must be at least f_iosize
    to remain unallocated), vn_generic_copy_file_range()
    uses 4096 for the copy blksize for ZFS, resulting in slow copies.
    
    For most other file systems, _PC_MIN_HOLE_SIZE and f_iosize
    are the same value, so this patch modifies the code to
    use f_iosize for most cases.  It also documents in comments
    why the blksize is being set a certain way, so that the code
    does not appear to be doing "magic math".
    
    Reported by:    allanjude
    Reviewed by:    allanjude, asomers
    MFC after:      2 weeks
    Differential Revision:  https://reviews.freebsd.org/D37076
---
 sys/kern/vfs_vnops.c | 51 +++++++++++++++++++++++++++++----------------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 8eaf23d91d4d..4d8963100a43 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -3266,7 +3266,7 @@ vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp,
 	int error, interrupted;
 	bool cantseek, readzeros, eof, lastblock, holetoeof;
 	ssize_t aresid, r = 0;
-	size_t copylen, len, rem, savlen;
+	size_t copylen, len, savlen;
 	char *dat;
 	long holein, holeout;
 	struct timespec curts, endts;
@@ -3338,31 +3338,38 @@ vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp,
 	if (error != 0)
 		goto out;
 
-	/*
-	 * Set the blksize to the larger of the hole sizes for invp and outvp.
-	 * If hole sizes aren't available, set the blksize to the larger 
-	 * f_iosize of invp and outvp.
-	 * This code expects the hole sizes and f_iosizes to be powers of 2.
-	 * This value is clipped at 4Kbytes and 1Mbyte.
-	 */
-	blksize = MAX(holein, holeout);
-
-	/* Clip len to end at an exact multiple of hole size. */
-	if (blksize > 1) {
-		rem = *inoffp % blksize;
-		if (rem > 0)
-			rem = blksize - rem;
-		if (len > rem && len - rem > blksize)
-			len = savlen = rounddown(len - rem, blksize) + rem;
-	}
-
-	if (blksize <= 1)
+	if (holein == 0 && holeout > 0) {
+		/*
+		 * For this special case, the input data will be scanned
+		 * for blocks of all 0 bytes.  For these blocks, the
+		 * write can be skipped for the output file to create
+		 * an unallocated region.
+		 * Therefore, use the appropriate size for the output file.
+		 */
+		blksize = holeout;
+		if (blksize <= 512) {
+			/*
+			 * Use f_iosize, since ZFS reports a _PC_MIN_HOLE_SIZE
+			 * of 512, although it actually only creates
+			 * unallocated regions for blocks >= f_iosize.
+			 */
+			blksize = outvp->v_mount->mnt_stat.f_iosize;
+		}
+	} else {
+		/*
+		 * Use the larger of the two f_iosize values.  If they are
+		 * not the same size, one will normally be an exact multiple of
+		 * the other, since they are both likely to be a power of 2.
+		 */
 		blksize = MAX(invp->v_mount->mnt_stat.f_iosize,
 		    outvp->v_mount->mnt_stat.f_iosize);
+	}
+
+	/* Clip to sane limits. */
 	if (blksize < 4096)
 		blksize = 4096;
-	else if (blksize > 1024 * 1024)
-		blksize = 1024 * 1024;
+	else if (blksize > maxphys)
+		blksize = maxphys;
 	dat = malloc(blksize, M_TEMP, M_WAITOK);
 
 	/*