git: 35c612fbabd8 - stable/14 - tarfs: Fix support for large files

From: Dag-Erling Smørgrav <des_at_FreeBSD.org>
Date: Wed, 19 Nov 2025 10:58:20 UTC
The branch stable/14 has been updated by des:

URL: https://cgit.FreeBSD.org/src/commit/?id=35c612fbabd8a6fd23e65881595873d0797da4b8

commit 35c612fbabd8a6fd23e65881595873d0797da4b8
Author:     Dag-Erling Smørgrav <des@FreeBSD.org>
AuthorDate: 2025-11-12 21:23:49 +0000
Commit:     Dag-Erling Smørgrav <des@FreeBSD.org>
CommitDate: 2025-11-19 10:57:40 +0000

    tarfs: Fix support for large files
    
    * When fast-forwarding through a zstd frame, we incorrectly used the
      min() inline function instead of the MIN() macro.  The function
      truncates the result to unsigned int, resulting in a decompression
      error when trying to seek more than 4 GB into the frame.
    
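    * POSIX states that a size extended header record overrides the size
      field in the header if present, and that one must be included if the
      size of the file exceeds 8 GB (the size field maxes out at 64 GB).
      The exthdr parser now recognizes the size record and, when its
      value is non-zero, uses it as the file size.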
    
    * Reduce repetition in the exthdr parser by deduplicating the syntax
      error handler.
    
    MFC after:      1 week
    Sponsored by:   Klara, Inc.
    Fixes:          69d94f4c7608 ("Add tarfs, a filesystem backed by tarballs.")
    Reviewed by:    allanjude
    Differential Revision:  https://reviews.freebsd.org/D53718
    
    (cherry picked from commit a34a57d4b4eba88dfa5541d7d77b63b01c1a1a9a)
---
 sys/fs/tarfs/tarfs_io.c          |  2 +-
 sys/fs/tarfs/tarfs_vfsops.c      | 48 +++++++++++++++++++---------------------
 tests/sys/fs/tarfs/tarfs_test.sh | 27 ++++++++++++++++++++++
 3 files changed, 51 insertions(+), 26 deletions(-)

diff --git a/sys/fs/tarfs/tarfs_io.c b/sys/fs/tarfs/tarfs_io.c
index a3d8df62d7df..e250c5cbce5a 100644
--- a/sys/fs/tarfs/tarfs_io.c
+++ b/sys/fs/tarfs/tarfs_io.c
@@ -444,7 +444,7 @@ tarfs_zread_zstd(struct tarfs_zio *zio, struct uio *uiop)
 		}
 		if (zio->opos < off) {
 			/* to be discarded */
-			zob.size = min(off - zio->opos, len);
+			zob.size = MIN(off - zio->opos, len);
 			zob.pos = 0;
 		} else {
 			zob.size = len;
diff --git a/sys/fs/tarfs/tarfs_vfsops.c b/sys/fs/tarfs/tarfs_vfsops.c
index a534b18ebf34..e1ba925ccb0f 100644
--- a/sys/fs/tarfs/tarfs_vfsops.c
+++ b/sys/fs/tarfs/tarfs_vfsops.c
@@ -441,7 +441,7 @@ tarfs_alloc_one(struct tarfs_mount *tmp, size_t *blknump)
 	int endmarker = 0;
 	char *namep, *sep;
 	struct tarfs_node *parent, *tnp, *other;
-	size_t namelen = 0, linklen = 0, realsize = 0, sz;
+	size_t namelen = 0, linklen = 0, realsize = 0, extsize = 0, sz;
 	ssize_t res;
 	dev_t rdev;
 	gid_t gid;
@@ -588,10 +588,7 @@ again:
 			char *eol, *key, *value, *sep;
 			size_t len = strtoul(line, &sep, 10);
 			if (len == 0 || sep == line || *sep != ' ') {
-				TARFS_DPF(ALLOC, "%s: exthdr syntax error\n",
-				    __func__);
-				error = EINVAL;
-				goto bad;
+				goto syntax;
 			}
 			if ((uintptr_t)line + len < (uintptr_t)line ||
 			    line + len > exthdr + sz) {
@@ -606,16 +603,18 @@ again:
 			key = sep + 1;
 			sep = strchr(key, '=');
 			if (sep == NULL) {
-				TARFS_DPF(ALLOC, "%s: exthdr syntax error\n",
-				    __func__);
-				error = EINVAL;
-				goto bad;
+				goto syntax;
 			}
 			*sep = '\0';
 			value = sep + 1;
 			TARFS_DPF(ALLOC, "%s: exthdr %s=%s\n", __func__,
 			    key, value);
-			if (strcmp(key, "path") == 0) {
+			if (strcmp(key, "size") == 0) {
+				extsize = strtol(value, &sep, 10);
+				if (sep != eol) {
+					goto syntax;
+				}
+			} else if (strcmp(key, "path") == 0) {
 				name = value;
 				namelen = eol - value;
 			} else if (strcmp(key, "linkpath") == 0) {
@@ -625,47 +624,42 @@ again:
 				sparse = true;
 				major = strtol(value, &sep, 10);
 				if (sep != eol) {
-					printf("exthdr syntax error\n");
-					error = EINVAL;
-					goto bad;
+					goto syntax;
 				}
 			} else if (strcmp(key, "GNU.sparse.minor") == 0) {
 				sparse = true;
 				minor = strtol(value, &sep, 10);
 				if (sep != eol) {
-					printf("exthdr syntax error\n");
-					error = EINVAL;
-					goto bad;
+					goto syntax;
 				}
 			} else if (strcmp(key, "GNU.sparse.name") == 0) {
 				sparse = true;
 				name = value;
 				namelen = eol - value;
 				if (namelen == 0) {
-					printf("exthdr syntax error\n");
-					error = EINVAL;
-					goto bad;
+					goto syntax;
 				}
 			} else if (strcmp(key, "GNU.sparse.realsize") == 0) {
 				sparse = true;
 				realsize = strtoul(value, &sep, 10);
 				if (sep != eol) {
-					printf("exthdr syntax error\n");
-					error = EINVAL;
-					goto bad;
+					goto syntax;
 				}
 			} else if (strcmp(key, "SCHILY.fflags") == 0) {
 				flags |= tarfs_strtofflags(value, &sep);
 				if (sep != eol) {
-					printf("exthdr syntax error\n");
-					error = EINVAL;
-					goto bad;
+					goto syntax;
 				}
 			}
 		}
 		goto again;
 	}
 
+	/* do we have a size from an exthdr? */
+	if (extsize > 0) {
+		sz = extsize;
+	}
+
 	/* sparse file consistency checks */
 	if (sparse) {
 		TARFS_DPF(ALLOC, "%s: %s: sparse %ld.%ld (%zu bytes)\n", __func__,
@@ -832,6 +826,10 @@ skip:
 		sbuf_delete(namebuf);
 	}
 	return (0);
+syntax:
+	TARFS_DPF(ALLOC, "%s: exthdr syntax error\n", __func__);
+	error = EINVAL;
+	goto bad;
 eof:
 	TARFS_DPF(IO, "%s: premature end of file\n", __func__);
 	error = EIO;
diff --git a/tests/sys/fs/tarfs/tarfs_test.sh b/tests/sys/fs/tarfs/tarfs_test.sh
index d4de71271985..505bfc5325f0 100644
--- a/tests/sys/fs/tarfs/tarfs_test.sh
+++ b/tests/sys/fs/tarfs/tarfs_test.sh
@@ -396,6 +396,32 @@ tarfs_git_archive_cleanup() {
 	tarfs_cleanup
 }
 
+atf_test_case tarfs_large cleanup
+tarfs_large_head() {
+	atf_set "descr" "Test support for large files"
+	atf_set "require.user" "root"
+	atf_set "require.kmods" "tarfs"
+	atf_set "timeout" "600"
+}
+tarfs_large_body() {
+	tarfs_setup
+	local tarball="${PWD}/tarfs_test.tar.zst"
+	local exp off
+	for exp in 31 32 33 34 35 36 ; do
+		for off in 1 0 ; do
+			local size=$(((1<<exp)-off))
+			atf_check truncate -s ${size} file
+			atf_check bsdtar -cf "${tarball}" --no-read-sparse --zstd file
+			atf_check mount -rt tarfs "${tarball}" "${mnt}"
+			atf_check -o inline:"${size}\n" stat -f%z "${mnt}"/file
+			atf_check umount "${mnt}"
+		done
+	done
+}
+tarfs_large_cleanup() {
+	tarfs_cleanup
+}
+
 atf_init_test_cases() {
 	atf_add_test_case tarfs_basic
 	atf_add_test_case tarfs_basic_gnu
@@ -414,4 +440,5 @@ atf_init_test_cases() {
 	atf_add_test_case tarfs_long_names
 	atf_add_test_case tarfs_long_paths
 	atf_add_test_case tarfs_git_archive
+	atf_add_test_case tarfs_large
 }