git: 697d7905aba9 - stable/15 - stat: Add option to list holes

From: Dag-Erling Smørgrav <des_at_FreeBSD.org>
Date: Tue, 23 Sep 2025 12:57:55 UTC
The branch stable/15 has been updated by des:

URL: https://cgit.FreeBSD.org/src/commit/?id=697d7905aba98c7e76ccbb9fffea173379f11296

commit 697d7905aba98c7e76ccbb9fffea173379f11296
Author:     Dag-Erling Smørgrav <des@FreeBSD.org>
AuthorDate: 2025-09-16 13:37:57 +0000
Commit:     Dag-Erling Smørgrav <des@FreeBSD.org>
CommitDate: 2025-09-23 12:57:30 +0000

    stat: Add option to list holes
    
    Add a new -h option that causes stat to print a list of holes for each
    file argument.
    
    Sponsored by:   Klara, Inc.
    Reviewed by:    markj
    Differential Revision:  https://reviews.freebsd.org/D52481
    
    (cherry picked from commit 1a7a067da456f8962ef87bfdf75c94cd12988615)
---
 usr.bin/stat/stat.1             |  45 +++++++++-
 usr.bin/stat/stat.c             | 176 ++++++++++++++++++++++++++++++----------
 usr.bin/stat/tests/stat_test.sh |  72 ++++++++++++++++
 3 files changed, 250 insertions(+), 43 deletions(-)

diff --git a/usr.bin/stat/stat.1 b/usr.bin/stat/stat.1
index 2996781fafa6..55e64de0767e 100644
--- a/usr.bin/stat/stat.1
+++ b/usr.bin/stat/stat.1
@@ -6,6 +6,8 @@
 .\" This code is derived from software contributed to The NetBSD Foundation
 .\" by Andrew Brown and Jan Schaumann.
 .\"
+.\" Copyright (c) 2025 Klara, Inc.
+.\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
@@ -27,7 +29,7 @@
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd June 22, 2017
+.Dd September 9, 2025
 .Dt STAT 1
 .Os
 .Sh NAME
@@ -36,7 +38,7 @@
 .Nd display file status
 .Sh SYNOPSIS
 .Nm
-.Op Fl FHLnq
+.Op Fl FHhLnq
 .Op Fl f Ar format | Fl l | r | s | x
 .Op Fl t Ar timefmt
 .Op Ar
@@ -129,6 +131,45 @@ and use
 instead of
 .Xr lstat 2 .
 This requires root privileges.
+.It Fl h
+For each file argument, print a line consisting of a comma-separated
+list of holes, a space, and the file name.
+Each hole is reported as its starting offset as a decimal number
+followed by a hyphen and the ending offset (one less than the starting
+offset of the data region that follows the hole) as a decimal number.
+If the file ends in a hole, the ending offset of the final hole will
+be one less than the size of the file.
+Otherwise, the final entry in the list (indeed, the only entry in the
+list, if the file is not sparse) is a single decimal number
+corresponding to the size of the file, representing the virtual hole
+at the end of the file.
+.Pp
+If the argument is a directory, instead of a list of holes, a single
+number is printed, corresponding to the minimum hole size for that
+directory as reported by
+.Xr pathconf 2 ,
+followed by a space and the directory name.
+.Pp
+Please note that the only way to retrieve information about the holes
+in a file is to open it and walk the list of holes and data regions
+using
+.Xr lseek 2 .
+If the file is being modified by another process at the same time as
+.Nm
+is inspecting it, the result may be inconsistent.
+.Pp
+This option cannot be combined with the
+.Fl F ,
+.Fl f ,
+.Fl H ,
+.Fl L ,
+.Fl l ,
+.Fl r ,
+.Fl s ,
+.Fl t ,
+or
+.Fl x
+options.
 .It Fl L
 Use
 .Xr stat 2
diff --git a/usr.bin/stat/stat.c b/usr.bin/stat/stat.c
index 1fd8288728c1..0ed5d3ae5b53 100644
--- a/usr.bin/stat/stat.c
+++ b/usr.bin/stat/stat.c
@@ -7,6 +7,8 @@
  * This code is derived from software contributed to The NetBSD Foundation
  * by Andrew Brown.
  *
+ * Copyright (c) 2025 Klara, Inc.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -47,18 +49,19 @@ __RCSID("$NetBSD: stat.c,v 1.33 2011/01/15 22:54:10 njoly Exp $"
 #endif /* HAVE_CONFIG_H */
 
 #include <sys/param.h>
-#include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/mount.h>
 
 #include <ctype.h>
 #include <err.h>
 #include <errno.h>
+#include <fcntl.h>
 #include <grp.h>
 #include <limits.h>
 #include <locale.h>
 #include <paths.h>
 #include <pwd.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -178,22 +181,24 @@ __RCSID("$NetBSD: stat.c,v 1.33 2011/01/15 22:54:10 njoly Exp $"
 #define SHOW_filename	'N'
 #define SHOW_sizerdev	'Z'
 
-void	usage(const char *);
-void	output(const struct stat *, const char *,
-	    const char *, int, int);
-int	format1(const struct stat *,	/* stat info */
+static void	 usage(const char *);
+static void	 output(const struct stat *, const char *, const char *, int);
+static int	 format1(const struct stat *,	/* stat info */
 	    const char *,		/* the file name */
 	    const char *, int,		/* the format string itself */
 	    char *, size_t,		/* a place to put the output */
 	    int, int, int, int,		/* the parsed format */
 	    int, int);
-int	hex2byte(const char [2]);
+static int	 hex2byte(const char [2]);
 #if HAVE_STRUCT_STAT_ST_FLAGS
-char   *xfflagstostr(unsigned long);
+static char	*xfflagstostr(unsigned long);
 #endif
+static int	 fdlistholes(int, const char *);
+static int	 listholes(const char *);
 
 static const char *timefmt;
 static int linkfail;
+static bool nonl;
 
 #define addchar(s, c, nl) \
 	do { \
@@ -205,20 +210,22 @@ int
 main(int argc, char *argv[])
 {
 	struct stat st;
-	int ch, rc, errs, am_readlink;
-	int lsF, fmtchar, usestat, nfs_handle, fn, nonl, quiet;
-	const char *statfmt, *options, *synopsis;
 	char dname[sizeof _PATH_DEV + SPECNAMELEN] = _PATH_DEV;
-	fhandle_t fhnd;
+	const char *statfmt, *options, *synopsis;
 	const char *file;
+	fhandle_t fhnd;
+	int ch, rc, errs, am_readlink, fn, fmtchar;
+	bool lsF, holes, usestat, nfs_handle, quiet;
 
 	am_readlink = 0;
-	lsF = 0;
+	errs = 0;
+	lsF = false;
 	fmtchar = '\0';
-	usestat = 0;
-	nfs_handle = 0;
-	nonl = 0;
-	quiet = 0;
+	holes = false;
+	usestat = false;
+	nfs_handle = false;
+	nonl = false;
+	quiet = false;
 	linkfail = 0;
 	statfmt = NULL;
 	timefmt = NULL;
@@ -231,28 +238,35 @@ main(int argc, char *argv[])
 		fmtchar = 'f';
 		quiet = 1;
 	} else {
-		options = "f:FHlLnqrst:x";
-		synopsis = "[-FLnq] [-f format | -l | -r | -s | -x] "
+		options = "Ff:HhLlnqrst:x";
+		synopsis = "[-FHhLnq] [-f format | -l | -r | -s | -x] "
 		    "[-t timefmt] [file|handle ...]";
 	}
 
 	while ((ch = getopt(argc, argv, options)) != -1)
 		switch (ch) {
 		case 'F':
-			lsF = 1;
+			lsF = true;
 			break;
                 case 'H':
-			nfs_handle = 1;
+			nfs_handle = true;
+			break;
+		case 'h':
+			holes = true;
 			break;
 		case 'L':
-			usestat = 1;
+			usestat = true;
 			break;
 		case 'n':
-			nonl = 1;
+			nonl = true;
+			break;
+		case 't':
+			timefmt = optarg;
 			break;
 		case 'q':
-			quiet = 1;
+			quiet = true;
 			break;
+		/* remaining cases are purposefully out of order */
 		case 'f':
 			if (am_readlink) {
 				statfmt = "%R";
@@ -269,9 +283,6 @@ main(int argc, char *argv[])
 				    fmtchar, ch);
 			fmtchar = ch;
 			break;
-		case 't':
-			timefmt = optarg;
-			break;
 		default:
 			usage(synopsis);
 		}
@@ -280,6 +291,28 @@ main(int argc, char *argv[])
 	argv += optind;
 	fn = 1;
 
+	if (holes) {
+		if (fmtchar || lsF || nfs_handle || usestat || timefmt)
+			usage(synopsis);
+		if (argc > 0) {
+			while (argc-- > 0) {
+				if (listholes(*argv) != 0) {
+					if (!quiet)
+						warn("%s", *argv);
+					errs++;
+				}
+				argv++;
+			}
+		} else {
+			if (fdlistholes(STDIN_FILENO, "stdin") != 0) {
+				if (!quiet)
+					warn("stdin");
+				errs++;
+			}
+		}
+		exit(errs ? 1 : 0);
+	}
+
 	if (fmtchar == '\0') {
 		if (lsF)
 			fmtchar = 'l';
@@ -318,7 +351,6 @@ main(int argc, char *argv[])
 	if (timefmt == NULL)
 		timefmt = TIME_FORMAT;
 
-	errs = 0;
 	do {
 		if (argc == 0) {
 			if (fdevname_r(STDIN_FILENO, dname +
@@ -361,8 +393,7 @@ main(int argc, char *argv[])
 				    errno == ENOENT &&
 				    (rc = lstat(file, &st)) == -1)
 					errno = ENOENT;
-			}
-			else
+			} else
 				rc = lstat(file, &st);
 		}
 
@@ -371,9 +402,8 @@ main(int argc, char *argv[])
 			linkfail = 1;
 			if (!quiet)
 				warn("%s", file);
-		}
-		else
-			output(&st, file, statfmt, fn, nonl);
+		} else
+			output(&st, file, statfmt, fn);
 
 		argv++;
 		argc--;
@@ -387,7 +417,7 @@ main(int argc, char *argv[])
 /*
  * fflagstostr() wrapper that leaks only once
  */
-char *
+static char *
 xfflagstostr(unsigned long fflags)
 {
 	static char *str = NULL;
@@ -402,10 +432,9 @@ xfflagstostr(unsigned long fflags)
 }
 #endif /* HAVE_STRUCT_STAT_ST_FLAGS */
 
-void
+static void
 usage(const char *synopsis)
 {
-
 	(void)fprintf(stderr, "usage: %s %s\n", getprogname(), synopsis);
 	exit(1);
 }
@@ -413,9 +442,8 @@ usage(const char *synopsis)
 /* 
  * Parses a format string.
  */
-void
-output(const struct stat *st, const char *file,
-    const char *statfmt, int fn, int nonl)
+static void
+output(const struct stat *st, const char *file, const char *statfmt, int fn)
 {
 	int flags, size, prec, ofmt, hilo, what;
 	char buf[PATH_MAX + 4 + 1];
@@ -606,7 +634,7 @@ output(const struct stat *st, const char *file,
 /*
  * Arranges output according to a single parsed format substring.
  */
-int
+static int
 format1(const struct stat *st,
     const char *file,
     const char *fmt, int flen,
@@ -1073,7 +1101,7 @@ format1(const struct stat *st,
 	(void)strcat(lfmt, "ll");
 	switch (ofmt) {
 	case FMTF_DECIMAL:	(void)strcat(lfmt, "d");	break;
-	case FMTF_OCTAL:		(void)strcat(lfmt, "o");	break;
+	case FMTF_OCTAL:	(void)strcat(lfmt, "o");	break;
 	case FMTF_UNSIGNED:	(void)strcat(lfmt, "u");	break;
 	case FMTF_HEX:		(void)strcat(lfmt, "x");	break;
 	}
@@ -1083,9 +1111,75 @@ format1(const struct stat *st,
 
 
 #define hex2nibble(c) (c <= '9' ? c - '0' : toupper(c) - 'A' + 10)
-int
+static int
 hex2byte(const char c[2]) {
 	if (!(ishexnumber(c[0]) && ishexnumber(c[1])))
 		return -1;
 	return (hex2nibble(c[0]) << 4) + hex2nibble(c[1]);
 }
+
+static int	/* 0 on success, -1 on failure */
+fdlistholes(int fd, const char *fn)	/* fn is only printed as a label */
+{
+	struct stat sb;
+	off_t pos = 0, off;	/* pos: next seek origin; off: seek result */
+	long l;
+
+	if (fstat(fd, &sb) < 0)
+		return (-1);
+	if (S_ISDIR(sb.st_mode)) {	/* directory: print minimum hole size */
+		if ((l = fpathconf(fd, _PC_MIN_HOLE_SIZE)) < 0)
+			return (-1);
+		printf("%ld", l);
+	} else if (!S_ISREG(sb.st_mode)) {	/* not a regular file either */
+		errno = ESPIPE;
+		return (-1);
+	} else {
+		for (;;) {	/* each pass reports one hole */
+			if ((off = lseek(fd, pos, SEEK_HOLE)) < 0) {
+				if (errno != ENXIO)
+					return (-1);
+				/*
+				 * This can only happen if the file was
+				 * truncated while we were scanning it, or
+				 * on the initial seek if the file is
+				 * empty.  Report the virtual hole at the
+				 * end of the file at this position.
+				 */
+				off = pos;
+			}
+			printf("%jd", (intmax_t)off);	/* start of the hole */
+			pos = off;
+			if ((off = lseek(fd, pos, SEEK_DATA)) < 0) {
+				if (errno != ENXIO)
+					return (-1);
+				/*
+				 * There are no more data regions in the
+				 * file, or it got truncated.  However, we
+				 * may not be at the end yet.
+				 */
+				if ((off = lseek(fd, 0, SEEK_END)) > pos)
+					printf("-%jd", (intmax_t)off - 1);
+				break;
+			}
+			printf("-%jd,", (intmax_t)off - 1);	/* end of the hole */
+			pos = off;	/* resume from the data region */
+		}
+	}
+	printf(" %s", fn);
+	if (!nonl)	/* file-scope flag set by -n in main() */
+		printf("\n");
+	return (0);
+}
+
+static int	/* -1 if open() fails, else the result of fdlistholes() */
+listholes(const char *fn)
+{
+	int fd, ret;
+
+	if ((fd = open(fn, O_RDONLY)) < 0)
+		return (-1);
+	ret = fdlistholes(fd, fn);
+	close(fd);	/* return value is not checked */
+	return (ret);
+}
diff --git a/usr.bin/stat/tests/stat_test.sh b/usr.bin/stat/tests/stat_test.sh
index e75fd0c56490..afe698575034 100755
--- a/usr.bin/stat/tests/stat_test.sh
+++ b/usr.bin/stat/tests/stat_test.sh
@@ -1,6 +1,7 @@
 #
 # Copyright (c) 2017 Dell EMC
 # All rights reserved.
+# Copyright (c) 2025 Klara, Inc.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -45,6 +46,76 @@ F_flag_body()
 	atf_check -o match:'.* f\|' stat -Fn f
 }
 
+atf_test_case h_flag cleanup
+h_flag_head()
+{
+	atf_set "descr" "Verify the output format for -h"
+	atf_set "require.user" "root"	# the body mounts a tmpfs
+}
+h_flag_body()
+{
+	# POSIX defines a hole as “[a] contiguous region of bytes
+	# within a file, all having the value of zero” and requires
+	# that “all seekable files shall have a virtual hole starting
+	# at the current size of the file” but says “it is up to the
+	# implementation to define when sparse files can be created
+	# and with what granularity for the size of holes”.  It also
+	# defines a sparse file as “[a] file that contains more holes
+	# than just the virtual hole at the end of the file”.  That's
+	# pretty much the extent of its discussion of holes, apart
+	# from the description of SEEK_HOLE and SEEK_DATA in the lseek
+	# manual page.  In other words, there is no portable way to
+	# reliably create a hole in a file on any given file system.
+	#
+	# On FreeBSD, this test is likely to run on either tmpfs, ufs
+	# (ffs2), or zfs.  Of those three, only tmpfs has predictable
+	# semantics and supports all possible configurations (the
+	# minimum hole size on zfs is variable for small files, and
+	# ufs will not allow a file to end in a hole).
+	atf_check mkdir mnt
+	atf_check mount -t tmpfs tmpfs mnt
+	cd mnt
+
+	# For a directory, prints the minimum hole size, which on
+	# tmpfs is the system page size.
+	ps=$(sysctl -n hw.pagesize)	# page size, used as the hole granularity below
+	atf_check -o inline:"$((ps)) .\n" stat -h .
+	atf_check -o inline:"$((ps)) ." stat -hn .
+
+	# For a file, prints a list of holes.
+	atf_check truncate -s 0 foo
+	atf_check -o inline:"0 foo" \
+	    stat -hn foo
+	atf_check truncate -s "$((ps))" foo
+	atf_check -o inline:"0-$((ps-1)) foo" \
+	    stat -hn foo
+	atf_check dd status=none if=/COPYRIGHT of=foo \
+	    oseek="$((ps))" bs=1 count=1
+	atf_check -o inline:"0-$((ps-1)),$((ps+1)) foo" \
+	    stat -hn foo
+	atf_check truncate -s "$((ps*3))" foo
+	atf_check -o inline:"0-$((ps-1)),$((ps*2))-$((ps*3-1)) foo" \
+	    stat -hn foo
+
+	# Test multiple files.
+	atf_check dd status=none if=/COPYRIGHT of=bar
+	sz=$(stat -f%z bar)	# size of bar; expected as its only entry below
+	atf_check -o inline:"0-$((ps-1)),$((ps*2))-$((ps*3-1)) foo
+$((sz)) bar
+" \
+	    stat -h foo bar
+
+	# For a device, fail.
+	atf_check -s exit:1 -e match:"/dev/null: Illegal seek" \
+	    stat -h /dev/null
+}
+h_flag_cleanup()
+{
+	if [ -d mnt ]; then	# mnt is created by h_flag_body
+		umount mnt || true	# a failed umount is ignored
+	fi
+}
+
 atf_test_case l_flag
 l_flag_head()
 {
@@ -233,6 +304,7 @@ atf_init_test_cases()
 {
 	atf_add_test_case F_flag
 	#atf_add_test_case H_flag
+	atf_add_test_case h_flag
 	#atf_add_test_case L_flag
 	#atf_add_test_case f_flag
 	atf_add_test_case l_flag