git: 127f4dd60bed - stable/13 - gzip: Add support for decompressing zstd files.

From: Dag-Erling Smørgrav <des_at_FreeBSD.org>
Date: Wed, 17 Jan 2024 15:56:15 UTC
The branch stable/13 has been updated by des:

URL: https://cgit.FreeBSD.org/src/commit/?id=127f4dd60bed2af6ab6b99560e7cce2e5ac86772

commit 127f4dd60bed2af6ab6b99560e7cce2e5ac86772
Author:     Dag-Erling Smørgrav <des@FreeBSD.org>
AuthorDate: 2022-11-02 13:41:44 +0000
Commit:     Dag-Erling Smørgrav <des@FreeBSD.org>
CommitDate: 2024-01-17 15:52:31 +0000

    gzip: Add support for decompressing zstd files.
    
    Sponsored by:   Klara, Inc.
    Differential Revision: https://reviews.freebsd.org/D37236
    
    (cherry picked from commit f9349d427431eaa5b3bd7e9e7e87725a138facd1)
---
 usr.bin/gzip/Makefile        |  4 +-
 usr.bin/gzip/Makefile.depend |  1 +
 usr.bin/gzip/gzip.1          |  4 +-
 usr.bin/gzip/gzip.c          | 60 ++++++++++++++++++++++-------
 usr.bin/gzip/unzstd.c        | 89 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 143 insertions(+), 15 deletions(-)

diff --git a/usr.bin/gzip/Makefile b/usr.bin/gzip/Makefile
index b9e6d84f436e..33fbdb85d78c 100644
--- a/usr.bin/gzip/Makefile
+++ b/usr.bin/gzip/Makefile
@@ -5,7 +5,7 @@
 PROG=		gzip
 MAN=		gzip.1 gzexe.1 zdiff.1 zforce.1 zmore.1 znew.1
 
-LIBADD=		z lzma
+LIBADD=		z lzma zstd
 
 .if ${MK_BZIP2_SUPPORT} != "no"
 LIBADD+=	bz2
@@ -13,6 +13,8 @@ LIBADD+=	bz2
 CFLAGS+=	-DNO_BZIP2_SUPPORT
 .endif
 
+CFLAGS+=	-I${SRCTOP}/sys/contrib/zstd/lib
+
 SCRIPTS=	gzexe zdiff zforce zmore znew
 
 MLINKS+=	gzip.1 gunzip.1 \
diff --git a/usr.bin/gzip/Makefile.depend b/usr.bin/gzip/Makefile.depend
index 413518057565..7c13917770d9 100644
--- a/usr.bin/gzip/Makefile.depend
+++ b/usr.bin/gzip/Makefile.depend
@@ -9,6 +9,7 @@ DIRDEPS = \
 	lib/liblzma \
 	lib/libthr \
 	lib/libz \
+	lib/libzstd \
 
 
 .include <dirdeps.mk>
diff --git a/usr.bin/gzip/gzip.1 b/usr.bin/gzip/gzip.1
index 8677edf2751a..afb1147f3dfe 100644
--- a/usr.bin/gzip/gzip.1
+++ b/usr.bin/gzip/gzip.1
@@ -23,7 +23,7 @@
 .\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
-.Dd January 7, 2019
+.Dd November 2, 2022
 .Dt GZIP 1
 .Os
 .Sh NAME
@@ -108,6 +108,7 @@ is also capable of decompressing files compressed using
 .Xr compress 1 ,
 .Xr bzip2 1 ,
 .Ar lzip ,
+.Xr zstd 1 ,
 or
 .Xr xz 1 .
 .Sh OPTIONS
@@ -201,6 +202,7 @@ Report progress to standard error.
 .Sh SEE ALSO
 .Xr bzip2 1 ,
 .Xr compress 1 ,
+.Xr zstd 1 ,
 .Xr xz 1 ,
 .Xr fts 3 ,
 .Xr zlib 3
diff --git a/usr.bin/gzip/gzip.c b/usr.bin/gzip/gzip.c
index 873d571f2b9b..8eddac4adcdd 100644
--- a/usr.bin/gzip/gzip.c
+++ b/usr.bin/gzip/gzip.c
@@ -85,6 +85,9 @@ enum filetype {
 #endif
 #ifndef NO_LZ_SUPPORT
 	FT_LZ,
+#endif
+#ifndef NO_ZSTD_SUPPORT
+	FT_ZSTD,
 #endif
 	FT_LAST,
 	FT_UNKNOWN
@@ -117,6 +120,12 @@ enum filetype {
 #define LZ_MAGIC	"LZIP"
 #endif
 
+#ifndef NO_ZSTD_SUPPORT
+#include <zstd.h>
+#define ZSTD_SUFFIX	".zst"
+#define ZSTD_MAGIC	"\050\265\057\375"
+#endif
+
 #define GZ_SUFFIX	".gz"
 
 #define BUFLEN		(64 * 1024)
@@ -163,6 +172,9 @@ static suffixes_t suffixes[] = {
 #endif
 #ifndef NO_LZ_SUPPORT
 	SUFFIX(LZ_SUFFIX,	""),
+#endif
+#ifndef NO_ZSTD_SUPPORT
+	SUFFIX(ZSTD_SUFFIX,	""),
 #endif
 	SUFFIX(GZ_SUFFIX,	""),	/* Overwritten by -S "" */
 #undef SUFFIX
@@ -220,7 +232,7 @@ static	const char *infile;		/* name of file coming in */
 
 static	void	maybe_err(const char *fmt, ...) __printflike(1, 2) __dead2;
 #if !defined(NO_BZIP2_SUPPORT) || !defined(NO_PACK_SUPPORT) ||	\
-    !defined(NO_XZ_SUPPORT)
+    !defined(NO_XZ_SUPPORT) || !defined(NO_ZSTD_SUPPORT)
 static	void	maybe_errx(const char *fmt, ...) __printflike(1, 2) __dead2;
 #endif
 static	void	maybe_warn(const char *fmt, ...) __printflike(1, 2);
@@ -283,6 +295,10 @@ static	off_t	unxz_len(int);
 static	off_t	unlz(int, int, char *, size_t, off_t *);
 #endif
 
+#ifndef NO_ZSTD_SUPPORT
+static	off_t	unzstd(int, int, char *, size_t, off_t *);
+#endif
+
 static const struct option longopts[] = {
 	{ "stdout",		no_argument,		0,	'c' },
 	{ "to-stdout",		no_argument,		0,	'c' },
@@ -466,7 +482,7 @@ maybe_err(const char *fmt, ...)
 }
 
 #if !defined(NO_BZIP2_SUPPORT) || !defined(NO_PACK_SUPPORT) ||	\
-    !defined(NO_XZ_SUPPORT)
+    !defined(NO_XZ_SUPPORT) || !defined(NO_ZSTD_SUPPORT)
 /* ... without an errno. */
 void
 maybe_errx(const char *fmt, ...)
@@ -1097,33 +1113,32 @@ file_gettype(u_char *buf)
 	if (buf[0] == GZIP_MAGIC0 &&
 	    (buf[1] == GZIP_MAGIC1 || buf[1] == GZIP_OMAGIC1))
 		return FT_GZIP;
-	else
 #ifndef NO_BZIP2_SUPPORT
-	if (memcmp(buf, BZIP2_MAGIC, 3) == 0 &&
+	else if (memcmp(buf, BZIP2_MAGIC, 3) == 0 &&
 	    buf[3] >= '0' && buf[3] <= '9')
 		return FT_BZIP2;
-	else
 #endif
 #ifndef NO_COMPRESS_SUPPORT
-	if (memcmp(buf, Z_MAGIC, 2) == 0)
+	else if (memcmp(buf, Z_MAGIC, 2) == 0)
 		return FT_Z;
-	else
 #endif
 #ifndef NO_PACK_SUPPORT
-	if (memcmp(buf, PACK_MAGIC, 2) == 0)
+	else if (memcmp(buf, PACK_MAGIC, 2) == 0)
 		return FT_PACK;
-	else
 #endif
 #ifndef NO_XZ_SUPPORT
-	if (memcmp(buf, XZ_MAGIC, 4) == 0)	/* XXX: We only have 4 bytes */
+	else if (memcmp(buf, XZ_MAGIC, 4) == 0)	/* XXX: We only have 4 bytes */
 		return FT_XZ;
-	else
 #endif
 #ifndef NO_LZ_SUPPORT
-	if (memcmp(buf, LZ_MAGIC, 4) == 0)
+	else if (memcmp(buf, LZ_MAGIC, 4) == 0)
 		return FT_LZ;
-	else
 #endif
+#ifndef NO_ZSTD_SUPPORT
+	else if (memcmp(buf, ZSTD_MAGIC, 4) == 0)
+		return FT_ZSTD;
+#endif
+	else
 		return FT_UNKNOWN;
 }
 
@@ -1585,6 +1600,16 @@ file_uncompress(char *file, char *outfile, size_t outsize)
 		size = unlz(fd, zfd, NULL, 0, NULL);
 		break;
 #endif
+
+#ifndef NO_ZSTD_SUPPORT
+	case FT_ZSTD:
+		if (lflag) {
+			maybe_warnx("no -l with zstd files");
+			goto lose;
+		}
+		size = unzstd(fd, zfd, NULL, 0, NULL);
+		break;
+#endif
 	case FT_UNKNOWN:
 		if (lflag) {
 			maybe_warnx("no -l for unknown filetypes");
@@ -1812,6 +1837,12 @@ handle_stdin(void)
 		usize = unlz(STDIN_FILENO, STDOUT_FILENO,
 			     (char *)fourbytes, sizeof fourbytes, &gsize);
 		break;
+#endif
+#ifndef NO_ZSTD_SUPPORT
+	case FT_ZSTD:
+		usize = unzstd(STDIN_FILENO, STDOUT_FILENO,
+			       (char *)fourbytes, sizeof fourbytes, &gsize);
+		break;
 #endif
 	}
 
@@ -2187,6 +2218,9 @@ display_version(void)
 #ifndef NO_LZ_SUPPORT
 #include "unlz.c"
 #endif
+#ifndef NO_ZSTD_SUPPORT
+#include "unzstd.c"
+#endif
 
 static ssize_t
 read_retry(int fd, void *buf, size_t sz)
diff --git a/usr.bin/gzip/unzstd.c b/usr.bin/gzip/unzstd.c
new file mode 100644
index 000000000000..4536f3119ace
--- /dev/null
+++ b/usr.bin/gzip/unzstd.c
@@ -0,0 +1,124 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2022 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* This file is #included by gzip.c */
+
+/*
+ * Decompress a zstd stream read from descriptor `in' and write the result
+ * to descriptor `out'.
+ *
+ * `pre' and `prelen' describe bytes the caller has already read from `in'
+ * (handle_stdin() passes its `fourbytes' buffer); they are fed to the
+ * decoder before anything else is read.  If `bytes_in' is not NULL it
+ * receives the number of compressed bytes consumed, including `prelen'.
+ *
+ * Returns the number of decompressed bytes written to `out'.  Allocation
+ * and I/O failures, corrupt input and truncated input are reported through
+ * maybe_err() / maybe_errx(), which are declared __dead2.
+ */
+static off_t
+unzstd(int in, int out, char *pre, size_t prelen, off_t *bytes_in)
+{
+	static char *ibuf, *obuf;
+	ZSTD_inBuffer zib;
+	ZSTD_outBuffer zob;
+	ZSTD_DCtx *zds;
+	ssize_t res;
+	size_t zres;
+	size_t off;
+	off_t bytes_out = 0;
+	int drain = 0;
+
+	/* The I/O buffers are allocated once and reused by later calls. */
+	if (ibuf == NULL)
+		ibuf = malloc(BUFLEN);
+	if (obuf == NULL)
+		obuf = malloc(BUFLEN);
+	if (ibuf == NULL || obuf == NULL)
+		maybe_err("malloc");
+
+	zds = ZSTD_createDStream();
+	if (zds == NULL)
+		maybe_errx("unable to create zstd decompression stream");
+	zres = ZSTD_initDStream(zds);
+	if (ZSTD_isError(zres))
+		maybe_errx("%s", ZSTD_getErrorName(zres));
+	/* Zero means "no frame in progress" for the truncation check below. */
+	zres = 0;
+
+	zib.src = pre;
+	zib.size = prelen;
+	zib.pos = 0;
+	if (bytes_in != NULL)
+		*bytes_in = prelen;
+	zob.dst = obuf;
+	zob.size = BUFLEN;
+	zob.pos = 0;
+
+	for (;;) {
+		/*
+		 * Refill the input only when it is exhausted and the decoder
+		 * is not known to be holding back output.
+		 */
+		if (zib.pos >= zib.size && !drain) {
+			res = read(in, ibuf, BUFLEN);
+			if (res < 0)
+				maybe_err("read");
+			infile_newdata(res);
+			if (res == 0)
+				break;
+			zib.src = ibuf;
+			zib.size = res;
+			zib.pos = 0;
+			if (bytes_in != NULL)
+				*bytes_in += res;
+		}
+		zres = ZSTD_decompressStream(zds, &zob, &zib);
+		if (ZSTD_isError(zres))
+			maybe_errx("%s", ZSTD_getErrorName(zres));
+		/*
+		 * A full output buffer in mid-frame means the decoder may
+		 * still have data buffered; call it again before reading so
+		 * that nothing is dropped at end of input.
+		 */
+		drain = (zres != 0 && zob.pos == zob.size);
+		/* write(2) may be partial; loop until the buffer is empty. */
+		for (off = 0; off < zob.pos; off += res) {
+			res = write(out, obuf + off, zob.pos - off);
+			if (res <= 0)
+				maybe_err("write");
+		}
+		bytes_out += zob.pos;
+		zob.pos = 0;
+	}
+	/* A non-zero return at end of input means an unfinished frame. */
+	if (zres != 0)
+		maybe_errx("truncated input");
+	ZSTD_freeDStream(zds);
+	return (bytes_out);
+}