git: 906748d208d3 - main - newsyslog(8): Add option to globally override compression method.

From: Xin LI <delphij_at_FreeBSD.org>
Date: Sat, 23 Dec 2023 06:53:06 UTC
The branch main has been updated by delphij:

URL: https://cgit.FreeBSD.org/src/commit/?id=906748d208d356a59538dc009a8ad5db040968d9

commit 906748d208d356a59538dc009a8ad5db040968d9
Author:     Xin LI <delphij@FreeBSD.org>
AuthorDate: 2023-12-23 06:46:33 +0000
Commit:     Xin LI <delphij@FreeBSD.org>
CommitDate: 2023-12-23 06:46:33 +0000

    newsyslog(8): Add option to globally override compression method.
    
    Historically, newsyslog compressed rotated log files to save disk space.
    This was useful in the early days. However, with modern file systems like
    ZFS offering native compression, and with the availability of larger hard
    drives, the benefits of additional compression have become less significant.
    This is particularly true considering the inconvenience of decompressing
    log files when searching for specific patterns.
    
    Additionally, the original implementation of compression methods was not
    future-proof.  As a result, we have redefined the J, X, Y, Z flags to
    signify "treat the file as compressible" rather than "compress the file
    with that specific method."
    
    A new command-line option, -c, has been introduced to allow overriding
    these settings in a more future-proof way. The available choices are:
    
     * none                  - do not compress, regardless of flag.
     * legacy                - historical behavior: J=bzip2, X=xz, Y=zstd, Z=gzip.
     * bzip2, xz, zstd, gzip - apply the specified compression method.
    
    Currently, the default is set to 'legacy' to preserve historical behavior.
    However, our intention is to change this default to 'none' in FreeBSD 15.0.
    
    Additionally, this update changes the default settings for zstd to use
    multithreading and long-range options, better aligning with its intended
    use.
    
    Inspired by D42961.
    
    Reviewed by:    debdrup (earlier version, mdoc(7))
    MFC after:      1 week
    Differential Revision: https://reviews.freebsd.org/D43165
---
 usr.sbin/newsyslog/newsyslog.8      |  22 +++++++-
 usr.sbin/newsyslog/newsyslog.c      | 110 ++++++++++++++++++++++++++++--------
 usr.sbin/newsyslog/newsyslog.conf.5 |  30 +++++++---
 3 files changed, 129 insertions(+), 33 deletions(-)

diff --git a/usr.sbin/newsyslog/newsyslog.8 b/usr.sbin/newsyslog/newsyslog.8
index ed1a917d2ce2..6d4fc378e790 100644
--- a/usr.sbin/newsyslog/newsyslog.8
+++ b/usr.sbin/newsyslog/newsyslog.8
@@ -14,7 +14,7 @@
 .\" the suitability of this software for any purpose.  It is
 .\" provided "as is" without express or implied warranty.
 .\"
-.Dd November 10, 2018
+.Dd December 22, 2023
 .Dt NEWSYSLOG 8
 .Os
 .Sh NAME
@@ -24,6 +24,7 @@
 .Nm
 .Op Fl CFNPnrsv
 .Op Fl a Ar directory
+.Op Fl c Ar none Ns | Ns Ar legacy Ns | Ns Ar bzip2 Ns | Ns Ar gzip Ns | Ns Ar xz Ns | Ns Ar zstd
 .Op Fl d Ar directory
 .Op Fl f Ar config_file
 .Op Fl S Ar pidfile
@@ -78,6 +79,25 @@ and mode three (above) assumes that this is so.
 The following options can be used with
 .Nm :
 .Bl -tag -width indent
+.It Fl c Ar none Ns | Ns Ar legacy Ns | Ns Ar bzip2 Ns | Ns Ar gzip Ns | Ns Ar xz Ns | Ns Ar zstd
+Instructs
+.Nm
+to use the specified compression method when a file is flagged for compression.
+The default method is
+.Dq legacy ,
+which interprets the
+.Sy J, X, Y, Z
+flags in the configuration file according to their historical meanings.
+This default setting can be overridden by specifying
+.Fl c Ar none ,
+which causes
+.Nm
+to ignore all compression flags.
+Alternatively, specifying one of the compression methods:
+.Sy bzip2 , gzip , xz ,
+or
+.Sy zstd ,
+will apply the chosen method to all files flagged for compression.
 .It Fl f Ar config_file
 Instruct
 .Nm
diff --git a/usr.sbin/newsyslog/newsyslog.c b/usr.sbin/newsyslog/newsyslog.c
index c867237d212c..a08e7b903707 100644
--- a/usr.sbin/newsyslog/newsyslog.c
+++ b/usr.sbin/newsyslog/newsyslog.c
@@ -88,13 +88,15 @@
 /*
  * Compression types
  */
-#define	COMPRESS_TYPES  5	/* Number of supported compression types */
-
-#define	COMPRESS_NONE	0
-#define	COMPRESS_GZIP	1
-#define	COMPRESS_BZIP2	2
-#define	COMPRESS_XZ	3
-#define COMPRESS_ZSTD	4
+enum compress_types_enum {
+	COMPRESS_NONE	= 0,
+	COMPRESS_GZIP	= 1,
+	COMPRESS_BZIP2	= 2,
+	COMPRESS_XZ	= 3,
+	COMPRESS_ZSTD	= 4,
+	COMPRESS_LEGACY = 5,			/* Special: use legacy type */
+	COMPRESS_TYPES = COMPRESS_LEGACY	/* Number of supported compression types */
+};
 
 /*
  * Bit-values for the 'flags' parsed from a config-file entry.
@@ -127,6 +129,7 @@
 #define	MAX_OLDLOGS 65536	/* Default maximum number of old logfiles */
 
 struct compress_types {
+	const char *name;	/* Name of compression type */
 	const char *flag;	/* Flag in configuration file */
 	const char *suffix;	/* Compression suffix */
 	const char *path;	/* Path to compression program */
@@ -137,14 +140,29 @@ struct compress_types {
 static const char *gzip_flags[] = { "-f" };
 #define bzip2_flags gzip_flags
 #define xz_flags gzip_flags
-static const char *zstd_flags[] = { "-q", "--rm" };
-
-static const struct compress_types compress_type[COMPRESS_TYPES] = {
-	{ "", "", "", NULL, 0 },
-	{ "Z", ".gz", _PATH_GZIP, gzip_flags, nitems(gzip_flags) },
-	{ "J", ".bz2", _PATH_BZIP2, bzip2_flags, nitems(bzip2_flags) },
-	{ "X", ".xz", _PATH_XZ, xz_flags, nitems(xz_flags) },
-	{ "Y", ".zst", _PATH_ZSTD, zstd_flags, nitems(zstd_flags) }
+static const char *zstd_flags[] = { "-q", "-T0", "--adapt", "--long", "--rm" };
+
+static struct compress_types compress_type[COMPRESS_TYPES] = {
+	[COMPRESS_NONE] = {
+		.name = "none", .flag = "", .suffix = "",
+		.path = "", .flags = NULL, .nflags = 0
+	},
+	[COMPRESS_GZIP] = {
+		.name = "gzip", .flag = "Z", .suffix = ".gz",
+		.path = _PATH_GZIP, .flags = gzip_flags, .nflags = nitems(gzip_flags)
+	},
+	[COMPRESS_BZIP2] = {
+		.name = "bzip2", .flag = "J", .suffix = ".bz2",
+		.path = _PATH_BZIP2, .flags = bzip2_flags, .nflags = nitems(bzip2_flags)
+	},
+	[COMPRESS_XZ] = {
+		.name = "xz", .flag = "X", .suffix = ".xz",
+		.path = _PATH_XZ, .flags = xz_flags, .nflags = nitems(xz_flags)
+	},
+	[COMPRESS_ZSTD] = {
+		.name = "zstd", .flag = "Y", .suffix = ".zst",
+		.path = _PATH_ZSTD, .flags = zstd_flags, .nflags = nitems(zstd_flags)
+	},
 };
 
 struct conf_entry {
@@ -229,6 +247,7 @@ static char *timefnamefmt = NULL;/* Use time based filenames instead of .0 */
 static char *archdirname;	/* Directory path to old logfiles archive */
 static char *destdir = NULL;	/* Directory to treat at root for logs */
 static const char *conf;	/* Configuration file to use */
+static enum compress_types_enum compress_type_override = COMPRESS_LEGACY;	/* Compression type */
 
 struct ptime_data *dbg_timenow;	/* A "timenow" value set via -D option */
 static struct ptime_data *timenow; /* The time to use for checking at-fields */
@@ -628,7 +647,7 @@ do_entry(struct conf_entry * ent)
 static void
 parse_args(int argc, char **argv)
 {
-	int ch;
+	int ch, i;
 	char *p;
 
 	timenow = ptime_init(NULL);
@@ -641,12 +660,28 @@ parse_args(int argc, char **argv)
 	hostname_shortlen = strcspn(hostname, ".");
 
 	/* Parse command line options. */
-	while ((ch = getopt(argc, argv, "a:d:f:nrst:vCD:FNPR:S:")) != -1)
+	while ((ch = getopt(argc, argv, "a:c:d:f:nrst:vCD:FNPR:S:")) != -1)
 		switch (ch) {
 		case 'a':
 			archtodir++;
 			archdirname = optarg;
 			break;
+		case 'c':
+			for (i = 0; i < COMPRESS_TYPES; i++) {
+				if (strcmp(optarg, compress_type[i].name) == 0) {
+					compress_type_override = i;
+					break;
+				}
+			}
+			if (i == COMPRESS_TYPES) {
+				if (strcmp(optarg, "legacy") == 0)
+					compress_type_override = COMPRESS_LEGACY;
+				else {
+					warnx("Unrecognized compression method '%s'.", optarg);
+					usage();
+				}
+			}
+			break;
 		case 'd':
 			destdir = optarg;
 			break;
@@ -791,10 +826,26 @@ parse_doption(const char *doption)
 static void
 usage(void)
 {
+	int i;
+	char *alltypes = NULL, *tmp = NULL;
+
+	for (i = 0; i < COMPRESS_TYPES; i++) {
+		if (i == COMPRESS_NONE) {
+			(void)asprintf(&tmp, "%s|legacy", compress_type[i].name);
+		} else {
+			(void)asprintf(&tmp, "%s|%s", alltypes, compress_type[i].name);
+		}
+		if (alltypes)
+			free(alltypes);
+		alltypes = tmp;
+		tmp = NULL;
+	}
 
 	fprintf(stderr,
-	    "usage: newsyslog [-CFNPnrsv] [-a directory] [-d directory] [-f config_file]\n"
-	    "                 [-S pidfile] [-t timefmt] [[-R tagname] file ...]\n");
+	    "usage: newsyslog [-CFNPnrsv] [-a directory] [-c %s]\n"
+	    "                 [-d directory] [-f config_file]\n"
+	    "                 [-S pidfile] [-t timefmt] [[-R tagname] file ...]\n",
+	    alltypes);
 	exit(1);
 }
 
@@ -1302,7 +1353,10 @@ no_trimat:
 				working->flags |= CE_GLOB;
 				break;
 			case 'j':
-				working->compress = COMPRESS_BZIP2;
+				if (compress_type_override == COMPRESS_LEGACY)
+					working->compress = COMPRESS_BZIP2;
+				else
+					working->compress = compress_type_override;
 				break;
 			case 'n':
 				working->flags |= CE_NOSIGNAL;
@@ -1323,13 +1377,22 @@ no_trimat:
 				/* Deprecated flag - keep for compatibility purposes */
 				break;
 			case 'x':
-				working->compress = COMPRESS_XZ;
+				if (compress_type_override == COMPRESS_LEGACY)
+					working->compress = COMPRESS_XZ;
+				else
+					working->compress = compress_type_override;
 				break;
 			case 'y':
-				working->compress = COMPRESS_ZSTD;
+				if (compress_type_override == COMPRESS_LEGACY)
+					working->compress = COMPRESS_ZSTD;
+				else
+					working->compress = compress_type_override;
 				break;
 			case 'z':
-				working->compress = COMPRESS_GZIP;
+				if (compress_type_override == COMPRESS_LEGACY)
+					working->compress = COMPRESS_GZIP;
+				else
+					working->compress = compress_type_override;
 				break;
 			case '-':
 				break;
@@ -2035,6 +2098,7 @@ do_zipwork(struct zipwork_entry *zwork)
 	assert(zwork->zw_conf != NULL);
 	assert(zwork->zw_conf->compress > COMPRESS_NONE);
 	assert(zwork->zw_conf->compress < COMPRESS_TYPES);
+	assert(zwork->zw_conf->compress != COMPRESS_LEGACY);
 
 	if (zwork->zw_swork != NULL && zwork->zw_swork->sw_runcmd == 0 &&
 	    zwork->zw_swork->sw_pidok <= 0) {
diff --git a/usr.sbin/newsyslog/newsyslog.conf.5 b/usr.sbin/newsyslog/newsyslog.conf.5
index 3ca833f90196..a53af5b38319 100644
--- a/usr.sbin/newsyslog/newsyslog.conf.5
+++ b/usr.sbin/newsyslog/newsyslog.conf.5
@@ -18,7 +18,7 @@
 .\" the suitability of this software for any purpose.  It is
 .\" provided "as is" without express or implied warranty.
 .\"
-.Dd February 26, 2021
+.Dd December 22, 2023
 .Dt NEWSYSLOG.CONF 5
 .Os
 .Sh NAME
@@ -308,8 +308,11 @@ for details on syntax and matching rules.
 .It Cm J
 indicates that
 .Xr newsyslog 8
-should attempt to save disk space by compressing the rotated
-log file using
+should consider the rotated log file as compressible.
+In legacy mode,
+this also tells
+.Xr newsyslog 8
+to use
 .Xr bzip2 1 .
 .It Cm N
 indicates that there is no process which needs to be signaled
@@ -337,20 +340,29 @@ be a negative value to distinguish it from a process ID.
 .It Cm X
 indicates that
 .Xr newsyslog 8
-should attempt to save disk space by compressing the rotated
-log file using
+should consider the rotated log file as compressible.
+In legacy mode,
+this also tells
+.Xr newsyslog 8
+to use
 .Xr xz 1 .
 .It Cm Y
 indicates that
 .Xr newsyslog 8
-should attempt to save disk space by compressing the rotated
-log file using
+should consider the rotated log file as compressible.
+In legacy mode,
+this also tells
+.Xr newsyslog 8
+to use
 .Xr zstd 1 .
 .It Cm Z
 indicates that
 .Xr newsyslog 8
-should attempt to save disk space by compressing the rotated
-log file using
+should consider the rotated log file as compressible.
+In legacy mode,
+this also tells
+.Xr newsyslog 8
+to use
 .Xr gzip 1 .
 .It Fl
 a minus sign will not cause any special processing, but it