git: 181e9683778e - stable/14 - sdiff: Misc cleanup.

From: Dag-Erling Smørgrav <des_at_FreeBSD.org>
Date: Sat, 24 Feb 2024 12:15:03 UTC
The branch stable/14 has been updated by des:

URL: https://cgit.FreeBSD.org/src/commit/?id=181e9683778ee4169d8e54e7330d4ca5281701f6

commit 181e9683778ee4169d8e54e7330d4ca5281701f6
Author:     Dag-Erling Smørgrav <des@FreeBSD.org>
AuthorDate: 2024-02-18 17:39:23 +0000
Commit:     Dag-Erling Smørgrav <des@FreeBSD.org>
CommitDate: 2024-02-24 12:12:49 +0000

    sdiff: Misc cleanup.
    
    MFC after:      1 week
    Sponsored by:   Klara, Inc.
    Reviewed by:    markj
    Differential Revision:  https://reviews.freebsd.org/D43943
    
    (cherry picked from commit 3cc86989bfbe27c91b5db592c2af33fef153e230)
    
    sdiff: Fix --expand-tabs and --tabsize.
    
    MFC after:      1 week
    Sponsored by:   Klara, Inc.
    Reviewed by:    markj
    Differential Revision:  https://reviews.freebsd.org/D43941
    
    (cherry picked from commit a834edfccd14a8c0f152a3b0078469af8e05f3fd)
    
    sdiff: Fix binary case.
    
    MFC after:      1 week
    Sponsored by:   Klara, Inc.
    Reviewed by:    markj
    Differential Revision:  https://reviews.freebsd.org/D43942
    
    (cherry picked from commit ad7bef8b890768e68a48bbfa6b92ebf635068504)
---
 usr.bin/sdiff/sdiff.1             |   4 +-
 usr.bin/sdiff/sdiff.c             | 232 +++++++++++++++++++++-----------------
 usr.bin/sdiff/tests/sdiff_test.sh |  36 ++++++
 3 files changed, 164 insertions(+), 108 deletions(-)

diff --git a/usr.bin/sdiff/sdiff.1 b/usr.bin/sdiff/sdiff.1
index ef9bb95a0990..ca6594c6479a 100644
--- a/usr.bin/sdiff/sdiff.1
+++ b/usr.bin/sdiff/sdiff.1
@@ -3,7 +3,7 @@
 .\" Written by Raymond Lai <ray@cyth.net>.
 .\" Public domain.
 .\"
-.Dd April 8, 2017
+.Dd February 16, 2024
 .Dt SDIFF 1
 .Os
 .Sh NAME
@@ -117,8 +117,6 @@ Ignore all spaces.
 Ignore blank lines.
 .It Fl E -ignore-tab-expansion
 Treat tabs and eight spaces as the same.
-.It Fl t -ignore-tabs
-Ignore tabs.
 .It Fl H -speed-large-files
 Assume scattered small changes in a large file.
 .It Fl -ignore-file-name-case
diff --git a/usr.bin/sdiff/sdiff.c b/usr.bin/sdiff/sdiff.c
index b863d5875db6..6402b85017a7 100644
--- a/usr.bin/sdiff/sdiff.c
+++ b/usr.bin/sdiff/sdiff.c
@@ -9,7 +9,6 @@
 #include <sys/param.h>
 #include <sys/queue.h>
 #include <sys/stat.h>
-#include <sys/types.h>
 #include <sys/wait.h>
 
 #include <ctype.h>
@@ -19,6 +18,7 @@
 #include <getopt.h>
 #include <limits.h>
 #include <paths.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -51,7 +51,7 @@ static void astrcat(char **, const char *);
 static void enqueue(char *, char, char *);
 static char *mktmpcpy(const char *);
 static int istextfile(FILE *);
-static void binexec(char *, char *, char *) __dead2;
+static int bindiff(FILE *, char *, FILE *, char *);
 static void freediff(struct diffline *);
 static void int_usage(void);
 static int parsecmd(FILE *, FILE *, FILE *);
@@ -69,11 +69,13 @@ static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead);
 static size_t line_width;	/* width of a line (two columns and divider) */
 static size_t width;		/* width of each column */
 static size_t file1ln, file2ln;	/* line number of file1 and file2 */
-static int Iflag = 0;	/* ignore sets matching regexp */
-static int	lflag;		/* print only left column for identical lines */
-static int	sflag;		/* skip identical lines */
-FILE *outfp;		/* file to save changes to */
-const char *tmpdir;	/* TMPDIR or /tmp */
+static bool Iflag;		/* ignore sets matching regexp */
+static bool lflag;		/* print only left column for identical lines */
+static bool sflag;		/* skip identical lines */
+static bool tflag;		/* expand tabs */
+static int tabsize = 8;		/* tab size */
+FILE *outfp;			/* file to save changes to */
+const char *tmpdir;		/* TMPDIR or /tmp */
 
 enum {
 	HELP_OPT = CHAR_MAX + 1,
@@ -127,7 +129,7 @@ static const char *help_msg[] = {
 	"\t-d, --minimal: minimize diff size.",
 	"\t-I RE, --ignore-matching-lines=RE: ignore changes whose line matches RE.",
 	"\t-i, --ignore-case: do a case-insensitive comparison.",
-	"\t-t, --expand-tabs: sxpand tabs to spaces.",
+	"\t-t, --expand-tabs: expand tabs to spaces.",
 	"\t-W, --ignore-all-spaces: ignore all spaces.",
 	"\t--speed-large-files: assume large file with scattered changes.",
 	"\t--strip-trailing-cr: strip trailing carriage return.",
@@ -206,14 +208,13 @@ FAIL:
 int
 main(int argc, char **argv)
 {
-	FILE *diffpipe=NULL, *file1, *file2;
-	size_t diffargc = 0, wflag = WIDTH;
-	int ch, fd[2] = {-1}, status;
-	pid_t pid=0;
-	const char *outfile = NULL;
-	char **diffargv, *diffprog = diff_path, *filename1, *filename2,
-	     *tmp1, *tmp2, *s1, *s2;
-	int i;
+	FILE *diffpipe, *file1, *file2;
+	size_t diffargc = 0, flagc = 0, wval = WIDTH;
+	int ch, fd[2], i, ret, status;
+	pid_t pid;
+	const char *errstr, *outfile = NULL;
+	char **diffargv, *diffprog = diff_path, *flagv;
+	char *filename1, *filename2, *tmp1, *tmp2, *s1, *s2;
 	char I_arg[] = "-I";
 	char speed_lf[] = "--speed-large-files";
 
@@ -228,26 +229,26 @@ main(int argc, char **argv)
 	 * waste some memory; however we need an extra space for the
 	 * NULL at the end, so it sort of works out.
 	 */
-	if (!(diffargv = calloc(argc, sizeof(char **) * 2)))
-		err(2, "main");
+	if ((diffargv = calloc(argc, sizeof(char *) * 2)) == NULL)
+		err(2, NULL);
 
 	/* Add first argument, the program name. */
 	diffargv[diffargc++] = diffprog;
 
-	/* create a dynamic string for merging single-switch options */
-	if ( asprintf(&diffargv[diffargc++], "-")  < 0 )
-		err(2, "main");
+	/* create a dynamic string for merging single-character options */
+	if ((flagv = malloc(flagc + 2)) == NULL)
+		err(2, NULL);
+	flagv[flagc] = '-';
+	flagv[flagc + 1] = '\0';
+	diffargv[diffargc++] = flagv;
 
 	while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:",
 	    longopts, NULL)) != -1) {
-		const char *errstr;
-
 		switch (ch) {
 		/* only compatible --long-name-form with diff */
 		case FCASE_IGNORE_OPT:
 		case FCASE_SENSITIVE_OPT:
 		case STRIPCR_OPT:
-		case TSIZE_OPT:
 		case 'S':
 		break;
 		/* combine no-arg single switches */
@@ -257,16 +258,14 @@ main(int argc, char **argv)
 		case 'd':
 		case 'E':
 		case 'i':
-		case 't':
 		case 'W':
-			diffargv[1]  = realloc(diffargv[1], sizeof(char) * strlen(diffargv[1]) + 2);
+			flagc++;
+			flagv = realloc(flagv, flagc + 2);
 			/*
 			 * In diff, the 'W' option is 'w' and the 'w' is 'W'.
 			 */
-			if (ch == 'W')
-				sprintf(diffargv[1], "%sw", diffargv[1]);
-			else
-				sprintf(diffargv[1], "%s%c", diffargv[1], ch);
+			flagv[flagc] = ch == 'W' ? 'w' : ch;
+			flagv[flagc + 1] = '\0';
 			break;
 		case 'H':
 			diffargv[diffargc++] = speed_lf;
@@ -275,21 +274,24 @@ main(int argc, char **argv)
 			diffargv[0] = diffprog = optarg;
 			break;
 		case 'I':
-			Iflag = 1;
+			Iflag = true;
 			diffargv[diffargc++] = I_arg;
 			diffargv[diffargc++] = optarg;
 			break;
 		case 'l':
-			lflag = 1;
+			lflag = true;
 			break;
 		case 'o':
 			outfile = optarg;
 			break;
 		case 's':
-			sflag = 1;
+			sflag = true;
+			break;
+		case 't':
+			tflag = true;
 			break;
 		case 'w':
-			wflag = strtonum(optarg, WIDTH_MIN,
+			wval = strtonum(optarg, WIDTH_MIN,
 			    INT_MAX, &errstr);
 			if (errstr)
 				errx(2, "width is %s: %s", errstr, optarg);
@@ -299,19 +301,23 @@ main(int argc, char **argv)
 				printf("%s\n", help_msg[i]);
 			exit(0);
 			break;
+		case TSIZE_OPT:
+			tabsize = strtonum(optarg, 1, INT_MAX, &errstr);
+			if (errstr)
+				errx(2, "tabsize is %s: %s", errstr, optarg);
+			break;
 		default:
 			usage();
 			break;
 		}
 	}
 
-	/* no single switches were used */
-	if (strcmp(diffargv[1], "-") == 0 ) {
-		for ( i = 1; i < argc-1; i++) {
-			diffargv[i] = diffargv[i+1];
-		}
-		diffargv[diffargc-1] = NULL;
+	/* no single-character options were used */
+	if (flagc == 0) {
+		memmove(diffargv + 1, diffargv + 2,
+		    sizeof(char *) * (diffargc - 2));
 		diffargc--;
+		free(flagv);
 	}
 
 	argc -= optind;
@@ -350,13 +356,22 @@ main(int argc, char **argv)
 			filename2 = tmp2;
 	}
 
+	if ((file1 = fopen(filename1, "r")) == NULL)
+		err(2, "could not open %s", filename1);
+	if ((file2 = fopen(filename2, "r")) == NULL)
+		err(2, "could not open %s", filename2);
+	if (!istextfile(file1) || !istextfile(file2)) {
+		ret = bindiff(file1, filename1, file2, filename2);
+		goto done;
+	}
+
 	diffargv[diffargc++] = filename1;
 	diffargv[diffargc++] = filename2;
 	/* Add NULL to end of array to indicate end of array. */
 	diffargv[diffargc++] = NULL;
 
 	/* Subtract column divider and divide by two. */
-	width = (wflag - 3) / 2;
+	width = (wval - 3) / 2;
 	/* Make sure line_width can fit in size_t. */
 	if (width > (SIZE_MAX - 3) / 2)
 		errx(2, "width is too large: %zu", width);
@@ -365,21 +380,18 @@ main(int argc, char **argv)
 	if (pipe(fd))
 		err(2, "pipe");
 
-	switch (pid = fork()) {
-	case 0:
+	if ((pid = fork()) < 0)
+		err(1, "fork()");
+	if (pid == 0) {
 		/* child */
 		/* We don't read from the pipe. */
 		close(fd[0]);
-		if (dup2(fd[1], STDOUT_FILENO) == -1)
-			err(2, "child could not duplicate descriptor");
+		if (dup2(fd[1], STDOUT_FILENO) != STDOUT_FILENO)
+			_exit(2);
 		/* Free unused descriptor. */
 		close(fd[1]);
 		execvp(diffprog, diffargv);
-		err(2, "could not execute diff: %s", diffprog);
-		break;
-	case -1:
-		err(2, "could not fork");
-		break;
+		_exit(2);
 	}
 
 	/* parent */
@@ -390,26 +402,6 @@ main(int argc, char **argv)
 	if ((diffpipe = fdopen(fd[0], "r")) == NULL)
 		err(2, "could not open diff pipe");
 
-	if ((file1 = fopen(filename1, "r")) == NULL)
-		err(2, "could not open %s", filename1);
-	if ((file2 = fopen(filename2, "r")) == NULL)
-		err(2, "could not open %s", filename2);
-	if (!istextfile(file1) || !istextfile(file2)) {
-		/* Close open files and pipe, delete temps */
-		fclose(file1);
-		fclose(file2);
-		if (diffpipe != NULL)
-			fclose(diffpipe);
-		if (tmp1)
-			if (unlink(tmp1))
-				warn("Error deleting %s.", tmp1);
-		if (tmp2)
-			if (unlink(tmp2))
-				warn("Error deleting %s.", tmp2);
-		free(tmp1);
-		free(tmp2);
-		binexec(diffprog, filename1, filename2);
-	}
 	/* Line numbers start at one. */
 	file1ln = file2ln = 1;
 
@@ -421,20 +413,10 @@ main(int argc, char **argv)
 	/* Wait for diff to exit. */
 	if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) ||
 	    WEXITSTATUS(status) >= 2)
-		err(2, "diff exited abnormally.");
+		errx(2, "diff exited abnormally");
+	ret = WEXITSTATUS(status);
 
-	/* Delete and free unneeded temporary files. */
-	if (tmp1)
-		if (unlink(tmp1))
-			warn("Error deleting %s.", tmp1);
-	if (tmp2)
-		if (unlink(tmp2))
-			warn("Error deleting %s.", tmp2);
-	free(tmp1);
-	free(tmp2);
-	filename1 = filename2 = tmp1 = tmp2 = NULL;
-
-	/* No more diffs, so print common lines. */
+	/* No more diffs, so enqueue common lines. */
 	if (lflag)
 		while ((s1 = xfgets(file1)))
 			enqueue(s1, ' ', NULL);
@@ -452,23 +434,55 @@ main(int argc, char **argv)
 	/* Process unmodified lines. */
 	processq();
 
+done:
+	/* Delete and free unneeded temporary files. */
+	if (tmp1 != NULL) {
+		if (unlink(tmp1) != 0)
+			warn("failed to delete %s", tmp1);
+		free(tmp1);
+	}
+	if (tmp2 != NULL) {
+		if (unlink(tmp2) != 0)
+			warn("failed to delete %s", tmp2);
+		free(tmp2);
+	}
+
 	/* Return diff exit status. */
-	return (WEXITSTATUS(status));
+	free(diffargv);
+	if (flagc > 0)
+		free(flagv);
+	return (ret);
 }
 
 /*
- * When sdiff detects a binary file as input, executes them with
- * diff to maintain the same behavior as GNU sdiff with binary input.
+ * When sdiff detects a binary file as input.
  */
-static void
-binexec(char *diffprog, char *f1, char *f2)
+static int
+bindiff(FILE *f1, char *fn1, FILE *f2, char *fn2)
 {
-
-	char *args[] = {diffprog, f1, f2, (char *) 0};
-	execv(diffprog, args);
-
-	/* If execv() fails, sdiff's execution will continue below. */
-	errx(1, "could not execute diff process");
+	int ch1, ch2;
+
+	flockfile(f1);
+	flockfile(f2);
+	do {
+		ch1 = getc_unlocked(f1);
+		ch2 = getc_unlocked(f2);
+	} while (ch1 != EOF && ch2 != EOF && ch1 == ch2);
+	funlockfile(f2);
+	funlockfile(f1);
+	if (ferror(f1)) {
+		warn("%s", fn1);
+		return (2);
+	}
+	if (ferror(f2)) {
+		warn("%s", fn2);
+		return (2);
+	}
+	if (ch1 != EOF || ch2 != EOF) {
+		printf("Binary files %s and %s differ\n", fn1, fn2);
+		return (1);
+	}
+	return (0);
 }
 
 /*
@@ -514,11 +528,11 @@ printcol(const char *s, size_t *col, const size_t col_max)
 			 * If rounding to next multiple of eight causes
 			 * an integer overflow, just return.
 			 */
-			if (*col > SIZE_MAX - 8)
+			if (*col > SIZE_MAX - tabsize)
 				return;
 
 			/* Round to next multiple of eight. */
-			new_col = (*col / 8 + 1) * 8;
+			new_col = (*col / tabsize + 1) * tabsize;
 
 			/*
 			 * If printing the tab goes past the column
@@ -526,12 +540,20 @@ printcol(const char *s, size_t *col, const size_t col_max)
 			 */
 			if (new_col > col_max)
 				return;
-			*col = new_col;
+
+			if (tflag) {
+				do {
+					putchar(' ');
+				} while (++*col < new_col);
+			} else {
+				putchar(*s);
+				*col = new_col;
+			}
 			break;
 		default:
-			++(*col);
+			++*col;
+			putchar(*s);
 		}
-		putchar(*s);
 	}
 }
 
@@ -552,7 +574,7 @@ prompt(const char *s1, const char *s2)
 		const char *p;
 
 		/* Skip leading whitespace. */
-		for (p = cmd; isspace(*p); ++p)
+		for (p = cmd; isspace((unsigned char)*p); ++p)
 			;
 		switch (*p) {
 		case 'e':
@@ -578,10 +600,10 @@ prompt(const char *s1, const char *s2)
 			/* End of command parsing. */
 			break;
 		case 's':
-			sflag = 1;
+			sflag = true;
 			goto PROMPT;
 		case 'v':
-			sflag = 0;
+			sflag = false;
 			/* FALLTHROUGH */
 		default:
 			/* Interactive usage help. */
@@ -702,7 +724,7 @@ parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
 
 	p = line;
 	/* Go to character after line number. */
-	while (isdigit(*p))
+	while (isdigit((unsigned char)*p))
 		++p;
 	c = *p;
 	*p++ = 0;
@@ -714,7 +736,7 @@ parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
 	if (c == ',') {
 		q = p;
 		/* Go to character after file2end. */
-		while (isdigit(*p))
+		while (isdigit((unsigned char)*p))
 			++p;
 		c = *p;
 		*p++ = 0;
@@ -733,7 +755,7 @@ parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
 
 	q = p;
 	/* Go to character after line number. */
-	while (isdigit(*p))
+	while (isdigit((unsigned char)*p))
 		++p;
 	c = *p;
 	*p++ = 0;
diff --git a/usr.bin/sdiff/tests/sdiff_test.sh b/usr.bin/sdiff/tests/sdiff_test.sh
index 100fa1b123b0..83ed93503f18 100755
--- a/usr.bin/sdiff/tests/sdiff_test.sh
+++ b/usr.bin/sdiff/tests/sdiff_test.sh
@@ -191,6 +191,40 @@ short_body()
 $(atf_get_srcdir)/d_input2 >/dev/null ; cat merge.out"
 }
 
+atf_test_case tflag
+tflag_head()
+{
+	atf_set "descr" "Checks tab expansion"
+}
+tflag_body()
+{
+	printf "a\tb\n" >a
+	printf "b\ta\n" >b
+	atf_check -s exit:1 -o match:$'a\tb' \
+		  sdiff a b
+	atf_check -s exit:1 -o match:"a {7}b" \
+		  sdiff -t a b
+	atf_check -s exit:1 -o match:"a {3}b" \
+		  sdiff -t --tabsize 4 a b
+}
+
+atf_test_case binary
+binary_head()
+{
+	atf_set "descr" "Checks binary file handling"
+}
+binary_body()
+{
+	printf "a\0\n" >a
+	printf "b\0\n" >b
+	atf_check -o empty sdiff a a
+	atf_check -o empty sdiff a - <a
+	atf_check -s exit:1 -o match:"Binary files .* differ" \
+		  sdiff a b
+	atf_check -s exit:1 -o match:"Binary files .* differ" \
+		  sdiff a - <b
+}
+
 atf_init_test_cases()
 {
 	atf_add_test_case flags
@@ -203,4 +237,6 @@ atf_init_test_cases()
 	atf_add_test_case dot
 	atf_add_test_case stdin
 	atf_add_test_case short
+	atf_add_test_case tflag
+	atf_add_test_case binary
 }