git: da2905289748 - stable/14 - diff: Fix --expand-tabs and --side-by-side.

From: Dag-Erling Smørgrav <des_at_FreeBSD.org>
Date: Mon, 11 Mar 2024 12:35:06 UTC
The branch stable/14 has been updated by des:

URL: https://cgit.FreeBSD.org/src/commit/?id=da290528974846d3ed49138ca0f2de7fff02eaf6

commit da290528974846d3ed49138ca0f2de7fff02eaf6
Author:     Dag-Erling Smørgrav <des@FreeBSD.org>
AuthorDate: 2024-02-26 18:08:06 +0000
Commit:     Dag-Erling Smørgrav <des@FreeBSD.org>
CommitDate: 2024-03-11 12:19:06 +0000

    diff: Fix --expand-tabs and --side-by-side.
    
    * Overhaul column width and padding calculation.
    * Rewrite print_space() so it is now a) correct and b) understandable.
    * Rewrite tab expansion in fetch() for the same reason.
    
    This brings us in line with GNU diff for all cases I could think of.
    
    Sponsored by:   Klara, Inc.
    Reviewed by:    imp
    Differential Revision:  https://reviews.freebsd.org/D44014
    
    (cherry picked from commit 53de23f4d140becc3166e87665b0064f215a220e)
    
    diff: Bump manual page date.
    
    Sponsored by:   Klara, Inc.
    
    (cherry picked from commit 312b1076c6b0aff9bbcaff058b93385eaf607685)
---
 usr.bin/diff/diff.1    |  11 ++++-
 usr.bin/diff/diff.c    |  12 ++----
 usr.bin/diff/diffreg.c | 113 ++++++++++++++++++++++++++++---------------------
 3 files changed, 79 insertions(+), 57 deletions(-)

diff --git a/usr.bin/diff/diff.1 b/usr.bin/diff/diff.1
index e04aeb5d6d67..d3978158d1d1 100644
--- a/usr.bin/diff/diff.1
+++ b/usr.bin/diff/diff.1
@@ -29,7 +29,7 @@
 .\"
 .\"     @(#)diff.1	8.1 (Berkeley) 6/30/93
 .\"
-.Dd March 10, 2022
+.Dd February 26, 2024
 .Dt DIFF 1
 .Os
 .Sh NAME
@@ -429,6 +429,15 @@ Output at most
 .Ar number
 columns when using side by side format.
 The default value is 130.
+Note that unless
+.Fl t
+was specified,
+.Nm
+will always align the second column to a tab stop, so values of
+.Fl -width
+smaller than approximately five times the value of
+.Fl -tabsize
+may yield surprising results.
 .It Fl -changed-group-format Ar GFMT
 Format input groups in the provided
 .Pp
diff --git a/usr.bin/diff/diff.c b/usr.bin/diff/diff.c
index 03eb363fc323..d947c1e01705 100644
--- a/usr.bin/diff/diff.c
+++ b/usr.bin/diff/diff.c
@@ -276,10 +276,8 @@ main(int argc, char **argv)
 			break;
 		case 'W':
 			width = (int) strtonum(optarg, 1, INT_MAX, &errstr);
-			if (errstr) {
-				warnx("Invalid argument for width");
-				usage();
-			}
+			if (errstr)
+				errx(1, "width is %s: %s", errstr, optarg);
 			break;
 		case 'X':
 			read_excludes_file(optarg);
@@ -317,10 +315,8 @@ main(int argc, char **argv)
 			break;
 		case OPT_TSIZE:
 			tabsize = (int) strtonum(optarg, 1, INT_MAX, &errstr);
-			if (errstr) {
-				warnx("Invalid argument for tabsize");
-				usage();
-			}
+			if (errstr)
+				errx(1, "tabsize is %s: %s", errstr, optarg);
 			break;
 		case OPT_STRIPCR:
 			dflags |= D_STRIPCR;
diff --git a/usr.bin/diff/diffreg.c b/usr.bin/diff/diffreg.c
index 11aefb73cda6..8042003cd836 100644
--- a/usr.bin/diff/diffreg.c
+++ b/usr.bin/diff/diffreg.c
@@ -169,7 +169,6 @@ struct context_vec {
 
 enum readhash { RH_BINARY, RH_OK, RH_EOF };
 
-#define MIN_PAD		1
 static FILE	*opentemp(const char *);
 static void	 output(char *, FILE *, char *, FILE *, int);
 static void	 check(FILE *, FILE *, int);
@@ -209,7 +208,7 @@ static int	 len[2];
 static int	 pref, suff;	/* length of prefix and suffix */
 static int	 slen[2];
 static int	 anychange;
-static int	 hw, padding;	/* half width and padding */
+static int	 hw, lpad, rpad;	/* half width and padding */
 static int	 edoffset;
 static long	*ixnew;		/* will be overlaid on file[1] */
 static long	*ixold;		/* will be overlaid on klist */
@@ -254,21 +253,44 @@ diffreg(char *file1, char *file2, int flags, int capsicum)
 	lastline = 0;
 	lastmatchline = 0;
 
-	 /*
-	  * hw excludes padding and make sure when -t is not used,
-	  * the second column always starts from the closest tab stop
-	  */
+	/*
+	 * In side-by-side mode, we need to print the left column, a
+	 * change marker surrounded by padding, and the right column.
+	 *
+	 * If expanding tabs, we don't care about alignment, so we simply
+	 * subtract 3 from the width and divide by two.
+	 *
+	 * If not expanding tabs, we need to ensure that the right column
+	 * is aligned to a tab stop.  We start with the same formula, then
+	 * decrement until we reach a size that lets us tab-align the
+	 * right column.  We then adjust the width down if necessary for
+	 * the padding calculation to work.
+	 *
+	 * Left padding is half the space left over, rounded down; right
+	 * padding is whatever is needed to match the width.
+	 */
 	if (diff_format == D_SIDEBYSIDE) {
-		hw = width >> 1;
-		padding = tabsize - (hw % tabsize);
-		if ((flags & D_EXPANDTABS) != 0 || (padding % tabsize == 0))
-			padding = MIN_PAD;
-
-		hw = (width >> 1) -
-		    ((padding == MIN_PAD) ? (padding << 1) : padding) - 1;
+		if (flags & D_EXPANDTABS) {
+			if (width > 3) {
+				hw = (width - 3) / 2;
+			} else {
+				/* not enough space */
+				hw = 0;
+			}
+		} else if (width <= 3 || width <= tabsize) {
+			/* not enough space */
+			hw = 0;
+		} else {
+			hw = (width - 3) / 2;
+			while (hw > 0 && roundup(hw + 3, tabsize) + hw > width)
+				hw--;
+			if (width - (roundup(hw + 3, tabsize) + hw) < tabsize)
+				width = roundup(hw + 3, tabsize) + hw;
+		}
+		lpad = (width - hw * 2 - 1) / 2;
+		rpad = (width - hw * 2 - 1) - lpad;
 	}
 
-
 	if (flags & D_IGNORECASE)
 		chrtran = cup2low;
 	else
@@ -869,7 +891,7 @@ output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
 			while (i0 <= m && J[i0] == J[i0 - 1] + 1) {
 				if (diff_format == D_SIDEBYSIDE && suppress_common != 1) {
 					nc = fetch(ixold, i0, i0, f1, '\0', 1, flags);
-					print_space(nc, (hw - nc) + (padding << 1) + 1, flags);
+					print_space(nc, hw - nc + lpad + 1 + rpad, flags);
 					fetch(ixnew, J[i0], J[i0], f2, '\0', 0, flags);
 					printf("\n");
 				}
@@ -1147,10 +1169,10 @@ proceed:
 		else if (color && c > d)
 			printf("\033[%sm", del_code);
 		if (a > b) {
-			print_space(0, hw + padding , *pflags);
+			print_space(0, hw + lpad, *pflags);
 		} else {
 			nc = fetch(ixold, a, b, f1, '\0', 1, *pflags);
-			print_space(nc, hw - nc + padding, *pflags);
+			print_space(nc, hw - nc + lpad, *pflags);
 		}
 		if (color && a > b)
 			printf("\033[%sm", add_code);
@@ -1159,7 +1181,7 @@ proceed:
 		printf("%c", (a > b) ? '>' : ((c > d) ? '<' : '|'));
 		if (color && c > d)
 			printf("\033[m");
-		print_space(hw + padding + 1 , padding, *pflags);
+		print_space(hw + lpad + 1, rpad, *pflags);
 		fetch(ixnew, c, d, f2, '\0', 0, *pflags);
 		printf("\n");
 	}
@@ -1265,30 +1287,24 @@ fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
 					printf("\n\\ No newline at end of file\n");
 				return (col);
 			}
-			/*
-			 * when using --side-by-side, col needs to be increased
-			 * in any case to keep the columns aligned
-			 */
 			if (c == '\t') {
-				if (flags & D_EXPANDTABS) {
-					newcol = ((col / tabsize) + 1) * tabsize;
-					do {
-						printf(" ");
-					} while (++col < newcol && col < hw);
+				/*
+				 * Calculate where the tab would bring us.
+				 * If it would take us to the end of the
+				 * column, either clip it (if expanding
+				 * tabs) or return right away (if not).
+				 */
+				newcol = roundup(col + 1, tabsize);
+				if ((flags & D_EXPANDTABS) == 0) {
+					if (hw > 0 && newcol >= hw)
+						return (col);
+					printf("\t");
 				} else {
-					if (diff_format == D_SIDEBYSIDE) {
-						if ((col + tabsize) > hw) {
-							printf("%*s", hw - col, "");
-							col = hw;
-						} else {
-							printf("\t");
-							col += tabsize - 1;
-						}
-					} else {
-						printf("\t");
-						col++;
-					}
+					if (hw > 0 && newcol > hw)
+						newcol = hw;
+					printf("%*s", newcol - col, "");
 				}
+				col = newcol;
 			} else {
 				if (diff_format == D_EDIT && j == 1 && c == '\n' &&
 				    lastc == '.') {
@@ -1668,18 +1684,19 @@ print_header(const char *file1, const char *file2)
  * nc is the preceding number of characters
  */
 static void
-print_space(int nc, int n, int flags) {
-	int i, col;
+print_space(int nc, int n, int flags)
+{
+	int col, newcol, tabstop;
 
-	col = n;
+	col = nc;
+	newcol = nc + n;
+	/* first, use tabs if allowed */
 	if ((flags & D_EXPANDTABS) == 0) {
-		/* first tabstop may be closer than tabsize */
-		i = tabsize - (nc % tabsize);
-		while (col >= tabsize) {
+		while ((tabstop = roundup(col + 1, tabsize)) <= newcol) {
 			printf("\t");
-			col -= i;
-			i = tabsize;
+			col = tabstop;
 		}
 	}
-	printf("%*s", col, "");
+	/* finish with spaces */
+	printf("%*s", newcol - col, "");
 }