svn commit: r357648 - in head/usr.bin/diff: . tests

Baptiste Daroussin bapt at FreeBSD.org
Fri Feb 7 10:17:14 UTC 2020


Author: bapt
Date: Fri Feb  7 10:17:13 2020
New Revision: 357648
URL: https://svnweb.freebsd.org/changeset/base/357648

Log:
  diff: implement -y (--side-by-side) along with -W and --suppress-common-lines
  
  PR:		219933
  Submitted by:	fehmi noyan isi <fnoyanisi at yahoo.com>
  MFC after:	3 weeks

Modified:
  head/usr.bin/diff/diff.1
  head/usr.bin/diff/diff.c
  head/usr.bin/diff/diff.h
  head/usr.bin/diff/diffreg.c
  head/usr.bin/diff/tests/diff_test.sh

Modified: head/usr.bin/diff/diff.1
==============================================================================
--- head/usr.bin/diff/diff.1	Fri Feb  7 09:22:08 2020	(r357647)
+++ head/usr.bin/diff/diff.1	Fri Feb  7 10:17:13 2020	(r357648)
@@ -30,7 +30,7 @@
 .\"     @(#)diff.1	8.1 (Berkeley) 6/30/93
 .\" $FreeBSD$
 .\"
-.Dd August 18, 2018
+.Dd February 07, 2020
 .Dt DIFF 1
 .Os
 .Sh NAME
@@ -41,7 +41,7 @@
 .Op Fl aBbdipTtw
 .Oo
 .Fl c | e | f |
-.Fl n | q | u
+.Fl n | q | u | y
 .Oc
 .Op Fl -brief
 .Op Fl -changed-group-format Ar GFMT
@@ -182,6 +182,21 @@
 .Op Fl x Ar pattern | Fl -exclude Ar pattern
 .Ek
 .Ar dir1 dir2
+.Nm diff
+.Op Fl aBbditwW
+.Op Fl -expand-tabs
+.Op Fl -ignore-all-blanks
+.Op Fl -ignore-blank-lines
+.Op Fl -ignore-case
+.Op Fl -minimal
+.Op Fl -no-ignore-file-name-case
+.Op Fl -strip-trailing-cr
+.Op Fl -suppress-common-lines
+.Op Fl -tabsize
+.Op Fl -text
+.Op Fl -width
+.Fl y | Fl -side-by-side
+.Ar file1 file2
 .Sh DESCRIPTION
 The
 .Nm
@@ -284,7 +299,21 @@ However, unlike with
 .Fl c ,
 all lines to be changed (added and/or removed) are present in
 a single section.
+.It Fl y Fl -side-by-side
+Output in two columns with a marker between them. The marker can be one 
+of the following:
+.Pp
+.Bl -tag -width Ds -offset indent -compact
+.It space 
+Corresponding lines are identical.
+.It '|'
+Corresponding lines are different.
+.It '<'
+Files differ and only the first file contains the line.
+.It '>'
+Files differ and only the second file contains the line.
 .El
+.El
 .Pp
 Comparison options:
 .Bl -tag -width Ds
@@ -362,6 +391,10 @@ E.g.,
 .Dq if (\ \&a == b \&)
 will compare equal to
 .Dq if(a==b) .
+.It Fl W Ar number Fl -width Ar number
+Output at most
+.Ar number
+columns when using side by side format. The default value is 130.
 .It Fl -changed-group-format Ar GFMT
 Format input groups in the provided
 .Pp
@@ -382,7 +415,9 @@ default diff output
 stub option for compatibility with GNU diff
 .It Fl -strip-trailing-cr
 strip carriage return on input files
-.It Fl tabsize Ar number
+.It Fl -suppress-common-lines
+Do not output common lines when using the side by side format.
+.It Fl -tabsize Ar number
 Number of spaces representing a tab (default 8)
 .El
 .Pp

Modified: head/usr.bin/diff/diff.c
==============================================================================
--- head/usr.bin/diff/diff.c	Fri Feb  7 09:22:08 2020	(r357647)
+++ head/usr.bin/diff/diff.c	Fri Feb  7 10:17:13 2020	(r357648)
@@ -37,16 +37,16 @@ __FBSDID("$FreeBSD$");
 #include "diff.h"
 #include "xmalloc.h"
 
-int	 lflag, Nflag, Pflag, rflag, sflag, Tflag, cflag;
-int	 diff_format, diff_context, status, ignore_file_case;
-int	 tabsize = 8;
+int	 lflag, Nflag, Pflag, rflag, sflag, Tflag, cflag, Wflag;
+int	 diff_format, diff_context, status, ignore_file_case, suppress_common;
+int	 tabsize = 8, width = 130;
 char	*start, *ifdefname, *diffargs, *label[2], *ignore_pats;
 char	*group_format = NULL;
 struct stat stb1, stb2;
 struct excludes *excludes_list;
 regex_t	 ignore_re;
 
-#define	OPTIONS	"0123456789aBbC:cdD:efHhI:iL:lnNPpqrS:sTtU:uwX:x:"
+#define	OPTIONS	"0123456789aBbC:cdD:efHhI:iL:lnNPpqrS:sTtU:uwW:X:x:y"
 enum {
 	OPT_TSIZE = CHAR_MAX + 1,
 	OPT_STRIPCR,
@@ -55,6 +55,7 @@ enum {
 	OPT_NORMAL,
 	OPT_HORIZON_LINES,
 	OPT_CHANGED_GROUP_FORMAT,
+	OPT_SUPPRESS_COMMON,
 };
 
 static struct option longopts[] = {
@@ -83,8 +84,10 @@ static struct option longopts[] = {
 	{ "initial-tab",		no_argument,		0,	'T' },
 	{ "unified",			optional_argument,	0,	'U' },
 	{ "ignore-all-space",		no_argument,		0,	'w' },
+	{ "width",			required_argument,	0,	'W' },
 	{ "exclude",			required_argument,	0,	'x' },
 	{ "exclude-from",		required_argument,	0,	'X' },
+	{ "side-by-side",		no_argument,		NULL,	'y' },
 	{ "ignore-file-name-case",	no_argument,		NULL,	OPT_IGN_FN_CASE },
 	{ "horizon-lines",		required_argument,	NULL,	OPT_HORIZON_LINES },
 	{ "no-ignore-file-name-case",	no_argument,		NULL,	OPT_NO_IGN_FN_CASE },
@@ -92,6 +95,7 @@ static struct option longopts[] = {
 	{ "strip-trailing-cr",		no_argument,		NULL,	OPT_STRIPCR },
 	{ "tabsize",			optional_argument,	NULL,	OPT_TSIZE },
 	{ "changed-group-format",	required_argument,	NULL,	OPT_CHANGED_GROUP_FORMAT},
+	{ "suppress-common-lines",	no_argument,		NULL,	OPT_SUPPRESS_COMMON },
 	{ NULL,				0,			0,	'\0'}
 };
 
@@ -230,12 +234,23 @@ main(int argc, char **argv)
 		case 'w':
 			dflags |= D_IGNOREBLANKS;
 			break;
+		case 'W':
+			Wflag = 1;
+			width = (int) strtonum(optarg, 1, INT_MAX, &errstr);
+			if (errstr) {
+				warnx("Invalid argument for width");
+				usage();
+			}
+			break;
 		case 'X':
 			read_excludes_file(optarg);
 			break;
 		case 'x':
 			push_excludes(optarg);
 			break;
+		case 'y':
+			diff_format = D_SIDEBYSIDE;
+			break;
 		case OPT_CHANGED_GROUP_FORMAT:
 			diff_format = D_GFORMAT;
 			group_format = optarg;
@@ -261,6 +276,9 @@ main(int argc, char **argv)
 		case OPT_STRIPCR:
 			dflags |= D_STRIPCR;
 			break;
+		case OPT_SUPPRESS_COMMON:
+			suppress_common = 1;
+			break;
 		default:
 			usage();
 			break;
@@ -464,7 +482,12 @@ usage(void)
 	    "            -U number file1 file2\n"
 	    "       diff [-aBbdilNPprsTtw] [-c | -e | -f | -n | -q | -u] [--ignore-case]\n"
 	    "            [--no-ignore-case] [--normal] [--tabsize] [-I pattern] [-L label]\n"
-	    "            [-S name] [-X file] [-x pattern] dir1 dir2\n");
+	    "            [-S name] [-X file] [-x pattern] dir1 dir2\n"
+	    "       diff [-aBbditwW] [--expand-tabs] [--ignore-all-blanks]\n"
+            "            [--ignore-blank-lines] [--ignore-case] [--minimal]\n"
+            "            [--no-ignore-file-name-case] [--strip-trailing-cr]\n"
+            "            [--suppress-common-lines] [--tabsize] [--text] [--width]\n"
+            "            -y | --side-by-side file1 file2\n");
 
 	exit(2);
 }

Modified: head/usr.bin/diff/diff.h
==============================================================================
--- head/usr.bin/diff/diff.h	Fri Feb  7 09:22:08 2020	(r357647)
+++ head/usr.bin/diff/diff.h	Fri Feb  7 10:17:13 2020	(r357648)
@@ -48,6 +48,7 @@
 				   lines and no trailing . */
 #define	D_BRIEF		6	/* Say if the files differ */
 #define	D_GFORMAT	7	/* Diff with defined changed group format */
+#define D_SIDEBYSIDE    8	/* Side by side */
 
 /*
  * Output flags
@@ -85,9 +86,10 @@ struct excludes {
 	struct excludes *next;
 };
 
-extern int	lflag, Nflag, Pflag, rflag, sflag, Tflag, cflag;
+extern int	lflag, Nflag, Pflag, rflag, sflag, Tflag, cflag, Wflag;
 extern int	diff_format, diff_context, status, ignore_file_case;
-extern int	tabsize;
+extern int	suppress_common;
+extern int	tabsize, width;
 extern char	*start, *ifdefname, *diffargs, *label[2], *ignore_pats;
 extern char	*group_format;
 extern struct	stat stb1, stb2;

Modified: head/usr.bin/diff/diffreg.c
==============================================================================
--- head/usr.bin/diff/diffreg.c	Fri Feb  7 09:22:08 2020	(r357647)
+++ head/usr.bin/diff/diffreg.c	Fri Feb  7 10:17:13 2020	(r357648)
@@ -181,6 +181,7 @@ struct context_vec {
 };
 
 #define	diff_output	printf
+#define MIN_PAD		1
 static FILE	*opentemp(const char *);
 static void	 output(char *, FILE *, char *, FILE *, int);
 static void	 check(FILE *, FILE *, int);
@@ -196,6 +197,7 @@ static void	 unsort(struct line *, int, int *);
 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
 static void	 sort(struct line *, int);
 static void	 print_header(const char *, const char *);
+static void	 print_space(int, int, int);
 static bool	 ignoreline_pattern(char *);
 static bool	 ignoreline(char *, bool);
 static int	 asciifile(FILE *);
@@ -220,6 +222,8 @@ static int   len[2];
 static int   pref, suff;	/* length of prefix and suffix */
 static int   slen[2];
 static int   anychange;
+static int   hw, padding;	/* half width and padding */
+static int   edoffset;
 static long *ixnew;		/* will be overlaid on file[1] */
 static long *ixold;		/* will be overlaid on klist */
 static struct cand *clist;	/* merely a free storage pot for candidates */
@@ -263,6 +267,22 @@ diffreg(char *file1, char *file2, int flags, int capsi
 	lastline = 0;
 	lastmatchline = 0;
 	context_vec_ptr = context_vec_start - 1;
+
+	 /*
+	  * hw is the half width excluding padding; when -t is not used,
+	  * padding makes the second column start at the closest tab stop
+	  */
+	if (diff_format == D_SIDEBYSIDE) { 
+		hw = width >> 1;
+		padding = tabsize - (hw % tabsize);
+		if ((flags & D_EXPANDTABS) != 0 || (padding % tabsize == 0))
+			padding = MIN_PAD;
+	
+		hw = (width >> 1) - 
+		    ((padding == MIN_PAD) ? (padding << 1) : padding) - 1;
+	}
+	
+
 	if (flags & D_IGNORECASE)
 		chrtran = cup2low;
 	else
@@ -865,7 +885,7 @@ skipline(FILE *f)
 static void
 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
 {
-	int m, i0, i1, j0, j1;
+	int i, j, m, i0, i1, j0, j1, nc;
 
 	rewind(f1);
 	rewind(f2);
@@ -874,15 +894,55 @@ output(char *file1, FILE *f1, char *file2, FILE *f2, i
 	J[m + 1] = len[1] + 1;
 	if (diff_format != D_EDIT) {
 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
-			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
+			while (i0 <= m && J[i0] == J[i0 - 1] + 1){
+				if (diff_format == D_SIDEBYSIDE && 
+				    suppress_common != 1) {
+					nc = fetch(ixold, i0, i0, f1, '\0', 
+					    1, flags);
+					print_space(nc, 
+					    (hw - nc) + (padding << 1) + 1, 
+					    flags);
+					fetch(ixnew, J[i0], J[i0], f2, '\0', 
+					    0, flags);
+					diff_output("\n");
+				}
 				i0++;
+			}
 			j0 = J[i0 - 1] + 1;
 			i1 = i0 - 1;
 			while (i1 < m && J[i1 + 1] == 0)
 				i1++;
 			j1 = J[i1 + 1] - 1;
 			J[i1] = j1;
-			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
+
+			/*
+			 * When using side-by-side, lines from both of the 
+			 * files are printed. The algorithm used by diff(1) 
+			 * identifies the ranges in which two files differ. 
+			 * See the change() function below. 
+			 * The for loop below consumes the shorter range, 
+			 * whereas one of the while loops deals with the 
+			 * longer one.
+			 */
+			if (diff_format == D_SIDEBYSIDE) {
+				for (i=i0, j=j0; i<=i1 && j<=j1; i++, j++) 
+					change(file1, f1, file2, f2, i, i, 
+					    j, j, &flags);
+
+				while (i <= i1) {
+					change(file1, f1, file2, f2, 
+					    i, i, j+1, j, &flags);
+					i++;
+				}
+
+				while (j <= j1) {
+					change(file1, f1, file2, f2, 
+					    i+1, i, j, j, &flags);
+					j++;
+				}
+			} else
+				change(file1, f1, file2, f2, i0, i1, j0, 
+				    j1, &flags);
 		}
 	} else {
 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
@@ -987,7 +1047,7 @@ change(char *file1, FILE *f1, char *file2, FILE *f2, i
 {
 	static size_t max_context = 64;
 	long curpos;
-	int i, nc, f;
+	int i, nc;
 	const char *walk;
 	bool skip_blanks;
 
@@ -1116,27 +1176,38 @@ proceed:
 			diff_output("%c", *walk);
 		}
 	}
+	if (diff_format == D_SIDEBYSIDE) {
+		if (a > b) {
+			print_space(0, hw + padding , *pflags);
+		} else {
+			nc = fetch(ixold, a, b, f1, '\0', 1, *pflags);
+			print_space(nc, hw - nc + padding, *pflags); 
+		}
+		diff_output("%c", (a>b)? '>' : ((c>d)? '<' : '|'));
+		print_space(hw + padding + 1 , padding, *pflags); 
+		fetch(ixnew, c, d, f2, '\0', 0, *pflags);
+		diff_output("\n");
+	}
 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
 		fetch(ixold, a, b, f1, '<', 1, *pflags);
 		if (a <= b && c <= d && diff_format == D_NORMAL)
 			diff_output("---\n");
 	}
-	f = 0;
-	if (diff_format != D_GFORMAT)
-		f = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
-	if (f != 0 && diff_format == D_EDIT) {
+	if (diff_format != D_GFORMAT && diff_format != D_SIDEBYSIDE)
+		fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
+	if (edoffset != 0 && diff_format == D_EDIT) {
 		/*
-		 * A non-zero return value for D_EDIT indicates that the
+		 * A non-zero edoffset value for D_EDIT indicates that the
 		 * last line printed was a bare dot (".") that has been
 		 * escaped as ".." to prevent ed(1) from misinterpreting
 		 * it.  We have to add a substitute command to change this
 		 * back and restart where we left off.
 		 */
 		diff_output(".\n");
-		diff_output("%ds/.//\n", a + f - 1);
-		b = a + f - 1;
+		diff_output("%ds/.//\n", a + edoffset - 1);
+		b = a + edoffset - 1;
 		a = b + 1;
-		c += f;
+		c += edoffset;
 		goto restart;
 	}
 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
@@ -1150,9 +1221,10 @@ proceed:
 static int
 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
 {
-	int i, j, c, lastc, col, nc;
-	int	newcol;
+	int i, j, c, lastc, col, nc, newcol;
 
+	edoffset = 0;
+	nc = 0;
 	/*
 	 * When doing #ifdef's, copy down to current line
 	 * if this is the first file, so that stuff makes it to output.
@@ -1180,12 +1252,15 @@ fetch(long *f, int a, int b, FILE *lb, int ch, int old
 	}
 	for (i = a; i <= b; i++) {
 		fseek(lb, f[i - 1], SEEK_SET);
-		nc = f[i] - f[i - 1];
-		if ((diff_format != D_IFDEF && diff_format != D_GFORMAT) &&
+		nc = (f[i] - f[i - 1]);
+		if (diff_format == D_SIDEBYSIDE && hw < nc) 
+			nc = hw;
+		if ((diff_format != D_IFDEF && diff_format != D_GFORMAT) && 
 		    ch != '\0') {
 			diff_output("%c", ch);
-			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
-			    || diff_format == D_UNIFIED))
+			if (Tflag && (diff_format == D_NORMAL || 
+			    diff_format == D_CONTEXT || 
+			    diff_format == D_UNIFIED))
 				diff_output("\t");
 			else if (diff_format != D_UNIFIED)
 				diff_output(" ");
@@ -1193,38 +1268,68 @@ fetch(long *f, int a, int b, FILE *lb, int ch, int old
 		col = 0;
 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
 			if ((c = getc(lb)) == EOF) {
-				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
+				if (diff_format == D_EDIT || 
+				    diff_format == D_REVERSE ||
 				    diff_format == D_NREVERSE)
 					warnx("No newline at end of file");
 				else
 					diff_output("\n\\ No newline at end of "
 					    "file\n");
-				return (0);
+				return col;
 			}
-			if (c == '\t' && (flags & D_EXPANDTABS)) {
-				newcol = ((col/tabsize)+1)*tabsize;
-				do {
-					diff_output(" ");
-				} while (++col < newcol);
+			/* 
+			 * when using --side-by-side, col needs to be increased 
+			 * in any case to keep the columns aligned
+			 */
+			if (c == '\t') {
+				if (flags & D_EXPANDTABS) {
+					newcol = ((col/tabsize)+1)*tabsize;
+					do {	
+						if (diff_format == D_SIDEBYSIDE)
+							j++;
+						diff_output(" ");
+					} while (++col < newcol && j < nc);
+				} else {
+					if (diff_format == D_SIDEBYSIDE) { 
+						if ((j + tabsize) > nc) {
+							diff_output("%*s", 
+							nc - j,"");
+							j = col = nc;
+						} else {
+							diff_output("\t");
+							col += tabsize - 1;
+							j += tabsize - 1;
+						}
+					} else {
+						diff_output("\t");
+						col++;
+					}
+				}
 			} else {
 				if (diff_format == D_EDIT && j == 1 && c == '\n'
 				    && lastc == '.') {
 					/*
 					 * Don't print a bare "." line
 					 * since that will confuse ed(1).
-					 * Print ".." instead and return,
-					 * giving the caller an offset
-					 * from which to restart.
+					 * Print ".." instead and set the
+					 * global variable edoffset to an
+					 * offset from which to restart.
+					 * The caller must check the value
+					 * of edoffset.
 					 */
 					diff_output(".\n");
-					return (i - a + 1);
+					edoffset = i - a + 1;
+					return edoffset;
 				}
-				diff_output("%c", c);
-				col++;
+				/* when side-by-side, do not print a newline */
+				if (diff_format != D_SIDEBYSIDE || c != '\n') {
+					diff_output("%c", c);
+					col++;
+				}
 			}
 		}
 	}
-	return (0);
+	return col;
 }
 
 /*
@@ -1577,4 +1682,26 @@ print_header(const char *file1, const char *file2)
 	else
 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++",
 		    file2, buf2);
-}
+}
+
+/*
+ * Emit n blank columns after nc already-printed characters.  Unless
+ * D_EXPANDTABS is set in flags, tabs are used while at least tabsize
+ * columns remain; whatever is left over is padded with single spaces.
+ */
+static void
+print_space(int nc, int n, int flags) {
+	int i, col;
+
+	col = n;
+	if ((flags & D_EXPANDTABS) == 0) {
+		/* first tabstop may be closer than tabsize */
+		i = tabsize - (nc % tabsize);
+		while (col >= tabsize) {
+			diff_output("\t");
+			col -= i;
+			i = tabsize;
+		}
+	}
+	diff_output("%*s", col, "");
+}
\ No newline at end of file

Modified: head/usr.bin/diff/tests/diff_test.sh
==============================================================================
--- head/usr.bin/diff/tests/diff_test.sh	Fri Feb  7 09:22:08 2020	(r357647)
+++ head/usr.bin/diff/tests/diff_test.sh	Fri Feb  7 10:17:13 2020	(r357648)
@@ -103,8 +103,6 @@ group_format_body()
 
 side_by_side_body()
 {
-	atf_expect_fail "--side-by-side not currently implemented (bug # 219933)"
-
 	atf_check -o save:A printf "A\nB\nC\n"
 	atf_check -o save:B printf "D\nB\nE\n"
 


More information about the svn-src-all mailing list