Re: git: cf73401c4f7a - main - diff3: Fix merge mode.

From: Baptiste Daroussin <bapt_at_freebsd.org>
Date: Thu, 26 Sep 2024 20:56:01 UTC
On Wed 25 Sep 17:15, Dag-Erling Smørgrav wrote:
> The branch main has been updated by des:
> 
> URL: https://cgit.FreeBSD.org/src/commit/?id=cf73401c4f7af063c91a0c8e4d5b46e08f06db87
> 
> commit cf73401c4f7af063c91a0c8e4d5b46e08f06db87
> Author:     Dag-Erling Smørgrav <des@FreeBSD.org>
> AuthorDate: 2024-09-25 17:14:32 +0000
> Commit:     Dag-Erling Smørgrav <des@FreeBSD.org>
> CommitDate: 2024-09-25 17:14:55 +0000
> 
>     diff3: Fix merge mode.
>     
>     This is mostly thj@'s work, with some tweaks and cleanup by me.  There
>     are still some cases where our output differs from GNU diff3, but it's
>     much better than before and I'd rather commit what I have now than let
>     it continue to languish in a metaphorical drawer.
>     
>     MFC after       3 weeks
>     Sponsored by:   Klara, Inc.
>     Reviewed by:    thj
>     Differential Revision:  https://reviews.freebsd.org/D46762
> ---
>  usr.bin/diff3/diff3.c | 255 +++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 179 insertions(+), 76 deletions(-)
> 
> diff --git a/usr.bin/diff3/diff3.c b/usr.bin/diff3/diff3.c
> index c72ea0747589..f20b1d74678d 100644
> --- a/usr.bin/diff3/diff3.c
> +++ b/usr.bin/diff3/diff3.c
> @@ -79,7 +79,6 @@
>  #include <string.h>
>  #include <unistd.h>
>  
> -
>  /*
>   * "from" is first in range of changed lines; "to" is last+1
>   * from=to=line after point of insertion for added lines.
> @@ -90,6 +89,7 @@ struct range {
>  };
>  
>  struct diff {
> +#define DIFF_TYPE1 1
>  #define DIFF_TYPE2 2
>  #define DIFF_TYPE3 3
>  	int type;
> @@ -147,6 +147,7 @@ static void keep(int, struct range *);
>  static void merge(int, int);
>  static void prange(struct range *, bool);
>  static void repos(int);
> +static void separate(const char *);
>  static void edscript(int) __dead2;
>  static void Ascript(int) __dead2;
>  static void mergescript(int) __dead2;
> @@ -154,7 +155,7 @@ static void increase(void);
>  static void usage(void);
>  static void printrange(FILE *, struct range *);
>  
> -static const char diff3_version[] = "FreeBSD diff3 20220517";
> +static const char diff3_version[] = "FreeBSD diff3 20240925";
>  
>  enum {
>  	DIFFPROG_OPT,
> @@ -189,49 +190,110 @@ usage(void)
>  	    "[-L label3] file1 file2 file3\n");
>  }
>  
> +static int
> +strtoi(char *str, char **end)
> +{
> +	intmax_t num;
> +
> +	errno = 0;
> +	num = strtoimax(str, end, 10);
> +	if ((end != NULL && *end == str) ||
> +	    num < 0 || num > INT_MAX ||
> +	    errno == EINVAL || errno == ERANGE)
> +		err(1, "error in diff output");
> +	return (int)num;
> +}
> +
> +/*
> + * Read diff hunks into the array pointed to by *dd.
> + *
> + * The output from `diff foo bar` consists of a series of hunks describing
> + * an addition (lines in bar not present in foo), change (lines in bar
> + * different from lines in foo), or deletion (lines in foo not present in
> + * bar).  Each record starts with a line of the form:
> + *
> + * a[,b]xc[,d]
> + *
> + * where a, b, c, and d are nonnegative integers (b and d are printed only
> + * if they differ from a and c, respectively), and x is either 'a' for an
> + * addition, 'c' for a change, or 'd' for a deletion.  This is then
> + * followed by a series of lines (which we ignore) giving the added,
> + * changed, or deleted text.
> + *
> + * For an addition, a == b is the last line in 'foo' before the addition,
> + * while c through d is the range of lines in 'bar' to be added to 'foo'.
> + *
> + * For a change, a through b is the range of lines in 'foo' to be replaced
> + * and c through d is the range of lines in 'bar' to replace them with.
> + *
> + * For a deletion, a through b is the range of lines in 'foo' to remove
> + * and c == d is the line in 'bar' which corresponds to the last line
> + * before the deletion.
> + *
> + * The observant reader will have noticed that x is not really needed and
> + * that we can fully describe any hunk using only a, b, c, and d:
> + *
> + * - an addition replaces a zero-length range in one file with a
> + *   non-zero-length range from the other
> + *
> + * - a change replaces a non-zero-length range in one file with a
> + *   non-zero-length range from the other
> + *
> + * - a deletion replaces a non-zero-length range in one file with a
> + *   zero-length range from the other
> + */
>  static int
>  readin(int fd, struct diff **dd)
>  {
>  	int a, b, c, d;
> -	size_t i;
> +	int i;
>  	char kind, *p;
>  	FILE *f;
>  
>  	f = fdopen(fd, "r");
>  	if (f == NULL)
>  		err(2, "fdopen");
> -	for (i = 0; (p = getchange(f)); i++) {
> +	for (i = 0; (p = getchange(f)) != NULL; i++) {
> +		if ((size_t)i >= szchanges - 1)
> +			increase();
>  #if DEBUG
>  		(*dd)[i].line = strdup(p);
>  #endif	/* DEBUG */
>  
> -		if (i >= szchanges - 1)
> -			increase();
> -		a = b = (int)strtoimax(p, &p, 10);
> -		if (*p == ',') {
> -			p++;
> -			b = (int)strtoimax(p, &p, 10);
> -		}
> +		a = b = strtoi(p, &p);
> +		if (*p == ',')
> +			b = strtoi(p + 1, &p);
>  		kind = *p++;
> -		c = d = (int)strtoimax(p, &p, 10);
> -		if (*p == ',') {
> -			p++;
> -			d = (int)strtoimax(p, &p, 10);
> -		}
> +		c = d = strtoi(p, &p);
> +		if (*p == ',')
> +			d = strtoi(p + 1, &p);
> +		if (*p != '\n')
> +			errx(1, "error in diff output");
>  		if (kind == 'a')
>  			a++;
> -		if (kind == 'd')
> +		else if (kind == 'c')
> +			/* nothing */ ;
> +		else if (kind == 'd')
>  			c++;
> +		else
> +			errx(1, "error in diff output");
>  		b++;
>  		d++;
> +		if (b < a || d < c)
> +			errx(1, "error in diff output");
>  		(*dd)[i].old.from = a;
>  		(*dd)[i].old.to = b;
>  		(*dd)[i].new.from = c;
>  		(*dd)[i].new.to = d;
> +		if (i > 0) {
> +			if ((*dd)[i].old.from < (*dd)[i - 1].old.to ||
> +			    (*dd)[i].new.from < (*dd)[i - 1].new.to)
> +				errx(1, "diff output out of order");
> +		}
>  	}
> -	if (i) {
> -		(*dd)[i].old.from = (*dd)[i - 1].old.to;
> -		(*dd)[i].new.from = (*dd)[i - 1].new.to;
> +	if (i > 0) {
> +		(*dd)[i].old.from = (*dd)[i].old.to = (*dd)[i - 1].old.to;
> +		(*dd)[i].new.from = (*dd)[i].new.to = (*dd)[i - 1].new.to;
>  	}
>  	fclose(f);
>  	return (i);
> @@ -264,7 +326,7 @@ getchange(FILE *b)
>  {
>  	char *line;
>  
> -	while ((line = get_line(b, NULL))) {
> +	while ((line = get_line(b, NULL)) != NULL) {
>  		if (isdigit((unsigned char)line[0]))
>  			return (line);
>  	}
> @@ -305,15 +367,23 @@ merge(int m1, int m2)
>  	d2 = d23;
>  	j = 0;
>  
> -	while (t1 = d1 < d13 + m1, t2 = d2 < d23 + m2, t1 || t2) {
> +	for (;;) {
> +		t1 = (d1 < d13 + m1);
> +		t2 = (d2 < d23 + m2);
> +		if (!t1 && !t2)
> +			break;
> +
>  		/* first file is different from the others */
>  		if (!t2 || (t1 && d1->new.to < d2->new.from)) {
>  			/* stuff peculiar to 1st file */
>  			if (eflag == EFLAG_NONE) {
> -				printf("====1\n");
> +				separate("1");
>  				change(1, &d1->old, false);
>  				keep(2, &d1->new);
>  				change(3, &d1->new, false);
> +			} else if (eflag == EFLAG_OVERLAP) {
> +				j = edit(d2, dup, j, DIFF_TYPE1);
> +				printdiff(d2);

This is defined nowhere, so the code does not build.

Best regards,
Bapt