git: cf73401c4f7a - main - diff3: Fix merge mode.
- Reply: Baptiste Daroussin : "Re: git: cf73401c4f7a - main - diff3: Fix merge mode."
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 25 Sep 2024 17:15:21 UTC
The branch main has been updated by des:
URL: https://cgit.FreeBSD.org/src/commit/?id=cf73401c4f7af063c91a0c8e4d5b46e08f06db87
commit cf73401c4f7af063c91a0c8e4d5b46e08f06db87
Author: Dag-Erling Smørgrav <des@FreeBSD.org>
AuthorDate: 2024-09-25 17:14:32 +0000
Commit: Dag-Erling Smørgrav <des@FreeBSD.org>
CommitDate: 2024-09-25 17:14:55 +0000
diff3: Fix merge mode.
This is mostly thj@'s work, with some tweaks and cleanup by me. There
are still some cases where our output differs from GNU diff3, but it's
much better than before and I'd rather commit what I have now than let
it continue to languish in a metaphorical drawer.
MFC after: 3 weeks
Sponsored by: Klara, Inc.
Reviewed by: thj
Differential Revision: https://reviews.freebsd.org/D46762
---
usr.bin/diff3/diff3.c | 255 +++++++++++++++++++++++++++++++++++---------------
1 file changed, 179 insertions(+), 76 deletions(-)
diff --git a/usr.bin/diff3/diff3.c b/usr.bin/diff3/diff3.c
index c72ea0747589..f20b1d74678d 100644
--- a/usr.bin/diff3/diff3.c
+++ b/usr.bin/diff3/diff3.c
@@ -79,7 +79,6 @@
#include <string.h>
#include <unistd.h>
-
/*
* "from" is first in range of changed lines; "to" is last+1
* from=to=line after point of insertion for added lines.
@@ -90,6 +89,7 @@ struct range {
};
struct diff {
+#define DIFF_TYPE1 1
#define DIFF_TYPE2 2
#define DIFF_TYPE3 3
int type;
@@ -147,6 +147,7 @@ static void keep(int, struct range *);
static void merge(int, int);
static void prange(struct range *, bool);
static void repos(int);
+static void separate(const char *);
static void edscript(int) __dead2;
static void Ascript(int) __dead2;
static void mergescript(int) __dead2;
@@ -154,7 +155,7 @@ static void increase(void);
static void usage(void);
static void printrange(FILE *, struct range *);
-static const char diff3_version[] = "FreeBSD diff3 20220517";
+static const char diff3_version[] = "FreeBSD diff3 20240925";
enum {
DIFFPROG_OPT,
@@ -189,49 +190,110 @@ usage(void)
"[-L label3] file1 file2 file3\n");
}
+static int
+strtoi(char *str, char **end)
+{
+ intmax_t num;
+
+ errno = 0;
+ num = strtoimax(str, end, 10);
+ if ((end != NULL && *end == str) ||
+ num < 0 || num > INT_MAX ||
+ errno == EINVAL || errno == ERANGE)
+ err(1, "error in diff output");
+ return (int)num;
+}
+
+/*
+ * Read diff hunks into the array pointed to by *dd.
+ *
+ * The output from `diff foo bar` consists of a series of hunks describing
+ * an addition (lines in bar not present in foo), change (lines in bar
+ * different from lines in foo), or deletion (lines in foo not present in
+ * bar). Each record starts with a line of the form:
+ *
+ * a[,b]xc[,d]
+ *
+ * where a, b, c, and d are nonnegative integers (b and d are printed only
+ * if they differ from a and c, respectively), and x is either 'a' for an
+ * addition, 'c' for a change, or 'd' for a deletion. This is then
+ * followed by a series of lines (which we ignore) giving the added,
+ * changed, or deleted text.
+ *
+ * For an addition, a == b is the last line in 'foo' before the addition,
+ * while c through d is the range of lines in 'bar' to be added to 'foo'.
+ *
+ * For a change, a through b is the range of lines in 'foo' to be replaced
+ * and c through d is the range of lines in 'bar' to replace them with.
+ *
+ * For a deletion, a through b is the range of lines in 'foo' to remove
+ * and c == d is the line in 'bar' which corresponds to the last line
+ * before the deletion.
+ *
+ * The observant reader will have noticed that x is not really needed and
+ * that we can fully describe any hunk using only a, b, c, and d:
+ *
+ * - an addition replaces a zero-length range in one file with a
+ * non-zero-length range from the other
+ *
+ * - a change replaces a non-zero-length range in one file with a
+ * non-zero-length range from the other
+ *
+ * - a deletion replaces a non-zero-length range in one file with a
+ * zero-length range from the other
+ */
static int
readin(int fd, struct diff **dd)
{
int a, b, c, d;
- size_t i;
+ int i;
char kind, *p;
FILE *f;
f = fdopen(fd, "r");
if (f == NULL)
err(2, "fdopen");
- for (i = 0; (p = getchange(f)); i++) {
+ for (i = 0; (p = getchange(f)) != NULL; i++) {
+ if ((size_t)i >= szchanges - 1)
+ increase();
#if DEBUG
(*dd)[i].line = strdup(p);
#endif /* DEBUG */
- if (i >= szchanges - 1)
- increase();
- a = b = (int)strtoimax(p, &p, 10);
- if (*p == ',') {
- p++;
- b = (int)strtoimax(p, &p, 10);
- }
+ a = b = strtoi(p, &p);
+ if (*p == ',')
+ b = strtoi(p + 1, &p);
kind = *p++;
- c = d = (int)strtoimax(p, &p, 10);
- if (*p == ',') {
- p++;
- d = (int)strtoimax(p, &p, 10);
- }
+ c = d = strtoi(p, &p);
+ if (*p == ',')
+ d = strtoi(p + 1, &p);
+ if (*p != '\n')
+ errx(1, "error in diff output");
if (kind == 'a')
a++;
- if (kind == 'd')
+ else if (kind == 'c')
+ /* nothing */ ;
+ else if (kind == 'd')
c++;
+ else
+ errx(1, "error in diff output");
b++;
d++;
+ if (b < a || d < c)
+ errx(1, "error in diff output");
(*dd)[i].old.from = a;
(*dd)[i].old.to = b;
(*dd)[i].new.from = c;
(*dd)[i].new.to = d;
+ if (i > 0) {
+ if ((*dd)[i].old.from < (*dd)[i - 1].old.to ||
+ (*dd)[i].new.from < (*dd)[i - 1].new.to)
+ errx(1, "diff output out of order");
+ }
}
- if (i) {
- (*dd)[i].old.from = (*dd)[i - 1].old.to;
- (*dd)[i].new.from = (*dd)[i - 1].new.to;
+ if (i > 0) {
+ (*dd)[i].old.from = (*dd)[i].old.to = (*dd)[i - 1].old.to;
+ (*dd)[i].new.from = (*dd)[i].new.to = (*dd)[i - 1].new.to;
}
fclose(f);
return (i);
@@ -264,7 +326,7 @@ getchange(FILE *b)
{
char *line;
- while ((line = get_line(b, NULL))) {
+ while ((line = get_line(b, NULL)) != NULL) {
if (isdigit((unsigned char)line[0]))
return (line);
}
@@ -305,15 +367,23 @@ merge(int m1, int m2)
d2 = d23;
j = 0;
- while (t1 = d1 < d13 + m1, t2 = d2 < d23 + m2, t1 || t2) {
+ for (;;) {
+ t1 = (d1 < d13 + m1);
+ t2 = (d2 < d23 + m2);
+ if (!t1 && !t2)
+ break;
+
/* first file is different from the others */
if (!t2 || (t1 && d1->new.to < d2->new.from)) {
/* stuff peculiar to 1st file */
if (eflag == EFLAG_NONE) {
- printf("====1\n");
+ separate("1");
change(1, &d1->old, false);
keep(2, &d1->new);
change(3, &d1->new, false);
+ } else if (eflag == EFLAG_OVERLAP) {
+ j = edit(d2, dup, j, DIFF_TYPE1);
+ printdiff(d2);
}
d1++;
continue;
@@ -321,7 +391,7 @@ merge(int m1, int m2)
/* second file is different from others */
if (!t1 || (t2 && d2->new.to < d1->new.from)) {
if (eflag == EFLAG_NONE) {
- printf("====2\n");
+ separate("2");
keep(1, &d2->new);
change(3, &d2->new, false);
change(2, &d2->old, false);
@@ -359,7 +429,7 @@ merge(int m1, int m2)
* dup = 1 means files 1 and 2 identical
*/
if (eflag == EFLAG_NONE) {
- printf("====%s\n", dup ? "3" : "");
+ separate(dup ? "3" : "");
change(1, &d1->old, dup);
change(2, &d2->old, false);
d3 = d1->old.to > d1->old.from ? d1 : d2;
@@ -400,6 +470,12 @@ merge(int m1, int m2)
edscript(j);
}
+static void
+separate(const char *s)
+{
+ printf("====%s\n", s);
+}
+
/*
* The range of lines rold.from thru rold.to in file i is to be changed.
* It is to be printed only if it does not duplicate something to be
@@ -555,7 +631,6 @@ printrange(FILE *p, struct range *r)
char *line = NULL;
size_t len = 0;
int i = 1;
- ssize_t rlen = 0;
/* We haven't been asked to print anything */
if (r->from == r->to)
@@ -570,7 +645,7 @@ printrange(FILE *p, struct range *r)
* files with lots of ranges.
*/
fseek(p, 0L, SEEK_SET);
- while ((rlen = getline(&line, &len, p)) > 0) {
+ while (getline(&line, &len, p) > 0) {
if (i >= r->from)
printf("%s", line);
if (++i > r->to - 1)
@@ -591,20 +666,31 @@ edscript(int n)
old = &de[n].old;
delete = (new->from == new->to);
- if (!oflag || !overlap[n]) {
- prange(old, delete);
- } else {
- printf("%da\n", old->to - 1);
- printf("%s\n", divider);
- }
- printrange(fp[2], new);
- if (!oflag || !overlap[n]) {
- if (!delete)
+ if (de[n].type == DIFF_TYPE1) {
+ if (delete)
+ printf("%dd\n", new->from - 1);
+ else if (old->from == new->from && old->to == new->to) {
+ printf("%dc\n", old->from);
+ printrange(fp[2], old);
printf(".\n");
+ }
+ continue;
} else {
- printf("%s %s\n.\n", newmark, f3mark);
- printf("%da\n%s %s\n.\n", old->from - 1,
- oldmark, f1mark);
+ if (!oflag || !overlap[n]) {
+ prange(old, delete);
+ } else {
+ printf("%da\n", old->to - 1);
+ printf("%s\n", divider);
+ }
+ printrange(fp[2], new);
+ if (!oflag || !overlap[n]) {
+ if (!delete)
+ printf(".\n");
+ } else {
+ printf("%s %s\n.\n", newmark, f3mark);
+ printf("%da\n%s %s\n.\n", old->from - 1,
+ oldmark, f1mark);
+ }
}
}
if (iflag)
@@ -639,10 +725,7 @@ Ascript(int n)
prange(old, deletenew);
printrange(fp[2], new);
} else {
- startmark = new->to;
-
- if (!deletenew)
- startmark--;
+ startmark = new->to - 1;
printf("%da\n", startmark);
printf("%s %s\n", newmark, f3mark);
@@ -711,25 +794,43 @@ mergescript(int i)
{
struct range r, *new, *old;
int n;
+ bool delete = false;
r.from = 1;
r.to = 1;
- for (n = 1; n < i+1; n++) {
+ for (n = 1; n <= i; n++) {
new = &de[n].new;
old = &de[n].old;
- /* print any lines leading up to here */
- r.to = old->from;
- printrange(fp[0], &r);
+ /*
+ * Print any lines leading up to here. If we are merging don't
+ * print deleted ranges.
+ */
+ delete = (new->from == new->to);
+ if (de[n].type == DIFF_TYPE1 && delete)
+ r.to = new->from - 1;
+ else if (de[n].type == DIFF_TYPE3 && (old->from == old->to)) {
+ r.from = old->from - 1;
+ r.to = new->from;
+ } else
+ r.to = old->from;
- if (de[n].type == DIFF_TYPE2) {
+ printrange(fp[0], &r);
+ switch (de[n].type) {
+ case DIFF_TYPE1:
+ /* If this isn't a delete print it */
+ if (!delete)
+ printrange(fp[2], new);
+ break;
+ case DIFF_TYPE2:
printf("%s %s\n", oldmark, f2mark);
printrange(fp[1], old);
printf("%s\n", divider);
printrange(fp[2], new);
printf("%s %s\n", newmark, f3mark);
- } else if (de[n].type == DIFF_TYPE3) {
+ break;
+ case DIFF_TYPE3:
if (!oflag || !overlap[n]) {
printrange(fp[2], new);
} else {
@@ -737,20 +838,27 @@ mergescript(int i)
printf("%s %s\n", oldmark, f1mark);
printrange(fp[0], old);
- printf("%s %s\n", orgmark, f2mark);
- if (old->from == old->to) {
- struct range or;
- or.from = old->from - 1;
- or.to = new->to;
- printrange(fp[1], &or);
- } else
- printrange(fp[1], old);
+ if (eflag != EFLAG_OVERLAP) {
+ printf("%s %s\n", orgmark, f2mark);
+ if (old->from == old->to) {
+ struct range or;
+ or.from = old->from - 1;
+ or.to = new->to;
+ printrange(fp[1], &or);
+ } else {
+ printrange(fp[1], old);
+ }
+ }
printf("%s\n", divider);
printrange(fp[2], new);
printf("%s %s\n", newmark, f3mark);
}
+ break;
+ default:
+ printf("Error: Unhandled diff type - exiting\n");
+ exit(EXIT_FAILURE);
}
if (old->from == old->to)
@@ -758,6 +866,7 @@ mergescript(int i)
else
r.from = old->to;
}
+
/*
* Print from the final range to the end of 'myfile'. Any deletions or
* additions to this file should have been handled by now.
@@ -768,21 +877,14 @@ mergescript(int i)
*/
new = &de[n-1].new;
old = &de[n-1].old;
- if ((old->from == new->from) &&
- (old->to == new->to))
+
+ if (old->from == new->from && old->to == new->to)
r.from--;
else if (new->from == new->to)
r.from = old->from;
- /*
- * If the range is a 3 way merge then we need to skip a line in the
- * trailing output.
- */
- if (de[n-1].type == DIFF_TYPE3)
- r.from++;
-
r.to = INT_MAX;
- printrange(fp[0], &r);
+ printrange(fp[2], &r);
exit(overlapcnt > 0);
}
@@ -797,25 +899,25 @@ increase(void)
newsz = szchanges == 0 ? 64 : 2 * szchanges;
incr = newsz - szchanges;
- p = reallocarray(d13, newsz, sizeof(struct diff));
+ p = reallocarray(d13, newsz, sizeof(*p));
if (p == NULL)
err(1, NULL);
- memset(p + szchanges, 0, incr * sizeof(struct diff));
+ memset(p + szchanges, 0, incr * sizeof(*p));
d13 = p;
- p = reallocarray(d23, newsz, sizeof(struct diff));
+ p = reallocarray(d23, newsz, sizeof(*p));
if (p == NULL)
err(1, NULL);
- memset(p + szchanges, 0, incr * sizeof(struct diff));
+ memset(p + szchanges, 0, incr * sizeof(*p));
d23 = p;
- p = reallocarray(de, newsz, sizeof(struct diff));
+ p = reallocarray(de, newsz, sizeof(*p));
if (p == NULL)
err(1, NULL);
- memset(p + szchanges, 0, incr * sizeof(struct diff));
+ memset(p + szchanges, 0, incr * sizeof(*p));
de = p;
- q = reallocarray(overlap, newsz, sizeof(char));
+ q = reallocarray(overlap, newsz, 1);
if (q == NULL)
err(1, NULL);
- memset(q + szchanges, 0, incr * sizeof(char));
+ memset(q + szchanges, 0, incr * 1);
overlap = q;
szchanges = newsz;
}
@@ -919,7 +1021,7 @@ main(int argc, char **argv)
if (kq == -1)
err(2, "kqueue");
- e = malloc(2 * sizeof(struct kevent));
+ e = malloc(2 * sizeof(*e));
if (e == NULL)
err(2, "malloc");
@@ -1007,6 +1109,7 @@ main(int argc, char **argv)
}
nleft -= nke;
}
+ free(e);
merge(m, n);
return (EXIT_SUCCESS);