git: 2184ca3f191d - main - diff3: Clean up printing of ranges for edscript output

From: Tom Jones <thj_at_FreeBSD.org>
Date: Tue, 19 Apr 2022 13:51:37 UTC
The branch main has been updated by thj:

URL: https://cgit.FreeBSD.org/src/commit/?id=2184ca3f191d8750fb56b6d50997d437646c9384

commit 2184ca3f191d8750fb56b6d50997d437646c9384
Author:     Tom Jones <thj@FreeBSD.org>
AuthorDate: 2022-04-19 13:47:07 +0000
Commit:     Tom Jones <thj@FreeBSD.org>
CommitDate: 2022-04-19 13:51:18 +0000

    diff3: Clean up printing of ranges for edscript output
    
    Replace the edscript code that tracked and printed lines using byte
    offsets with code that can work from line offsets.
    
    This tidies up and reduces duplication in the edscript output code. It
    also fixes the usage of the de struct so that it only tracks diffs as
    line offsets rather than the usage changing from line offsets to byte
    offsets during the lifetime of diff3.
    
    Large files with large numbers of ranges will probably suffer in
    performance here, but as we don't use diff3 yet this isn't a regression.
    Include a warning for future hackers so they have a place to start
    hacking from.
    
    Reviewed by:    pstef
    Sponsored by:   Klara, Inc.
    Differential Revision:  https://reviews.freebsd.org/D34941
---
 usr.bin/diff3/diff3.c | 72 ++++++++++++++++++++++++++-------------------------
 1 file changed, 37 insertions(+), 35 deletions(-)

diff --git a/usr.bin/diff3/diff3.c b/usr.bin/diff3/diff3.c
index 37aa599eaab7..396b2620d22c 100644
--- a/usr.bin/diff3/diff3.c
+++ b/usr.bin/diff3/diff3.c
@@ -110,10 +110,9 @@ static struct diff *d13;
 static struct diff *d23;
 /*
  * "de" is used to gather editing scripts.  These are later spewed out in
- * reverse order.  Its first element must be all zero, the "new" component
- * of "de" contains line positions or byte positions depending on when you
- * look (!?).  Array overlap indicates which sections in "de" correspond to
- * lines that are different in all three files.
+ * reverse order.  Its first element must be all zero, the "old" and "new"
+ * components of "de" contain line positions. Array overlap indicates which
+ * sections in "de" correspond to lines that are different in all three files.
  */
 static struct diff *de;
 static char *overlap;
@@ -144,6 +143,7 @@ static void repos(int);
 static void edscript(int) __dead2;
 static void increase(void);
 static void usage(void) __dead2;
+static void printrange(FILE *, struct range *);
 
 enum {
 	DIFFPROG_OPT,
@@ -507,19 +507,46 @@ edit(struct diff *diff, bool dup, int j)
 		overlapcnt++;
 	de[j].old.from = diff->old.from;
 	de[j].old.to = diff->old.to;
-	de[j].new.from = de[j-1].new.to + skip(2, diff->new.from, NULL);
-	de[j].new.to = de[j].new.from + skip(2, diff->new.to, NULL);
+	de[j].new.from = diff->new.from;
+	de[j].new.to = diff->new.to;
 	return (j);
 }
 
+static void
+printrange(FILE *p, struct range *r)
+{
+	char *line = NULL;
+	size_t len = 0;
+	int i = 1;
+	ssize_t rlen = 0;
+
+	/* We haven't been asked to print anything */
+	if (r->from == r->to)
+		return;
+
+	if (r->from > r->to)
+			errx(EXIT_FAILURE, "invalid print range");
+
+	/*
+	 * XXX-THJ: We read through all of the file for each range printed.
+	 * This duplicates work and will probably impact performance on large
+	 * files with lots of ranges.
+	 */
+	fseek(p, 0L, SEEK_SET);
+	while ((rlen = getline(&line, &len, p)) > 0) {
+		if (i >= r->from)
+			printf("%s", line);
+		if (++i > r->to - 1)
+			break;
+	}
+	free(line);
+}
+
 /* regurgitate */
 static void
 edscript(int n)
 {
-	int k;
 	bool delete;
-	size_t j;
-	char block[BUFSIZ];
 
 	for (; n > 0; n--) {
 		delete = (de[n].new.from == de[n].new.to);
@@ -527,34 +554,9 @@ edscript(int n)
 			prange(&de[n].old, delete);
 		} else {
 			printf("%da\n", de[n].old.to - 1);
-			if (Aflag) {
-				printf("%s\n", f2mark);
-				fseek(fp[1], de[n].old.from, SEEK_SET);
-				for (k = de[n].old.to - de[n].old.from; k > 0; k -= j) {
-					j = k > BUFSIZ ? BUFSIZ : k;
-					if (fread(block, 1, j, fp[1]) != j)
-						errx(2, "logic error");
-					fwrite(block, 1, j, stdout);
-				}
-				printf("\n");
-			}
 			printf("=======\n");
 		}
-		fseek(fp[2], (long)de[n].new.from, SEEK_SET);
-		for (k = de[n].new.to - de[n].new.from; k > 0; k -= j) {
-			size_t r;
-
-			j = k > BUFSIZ ? BUFSIZ : k;
-			r = fread(block, 1, j, fp[2]);
-			if (r == 0) {
-				if (feof(fp[2]))
-					break;
-				errx(2, "logic error");
-			}
-			if (r != j)
-				j = r;
-			(void)fwrite(block, 1, j, stdout);
-		}
+		printrange(fp[2], &de[n].new);
 		if (!oflag || !overlap[n]) {
 			if (!delete)
 				printf(".\n");