git: 974ea6b297f8 - main - libdiff: Detect and recover from file truncation.

From: Dag-Erling Smørgrav <des_at_FreeBSD.org>
Date: Mon, 20 May 2024 13:33:12 UTC
The branch main has been updated by des:

URL: https://cgit.FreeBSD.org/src/commit/?id=974ea6b297f8f9821bbb60670e2b90ba9989b283

commit 974ea6b297f8f9821bbb60670e2b90ba9989b283
Author:     Dag-Erling Smørgrav <des@FreeBSD.org>
AuthorDate: 2024-05-20 13:26:33 +0000
Commit:     Dag-Erling Smørgrav <des@FreeBSD.org>
CommitDate: 2024-05-20 13:26:33 +0000

    libdiff: Detect and recover from file truncation.
    
    If a memory-mapped file is truncated before we get to the end, the
    atomizer may catch SIGBUS.  Detect that, reduce the input length to
    what we were actually able to read, and set a flag so the caller can
    take further action (e.g. warn the user and / or start over).
    
    Sponsored by:   Klara, Inc.
    Reviewed by:    markj
    Differential Revision:  https://reviews.freebsd.org/D45217
---
 contrib/libdiff/include/diff_main.h     |  1 +
 contrib/libdiff/lib/diff_atomize_text.c | 35 ++++++++++++++++++++++++++++-----
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/contrib/libdiff/include/diff_main.h b/contrib/libdiff/include/diff_main.h
index 04a6c6e748c9..11700580db4b 100644
--- a/contrib/libdiff/include/diff_main.h
+++ b/contrib/libdiff/include/diff_main.h
@@ -118,6 +118,7 @@ struct diff_data {
 
 /* Flags set by file atomizer. */
 #define DIFF_ATOMIZER_FOUND_BINARY_DATA	0x00000001
+#define DIFF_ATOMIZER_FILE_TRUNCATED	0x00000002
 
 /* Flags set by caller of diff_main(). */
 #define DIFF_FLAG_IGNORE_WHITESPACE	0x00000001
diff --git a/contrib/libdiff/lib/diff_atomize_text.c b/contrib/libdiff/lib/diff_atomize_text.c
index 32023105af94..d8a69733fc00 100644
--- a/contrib/libdiff/lib/diff_atomize_text.c
+++ b/contrib/libdiff/lib/diff_atomize_text.c
@@ -16,6 +16,8 @@
  */
 
 #include <errno.h>
+#include <setjmp.h>
+#include <signal.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -122,10 +124,18 @@ diff_data_atomize_text_lines_fd(struct diff_data *d)
 	return DIFF_RC_OK;
 }
 
+static sigjmp_buf diff_data_signal_env;
+static void
+diff_data_signal_handler(int sig)
+{
+	siglongjmp(diff_data_signal_env, sig);
+}
+
 static int
 diff_data_atomize_text_lines_mmap(struct diff_data *d)
 {
-	const uint8_t *pos = d->data;
+	struct sigaction act, oact;
+	const uint8_t *volatile pos = d->data;
 	const uint8_t *end = pos + d->len;
 	bool ignore_whitespace = (d->diff_flags & DIFF_FLAG_IGNORE_WHITESPACE);
 	bool embedded_nul = false;
@@ -136,8 +146,22 @@ diff_data_atomize_text_lines_mmap(struct diff_data *d)
 
 	ARRAYLIST_INIT(d->atoms, 1 << pow2);
 
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = 0;
+	act.sa_handler = diff_data_signal_handler;
+	sigaction(SIGBUS, &act, &oact);
+	if (sigsetjmp(diff_data_signal_env, 0) > 0) {
+		/*
+		 * The file was truncated while we were reading it.  Set
+		 * the end pointer to the beginning of the line we were
+		 * trying to read, adjust the file length, and set a flag.
+		 */
+		end = pos;
+		d->len = end - d->data;
+		d->atomizer_flags |= DIFF_ATOMIZER_FILE_TRUNCATED;
+	}
 	while (pos < end) {
-		const uint8_t *line_end = pos;
+		const uint8_t *line_start = pos, *line_end = pos;
 		unsigned int hash = 0;
 
 		while (line_end < end && *line_end != '\r' && *line_end != '\n') {
@@ -164,15 +188,16 @@ diff_data_atomize_text_lines_mmap(struct diff_data *d)
 
 		*atom = (struct diff_atom){
 			.root = d,
-			.pos = (off_t)(pos - d->data),
-			.at = pos,
-			.len = line_end - pos,
+			.pos = (off_t)(line_start - d->data),
+			.at = line_start,
+			.len = line_end - line_start,
 			.hash = hash,
 		};
 
 		/* Starting point for next line: */
 		pos = line_end;
 	}
+	sigaction(SIGBUS, &oact, NULL);
 
 	/* File are considered binary if they contain embedded '\0' bytes. */
 	if (embedded_nul)