git: f823c6dc730b - main - grep: fix null pattern and empty pattern file behavior

Kyle Evans kevans at FreeBSD.org
Fri Feb 5 02:59:58 UTC 2021


The branch main has been updated by kevans:

URL: https://cgit.FreeBSD.org/src/commit/?id=f823c6dc730b0dd08b54a53be1d8fd587eee7021

commit f823c6dc730b0dd08b54a53be1d8fd587eee7021
Author:     Kyle Evans <kevans at FreeBSD.org>
AuthorDate: 2021-02-04 21:26:45 +0000
Commit:     Kyle Evans <kevans at FreeBSD.org>
CommitDate: 2021-02-05 02:59:42 +0000

    grep: fix null pattern and empty pattern file behavior
    
    The null pattern semantics were terrible because I tried to match gnugrep,
    but I got it wrong.  Let's unwind that:
    
    - The null pattern should match every line if neither -w nor -x.
    - The null pattern should match empty lines if -x.
    - The null pattern should not match any lines if -w.
    
    The first two will stop processing (shortcut) even if additional patterns
    are specified. In any other case, we will continue processing other
    patterns.  If no other patterns are specified beside a null pattern, then
    we match if neither -w nor -x or set and do not match if either of those
    are specified.
    
    The justification for -w is that it should match on a whole word, but the
    null pattern deos not have a whole word to match on.
    
    Empty pattern files should never match anything, and more importantly, -v
    should cause everything to be written.
    
    PR:             253209
    MFC-after:      4 days
---
 contrib/netbsd-tests/usr.bin/grep/t_grep.sh | 22 +++++++++++++++---
 usr.bin/grep/grep.c                         | 11 ---------
 usr.bin/grep/util.c                         | 35 +++++++++++++----------------
 3 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/contrib/netbsd-tests/usr.bin/grep/t_grep.sh b/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
index e094b15c6d67..ef3f0617465e 100755
--- a/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
+++ b/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
@@ -489,11 +489,11 @@ wflag_emptypat_body()
 
 	atf_check -s exit:1 -o empty grep -w -e "" test1
 
-	atf_check -o file:test2 grep -w -e "" test2
+	atf_check -o file:test2 grep -vw -e "" test2
 
 	atf_check -s exit:1 -o empty grep -w -e "" test3
 
-	atf_check -o file:test4 grep -w -e "" test4
+	atf_check -o file:test4 grep -vw -e "" test4
 }
 
 atf_test_case xflag_emptypat
@@ -504,7 +504,6 @@ xflag_emptypat_body()
 	printf "qaz" > test3
 	printf " qaz\n" > test4
 
-	# -x is whole-line, more strict than -w.
 	atf_check -s exit:1 -o empty grep -x -e "" test1
 
 	atf_check -o file:test2 grep -x -e "" test2
@@ -550,6 +549,22 @@ xflag_emptypat_plus_body()
 	atf_check -o file:spacelines grep -Fxvf patlist1 target_spacelines
 }
 
+atf_test_case emptyfile
+emptyfile_descr()
+{
+	atf_set "descr" "Check for proper handling of empty pattern files (PR 253209)"
+}
+emptyfile_body()
+{
+	:> epatfile
+	echo "blubb" > subj
+
+	# From PR 253209, bsdgrep was short-circuiting completely on an empty
+	# file, but we should have still been processing lines.
+	atf_check -s exit:1 -o empty fgrep -f epatfile subj
+	atf_check -o file:subj fgrep -vf epatfile subj
+}
+
 atf_test_case excessive_matches
 excessive_matches_head()
 {
@@ -946,6 +961,7 @@ atf_init_test_cases()
 	atf_add_test_case wflag_emptypat
 	atf_add_test_case xflag_emptypat
 	atf_add_test_case xflag_emptypat_plus
+	atf_add_test_case emptyfile
 	atf_add_test_case excessive_matches
 	atf_add_test_case wv_combo_break
 	atf_add_test_case fgrep_sanity
diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c
index 307a91353b66..33541e4fe734 100644
--- a/usr.bin/grep/grep.c
+++ b/usr.bin/grep/grep.c
@@ -69,13 +69,6 @@ const char	*errstr[] = {
 int		 cflags = REG_NOSUB | REG_NEWLINE;
 int		 eflags = REG_STARTEND;
 
-/* XXX TODO: Get rid of this flag.
- * matchall is a gross hack that means that an empty pattern was passed to us.
- * It is a necessary evil at the moment because our regex(3) implementation
- * does not allow for empty patterns, as supported by POSIX's definition of
- * grammar for BREs/EREs. When libregex becomes available, it would be wise
- * to remove this and let regex(3) handle the dirty details of empty patterns.
- */
 bool		 matchall;
 
 /* Searching patterns */
@@ -637,10 +630,6 @@ main(int argc, char *argv[])
 	aargc -= optind;
 	aargv += optind;
 
-	/* Empty pattern file matches nothing */
-	if (!needpattern && (patterns == 0) && !matchall)
-		exit(1);
-
 	/* Fail if we don't have any pattern */
 	if (aargc == 0 && needpattern)
 		usage();
diff --git a/usr.bin/grep/util.c b/usr.bin/grep/util.c
index e517e4eaee6d..f22b7abd79ef 100644
--- a/usr.bin/grep/util.c
+++ b/usr.bin/grep/util.c
@@ -471,31 +471,28 @@ procline(struct parsec *pc)
 
 	matchidx = pc->matchidx;
 
-	/*
-	 * With matchall (empty pattern), we can try to take some shortcuts.
-	 * Emtpy patterns trivially match every line except in the -w and -x
-	 * cases.  For -w (whole-word) cases, we only match if the first
-	 * character isn't a word-character.  For -x (whole-line) cases, we only
-	 * match if the line is empty.
-	 */
+	/* Null pattern shortcuts. */
 	if (matchall) {
-		if (pc->ln.len == 0)
+		if (xflag && pc->ln.len == 0) {
+			/* Matches empty lines (-x). */
 			return (true);
-		if (wflag) {
-			wend = L' ';
-			if (sscanf(&pc->ln.dat[0], "%lc", &wend) == 1 &&
-			    !iswword(wend))
-				return (true);
-		} else if (!xflag)
+		} else if (!wflag && !xflag) {
+			/* Matches every line (no -w or -x). */
 			return (true);
+		}
 
 		/*
-		 * If we don't have any other patterns, we really don't match.
-		 * If we do have other patterns, we must fall through and check
-		 * them.
+		 * If we only have the NULL pattern, whether we match or not
+		 * depends on if we got here with -w or -x.  If either is set,
+		 * the answer is no.  If we have other patterns, we'll defer
+		 * to them.
 		 */
-		if (patterns == 0)
-			return (false);
+		if (patterns == 0) {
+			return (!(wflag || xflag));
+		}
+	} else if (patterns == 0) {
+		/* Pattern file with no patterns. */
+		return (false);
 	}
 
 	matched = false;


More information about the dev-commits-src-all mailing list