svn commit: r316477 - head/usr.bin/grep

Ed Maste emaste at FreeBSD.org
Mon Apr 3 23:16:52 UTC 2017


Author: emaste
Date: Mon Apr  3 23:16:51 2017
New Revision: 316477
URL: https://svnweb.freebsd.org/changeset/base/316477

Log:
  bsdgrep: fix matching behaviour
  
  - Set REG_NOTBOL if we've already matched beginning of line and we're
    examining later parts
  
  - For each pattern we examine, apply it to the remaining bits of the
    line rather than (potentially) smaller subsets
  
  - Check for REG_NOSUB after we've looked at all patterns initially
    matching the line
  
  - Keep track of the last match we made to later determine if we're
    simply not matching any longer or if we need to proceed another byte
    because we hit a zero-length match
  
  - Match the earliest and longest bit of each line before moving the
    start of the search further along the line, past the end of the
    longest match; this generally matches how gnugrep(1) seems to behave,
    and seems like pretty good behavior to me
  
  - Finally, bail out of printing any matches if the pattern matches every
    line (empty pattern) but -o (output matches) was set
  
  PR:		195763, 180990, 197555, 197531, 181263, 209116
  Submitted by:	"Kyle Evans" <kevans91 at ksu.edu>
  Reviewed by:	cem
  MFC after:	1 month
  Relnotes:	Yes
  Differential Revision:	https://reviews.freebsd.org/D10104

Modified:
  head/usr.bin/grep/util.c

Modified: head/usr.bin/grep/util.c
==============================================================================
--- head/usr.bin/grep/util.c	Mon Apr  3 22:36:45 2017	(r316476)
+++ head/usr.bin/grep/util.c	Mon Apr  3 23:16:51 2017	(r316477)
@@ -276,28 +276,29 @@ static int
 procline(struct str *l, int nottext)
 {
 	regmatch_t matches[MAX_LINE_MATCHES];
-	regmatch_t pmatch;
-	size_t st = 0;
+	regmatch_t pmatch, lastmatch;
+	size_t st = 0, nst = 0;
 	unsigned int i;
-	int c = 0, m = 0, r = 0;
+	int c = 0, m = 0, r = 0, lastmatches = 0, leflags = eflags;
+	int startm = 0;
 
 	/* Loop to process the whole line */
 	while (st <= l->len) {
-		pmatch.rm_so = st;
-		pmatch.rm_eo = l->len;
-
+		lastmatches = 0;
+		startm = m;
+		if (st > 0)
+			leflags |= REG_NOTBOL;
 		/* Loop to compare with all the patterns */
 		for (i = 0; i < patterns; i++) {
+			pmatch.rm_so = st;
+			pmatch.rm_eo = l->len;
 			if (fg_pattern[i].pattern)
 				r = fastexec(&fg_pattern[i],
-				    l->dat, 1, &pmatch, eflags);
+				    l->dat, 1, &pmatch, leflags);
 			else
 				r = regexec(&r_pattern[i], l->dat, 1,
-				    &pmatch, eflags);
+				    &pmatch, leflags);
 			r = (r == 0) ? 0 : REG_NOMATCH;
-			st = (cflags & REG_NOSUB)
-				? (size_t)l->len
-				: (size_t)pmatch.rm_eo;
 			if (r == REG_NOMATCH)
 				continue;
 			/* Check for full match */
@@ -324,10 +325,29 @@ procline(struct str *l, int nottext)
 					r = REG_NOMATCH;
 			}
 			if (r == 0) {
+				lastmatches++;
+				lastmatch = pmatch;
+				/* Skip over zero-length matches */
+				if (pmatch.rm_so == pmatch.rm_eo)
+					continue;
 				if (m == 0)
 					c++;
-				if (m < MAX_LINE_MATCHES)
-					matches[m++] = pmatch;
+
+				if (m < MAX_LINE_MATCHES) {
+					/* Replace previous match if the new one is earlier and/or longer */
+					if (m > startm) {
+						if (pmatch.rm_so < matches[m-1].rm_so ||
+						    (pmatch.rm_so == matches[m-1].rm_so && (pmatch.rm_eo - pmatch.rm_so) > (matches[m-1].rm_eo - matches[m-1].rm_so))) {
+							matches[m-1] = pmatch;
+							nst = pmatch.rm_eo;
+						}
+					} else {
+						/* Advance as normal if not */
+						matches[m++] = pmatch;
+						nst = pmatch.rm_eo;
+					}
+				}
+
 				/* matches - skip further patterns */
 				if ((color == NULL && !oflag) ||
 				    qflag || lflag)
@@ -344,8 +364,19 @@ procline(struct str *l, int nottext)
 		if (!wflag && ((color == NULL && !oflag) || qflag || lflag || Lflag))
 			break;
 
-		if (st == (size_t)pmatch.rm_so)
-			break; 	/* No matches */
+		/* If we didn't have any matches or REG_NOSUB set */
+		if (lastmatches == 0 || (cflags & REG_NOSUB))
+			nst = l->len;
+
+		if (lastmatches == 0)
+			/* No matches */
+			break;
+		else if (st == nst && lastmatch.rm_so == lastmatch.rm_eo)
+			/* Zero-length match -- advance one more so we don't get stuck */
+			nst++;
+
+		/* Advance st based on previous matches */
+		st = nst;
 	}
 
 
@@ -444,6 +475,10 @@ printline(struct str *line, int sep, reg
 	size_t a = 0;
 	int i, n = 0;
 
+	/* If matchall, everything matches but don't actually print for -o */
+	if (oflag && matchall)
+		return;
+
 	if (!hflag) {
 		if (!nullflag) {
 			fputs(line->file, stdout);
@@ -474,13 +509,13 @@ printline(struct str *line, int sep, reg
 				fwrite(line->dat + a, matches[i].rm_so - a, 1,
 				    stdout);
 			if (color) 
-				fprintf(stdout, "\33[%sm\33[K", color);
+				fprintf(stdout, "\33[%sm", color);
 
 				fwrite(line->dat + matches[i].rm_so, 
 				    matches[i].rm_eo - matches[i].rm_so, 1,
 				    stdout);
 			if (color) 
-				fprintf(stdout, "\33[m\33[K");
+				fprintf(stdout, "\33[00m\33[K");
 			a = matches[i].rm_eo;
 			if (oflag)
 				putchar('\n');


More information about the svn-src-head mailing list