svn commit: r226590 - user/gabor/tre-integration/contrib/tre/lib

Gabor Kovesdan gabor at FreeBSD.org
Thu Oct 20 22:38:24 UTC 2011


Author: gabor
Date: Thu Oct 20 22:38:24 2011
New Revision: 226590
URL: http://svn.freebsd.org/changeset/base/226590

Log:
  - Fix return value in heuristic compiler
  - Implement longest fragment heuristic for REG_NEWLINE behavior

Modified:
  user/gabor/tre-integration/contrib/tre/lib/regexec.c
  user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c
  user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h

Modified: user/gabor/tre-integration/contrib/tre/lib/regexec.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/regexec.c	Thu Oct 20 21:49:26 2011	(r226589)
+++ user/gabor/tre-integration/contrib/tre/lib/regexec.c	Thu Oct 20 22:38:24 2011	(r226590)
@@ -165,10 +165,10 @@ tre_match(const tre_tnfa_t *tnfa, const 
 			    pmatch, eflags);
     }
 
-#define FIX_OFFSETS							\
+#define FIX_OFFSETS(adj)						\
   if (ret == REG_NOMATCH)						\
     {									\
-      st += n;								\
+      adj;								\
       continue;								\
     }									\
   else if ((ret == REG_OK) && !(tnfa->cflags & REG_NOSUB))		\
@@ -191,57 +191,94 @@ tre_match(const tre_tnfa_t *tnfa, const 
       const char *data_byte = string;
       const tre_char_t *data_wide = string;
 
-      DPRINT(("tre_match: using a heuristic [%s/%s] to speed up the "
-	     "search\n", heur->start->pattern, heur->end->pattern));
-
-      while (st < len)
+      if (heur->type == HEUR_LONGEST)
 	{
-	  SEEK_TO(st);
-
-	  /* Prefix heuristic */
-	  ret = tre_match_fast(heur->heurs[0], string, len - st, type, nmatch,
-			       pmatch, eflags);
-	  if (ret != REG_OK)
-	    return ret;
-	  st += pmatch[0].rm_so;
-	  n = pmatch[0].rm_eo;
-
-	  /* Intermediate heuristics */
-	  while (!((heur->heurs[i] == NULL) ||
-		(heur->prefix && heur->heurs[i + 1] == NULL)))
+	  while (st < len)
 	    {
-	      SEEK_TO(st + n);
-	      ret = tre_match_fast(heur->heurs[i], string, len - st - n, type,
-				   nmatch, pmatch, eflags);
-	      if (ret != REG_OK)
-		return ret;
-	      n += pmatch[0].rm_eo;
-	      i++;
-	    }
+	      size_t eo, so;
 
-	  /* Suffix heuristic available */
-	  if (heur->prefix && heur->heurs[i] != NULL)
-	    {
-	      SEEK_TO(st + n);
-	      ret = tre_match_fast(heur->heurs[i], string, len - st - n, type,
-				   nmatch, pmatch, eflags);
+	      SEEK_TO(st);
+	      ret = tre_match_fast(heur->heurs[0], string, len - st, type, nmatch,
+				   pmatch, eflags);
 	      if (ret != REG_OK)
 		return ret;
-	      n += pmatch[0].rm_eo;
 
-	      SEEK_TO(st);
-	      ret = tre_match(tnfa, string, n, type, nmatch, pmatch,
-			      eflags, NULL, NULL);
-	      FIX_OFFSETS;
-	    }
-	  /* Suffix heuristic not available */
-	  else
+	      for (so = st + pmatch[0].rm_so - 1; ; so--)
+		{
+		  if ((type == STR_WIDE) ? (data_wide[so] == TRE_CHAR('\n')) :
+		      (data_byte[so] == '\n'))
+		    break;
+		  if (so == 0)
+		    break;
+		}
+
+	      for (eo = st + pmatch[0].rm_eo; st + eo < len; eo++)
+		{
+		  if ((type == STR_WIDE) ? (data_wide[eo] == TRE_CHAR('\n')) :
+		      (data_byte[eo] == '\n'))
+		    break;
+		}
+
+	      SEEK_TO(so);
+	      ret = tre_match(tnfa, string, eo - so, type, nmatch, pmatch, eflags, NULL, NULL);
+	      FIX_OFFSETS(st = eo);
+
+	   }
+	   return REG_NOMATCH;
+	}
+      else
+	{
+	  while (st < len)
 	    {
 	      SEEK_TO(st);
-	      ret = tre_match(tnfa, string, len - st, type, nmatch, pmatch,
-			      eflags, NULL, NULL);
-	      FIX_OFFSETS;
-	    }
+
+	      /* Prefix heuristic */
+	     ret = tre_match_fast(heur->heurs[0], string, len - st,
+				  type, nmatch, pmatch, eflags);
+	     if (ret != REG_OK)
+	       return ret;
+	     st += pmatch[0].rm_so;
+	     n = pmatch[0].rm_eo;
+
+	     /* Intermediate heuristics */
+	     while (!((heur->heurs[i] == NULL) ||
+		    ((heur->type == HEUR_PREFIX_ARRAY) &&
+		    heur->heurs[i + 1] == NULL)))
+		{
+		  SEEK_TO(st + n);
+		  ret = tre_match_fast(heur->heurs[i], string, len - st - n,
+				       type, nmatch, pmatch, eflags);
+		  if (ret != REG_OK)
+		    return ret;
+		  n += pmatch[0].rm_eo;
+		  i++;
+		}
+
+	    /* Suffix heuristic available */
+	    if ((heur->type == HEUR_ARRAY) && heur->heurs[i] != NULL)
+	      {
+		SEEK_TO(st + n);
+		ret = tre_match_fast(heur->heurs[i], string, len - st - n,
+				     type, nmatch, pmatch, eflags);
+		if (ret != REG_OK)
+		  return ret;
+		n += pmatch[0].rm_eo;
+
+		SEEK_TO(st);
+		ret = tre_match(tnfa, string, n, type, nmatch, pmatch,
+				eflags, NULL, NULL);
+		FIX_OFFSETS(st += n);
+	      }
+	    /* Suffix heuristic not available */
+	    else
+	      {
+		SEEK_TO(st);
+		ret = tre_match(tnfa, string, len - st, type, nmatch,
+			        pmatch, eflags, NULL, NULL);
+		FIX_OFFSETS(st += n);
+	      }
+	  }
+	  return REG_NOMATCH;
 	}
     }
 

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c	Thu Oct 20 21:49:26 2011	(r226589)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c	Thu Oct 20 22:38:24 2011	(r226590)
@@ -133,6 +133,8 @@ tre_compile_heur(heur_t *h, const tre_ch
   if (!heur)
     return REG_ESPACE;
 
+  h->type = HEUR_ARRAY;
+
   while (true)
     {
 
@@ -316,7 +318,7 @@ end_segment:
 	      errcode = REG_BADPAT;
 	      goto err;
 	    }
-	  h->prefix = true;
+	  h->type = HEUR_PREFIX_ARRAY;
 	  goto ok;
 	}
 
@@ -340,7 +342,6 @@ end_segment:
     }
 
 ok:
-
   {
     size_t m = 1;
     int ret;
@@ -348,12 +349,6 @@ ok:
     for (int i = 1; i < j; i++)
       m = (length[i] > length[m]) ? i : m;
 
-    if (!h->heurs)
-      {
-	errcode = REG_ESPACE;
-	goto err;
-      }
-
     for (int i = 0; i < MIN(3, j + 1); i++)
       {
 	h->heurs[i] = xmalloc(sizeof(fastmatch_t));
@@ -371,30 +366,41 @@ ok:
       goto err2;							\
     }
 
-    ret = tre_compile_fast(h->heurs[0], arr[0], length[0], 0);
-    CHECK_ERR
-    if (j == 1)
+    if (cflags & REG_NEWLINE)
       {
-	xfree(h->heurs[1]);
-	h->heurs[1] = NULL;
-	goto finish;
+	ret = tre_compile_fast(h->heurs[0], arr[m], length[m], 0);
+	CHECK_ERR
+	h->type = HEUR_LONGEST;
       }
     else
-      ret = tre_compile_fast(h->heurs[1], arr[m], length[m], 0);
-    CHECK_ERR
-    if (h->prefix || (m == j - 1))
       {
-	xfree(h->heurs[2]);
-	h->heurs[2] = NULL;
-	goto finish;
+	ret = tre_compile_fast(h->heurs[0], arr[0], length[0], 0);
+	CHECK_ERR
+	if (j == 1)
+	  {
+	    free(h->heurs[1]);
+	    h->heurs[1] = NULL;
+	    errcode = REG_OK;
+	    goto finish;
+	  }
+	else
+	  ret = tre_compile_fast(h->heurs[1], arr[m], length[m], 0);
+	CHECK_ERR
+	if ((h->type == HEUR_PREFIX_ARRAY) || (m == j - 1))
+	  {
+	    xfree(h->heurs[2]);
+	    h->heurs[2] = NULL;
+	    errcode = REG_OK;
+	    goto finish;
+	  }
+	else
+	  ret = tre_compile_fast(h->heurs[2], arr[j - 1], length[j - 1], 0);
+	CHECK_ERR
+	h->heurs[3] = NULL;
       }
-    else
-      ret = tre_compile_fast(h->heurs[2], arr[j - 1], length[j - 1], 0);
-    CHECK_ERR
-    h->heurs[3] = NULL;
 
-    errcode = REG_OK;
-    goto finish;
+      errcode = REG_OK;
+      goto finish;
   }
 
 err2:

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h	Thu Oct 20 21:49:26 2011	(r226589)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h	Thu Oct 20 22:38:24 2011	(r226590)
@@ -7,10 +7,13 @@
 #include "tre-fastmatch.h"
 #include "tre-internal.h"
 
+#define HEUR_ARRAY		0
+#define HEUR_PREFIX_ARRAY	1
+#define HEUR_LONGEST		2
+
 typedef struct {
   fastmatch_t *heurs[4];
-  bool prefix;
-  bool newline;
+  int type;
 } heur_t;
 
 extern int tre_compile_heur(heur_t *h, const tre_char_t *regex,


More information about the svn-src-user mailing list