svn commit: r231755 - user/gabor/tre-integration/contrib/tre/lib

Gabor Kovesdan gabor at FreeBSD.org
Wed Feb 15 11:11:59 UTC 2012


Author: gabor
Date: Wed Feb 15 11:11:58 2012
New Revision: 231755
URL: http://svn.freebsd.org/changeset/base/231755

Log:
  - Split the compilation logic into separate functions (tre_compile_nfa,
    tre_compile_bm, tre_compile_heur) for easier reuse and better readability
  - Rename the lower-layer functions from tre_compile_* to tre_proc_* for
    clarity

Modified:
  user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
  user/gabor/tre-integration/contrib/tre/lib/tre-compile.c
  user/gabor/tre-integration/contrib/tre/lib/tre-compile.h
  user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c
  user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h
  user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c
  user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h

Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/fastmatch.c	Wed Feb 15 10:33:29 2012	(r231754)
+++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.c	Wed Feb 15 11:11:58 2012	(r231755)
@@ -51,12 +51,12 @@ tre_fixncomp(fastmatch_t *preg, const ch
       if (ret != REG_OK)
 	return ret;
       else 
-	ret = tre_compile_literal(preg, wregex, wlen, cflags);
+	ret = tre_proc_literal(preg, wregex, wlen, cflags);
       tre_free_pattern(wregex);
       return ret;
     }
   else
-    return tre_compile_literal(preg, NULL, 0, cflags);
+    return tre_proc_literal(preg, NULL, 0, cflags);
 }
 
 int
@@ -73,13 +73,13 @@ tre_fastncomp(fastmatch_t *preg, const c
 	return ret;
       else
 	ret = (cflags & REG_LITERAL)
-	      ? tre_compile_literal(preg, wregex, wlen, cflags)
-	      : tre_compile_fast(preg, wregex, wlen, cflags);
+	      ? tre_proc_literal(preg, wregex, wlen, cflags)
+	      : tre_proc_fast(preg, wregex, wlen, cflags);
       tre_free_pattern(wregex);
       return ret;
     }
   else
-    return tre_compile_literal(preg, NULL, 0, cflags);
+    return tre_proc_literal(preg, NULL, 0, cflags);
 }
 
 
@@ -98,15 +98,15 @@ tre_fastcomp(fastmatch_t *preg, const ch
 int
 tre_fixwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags)
 {
-  return tre_compile_literal(preg, regex, n, cflags);
+  return tre_proc_literal(preg, regex, n, cflags);
 }
 
 int
 tre_fastwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags)
 {
   return (cflags & REG_LITERAL) ?
-    tre_compile_literal(preg, regex, n, cflags) :
-    tre_compile_fast(preg, regex, n, cflags);
+    tre_proc_literal(preg, regex, n, cflags) :
+    tre_proc_fast(preg, regex, n, cflags);
 }
 
 int

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-compile.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-compile.c	Wed Feb 15 10:33:29 2012	(r231754)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-compile.c	Wed Feb 15 11:11:58 2012	(r231755)
@@ -1947,49 +1947,107 @@ tre_free_pattern(tre_char_t *wregex)
 int
 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
 {
+  int ret;
+
+  /*
+   * First, we always compile the NFA and it also serves as
+   * pattern validation.  In this way, validation is not
+   * scattered through the code.
+   */
+  ret = tre_compile_nfa(preg, regex, n, cflags);
+  if (ret != REG_OK)
+    return ret;
+
+  /*
+   * Check if we can cheat with a fixed string algorithm
+   * if the pattern is long enough.
+   */
+  ret = tre_compile_bm(preg, regex, n, cflags);
+
+  /* Only try to compile heuristic if the fast matcher failed. */
+  if (ret != REG_OK)
+    ret = tre_compile_heur(preg, regex, n, cflags);
+  else
+    preg->heur = NULL;
+
+  /* When here, at least NFA surely succeeded. */
+  return REG_OK;
+}
+
+int
+tre_compile_bm(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
+{
+  fastmatch_t *shortcut;
+  int ret;
+
+  if (n < 2)
+    goto too_short;
+  shortcut = xmalloc(sizeof(fastmatch_t));
+  if (!shortcut)
+    return REG_ESPACE;
+  ret = (cflags & REG_LITERAL)
+	 ? tre_proc_literal(shortcut, regex, n, cflags)
+	 : tre_proc_fast(shortcut, regex, n, cflags);
+  if (ret == REG_OK)
+    {
+      preg->shortcut = shortcut;
+      DPRINT("tre_compile_bm: pattern compiled for fast matcher\n");
+    }        
+  else
+    {
+too_short:
+      xfree(shortcut);
+      preg->shortcut = NULL;
+      DPRINT("tre_compile_bm: pattern compilation failed for fast matcher\n");
+    }
+  return ret;
+}
+
+int
+tre_compile_heur(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
+{
+  heur_t *heur;
+  int ret;
+
+  heur = xmalloc(sizeof(heur_t));
+  if (!heur)
+    return REG_ESPACE;
+
+  ret = tre_proc_heur(heur, regex, n, cflags);
+  if (ret != REG_OK)
+    {
+      xfree(heur);
+      preg->heur = NULL;
+      DPRINT("tre_compile_heur: heuristic compilation failed, NFA will be used "
+	     "entirely\n");
+    }
+  else
+    {
+      preg->heur = heur;
+      DPRINT("tre_compile_heur: heuristic compiled to speed up the search\n");
+    }
+
+  return ret;
+}
+
+int
+tre_compile_nfa(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
+{
   tre_stack_t *stack;
   tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
   tre_pos_and_tags_t *p;
   int *counts = NULL, *offs = NULL;
-  int i, add = 0, ret;
+  int i, add = 0;
   tre_tnfa_transition_t *transitions, *initial;
   tre_tnfa_t *tnfa = NULL;
   tre_submatch_data_t *submatch_data;
   tre_tag_direction_t *tag_directions = NULL;
   reg_errcode_t errcode;
   tre_mem_t mem;
-  fastmatch_t *shortcut;
-  heur_t *heur;
 
   /* Parse context. */
   tre_parse_ctx_t parse_ctx;
 
-  /*
-   * Check if we can cheat with a fixed string algorithm
-   * if the pattern is long enough.
-   */
-  if (n >= 2)
-    {
-      shortcut = xmalloc(sizeof(fastmatch_t));
-      if (!shortcut)
-	return REG_ESPACE;
-      ret = (cflags & REG_LITERAL)
-	     ? tre_compile_literal(shortcut, regex, n, cflags)
-	     : tre_compile_fast(shortcut, regex, n, cflags);
-      if (ret == REG_OK)
-	{
-	  preg->shortcut = shortcut;
-	  preg->re_nsub = 0;
-	  DPRINT("tre_compile: pattern compiled for fast matcher\n");
-	}
-      else
-	{
-	  xfree(shortcut);
-	  preg->shortcut = NULL;
-	  DPRINT("tre_compile: pattern compilation failed for fast matcher\n");
-	}
-    }
-
   /* Allocate a stack used throughout the compilation process for various
      purposes. */
   stack = tre_stack_new(512, 10240, 128);
@@ -2008,7 +2066,7 @@ tre_compile(regex_t *preg, const tre_cha
   parse_ctx.len = n;
   parse_ctx.cflags = cflags;
   parse_ctx.max_backref = -1;
-  DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
+  DPRINT(("tre_compile_nfa: parsing '%.*" STRF "'\n", (int)n, regex));
   errcode = tre_parse(&parse_ctx);
   if (errcode != REG_OK)
     ERROR_EXIT(errcode);
@@ -2040,7 +2098,7 @@ tre_compile(regex_t *preg, const tre_cha
      regexp does not have back references, this can be skipped. */
   if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
     {
-      DPRINT(("tre_compile: setting up tags\n"));
+      DPRINT(("tre_compile_nfa: setting up tags\n"));
 
       /* Figure out how many tags we will need. */
       errcode = tre_add_tags(NULL, stack, tree, tnfa);
@@ -2277,42 +2335,10 @@ tre_compile(regex_t *preg, const tre_cha
 
   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
 
-  /*
-   * If we reach here, the regex is parsed and legal. Now we try to construct
-   * a heuristic to speed up matching if we do not already have a shortcut
-   * pattern.
-   */
-  if (!preg->shortcut)
-    {
-      heur = xmalloc(sizeof(heur_t));
-      if (!heur)
-	ERROR_EXIT(REG_ESPACE);
-
-      ret = tre_compile_heur(heur, regex, n, cflags);
-      if (ret != REG_OK)
-	{
-	  xfree(heur);
-	  preg->heur = NULL;
-	  DPRINT("tre_compile: heuristic compilation failed, NFA will be used "
-		 "entirely\n");
-	}
-      else
-	{
-	  preg->heur = heur;
-	  DPRINT("tre_compile: heuristic compiled to speed up the search\n");
-	}
-    }
-  else
-    preg->heur = NULL;
-
   return REG_OK;
 
  error_exit:
   /* Free everything that was allocated and return the error code. */
-  if (shortcut != NULL)
-    xfree(shortcut);
-  if (heur != NULL)
-    xfree(heur);
   if (mem != NULL)
     tre_mem_destroy(mem);
   if (stack != NULL)

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-compile.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-compile.h	Wed Feb 15 10:33:29 2012	(r231754)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-compile.h	Wed Feb 15 11:11:58 2012	(r231755)
@@ -10,6 +10,8 @@
 #ifndef TRE_COMPILE_H
 #define TRE_COMPILE_H 1
 
+#include <regex.h>
+
 typedef struct {
   int position;
   int code_min;
@@ -22,6 +24,13 @@ typedef struct {
   int *params;
 } tre_pos_and_tags_t;
 
+int tre_compile_bm(regex_t *preg, const tre_char_t *regex, size_t n,
+		   int cflags);
+int tre_compile_heur(regex_t *preg, const tre_char_t *regex, size_t n,
+		     int cflags);
+int tre_compile_nfa(regex_t *preg, const tre_char_t *regex, size_t n,
+		    int cflags);
+
 #endif /* TRE_COMPILE_H */
 
 /* EOF */

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c	Wed Feb 15 10:33:29 2012	(r231754)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c	Wed Feb 15 11:11:58 2012	(r231755)
@@ -402,8 +402,8 @@ static int	fastcmp(const fastmatch_t *fg
  * Returns: REG_OK on success, error code otherwise
  */
 int
-tre_compile_literal(fastmatch_t *fg, const tre_char_t *pat, size_t n,
-		    int cflags)
+tre_proc_literal(fastmatch_t *fg, const tre_char_t *pat, size_t n,
+		 int cflags)
 {
 
   INIT_COMP;
@@ -421,7 +421,7 @@ tre_compile_literal(fastmatch_t *fg, con
   SAVE_PATTERN(pat, n, fg->pattern, fg->len);
 #endif
 
-  DPRINT(("tre_compile_literal: pattern: %s, len %zu, icase: %c, word: %c, "
+  DPRINT(("tre_proc_literal: pattern: %s, len %zu, icase: %c, word: %c, "
 	 "newline %c\n", fg->pattern, fg->len, fg->icase ? 'y' : 'n',
 	 fg->word ? 'y' : 'n', fg->newline ? 'y' : 'n'));
 
@@ -439,8 +439,8 @@ tre_compile_literal(fastmatch_t *fg, con
  * Returns: REG_OK on success, error code otherwise
  */
 int
-tre_compile_fast(fastmatch_t *fg, const tre_char_t *pat, size_t n,
-		 int cflags)
+tre_proc_fast(fastmatch_t *fg, const tre_char_t *pat, size_t n,
+	      int cflags)
 {
   tre_char_t *tmp;
   size_t pos = 0;
@@ -563,7 +563,7 @@ tre_compile_fast(fastmatch_t *fg, const 
       continue;
 badpat:
       xfree(tmp);
-      DPRINT(("tre_compile_fast: compilation of pattern failed, falling"
+      DPRINT(("tre_proc_fast: compilation of pattern failed, falling"
 	      "back to NFA\n"));
       return REG_BADPAT;
     }
@@ -582,7 +582,7 @@ badpat:
 
   xfree(tmp);
 
-  DPRINT(("tre_compile_fast: pattern: %s, len %zu, bol %c, eol %c, "
+  DPRINT(("tre_proc_fast: pattern: %s, len %zu, bol %c, eol %c, "
 	 "icase: %c, word: %c, newline %c\n", fg->pattern, fg->len,
 	 fg->bol ? 'y' : 'n', fg->eol ? 'y' : 'n',
 	 fg->icase ? 'y' : 'n', fg->word ? 'y' : 'n',

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h	Wed Feb 15 10:33:29 2012	(r231754)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h	Wed Feb 15 11:11:58 2012	(r231755)
@@ -9,9 +9,9 @@
 #include "hashtable.h"
 #include "tre-internal.h"
 
-int	tre_compile_literal(fastmatch_t *preg, const tre_char_t *regex,
+int	tre_proc_literal(fastmatch_t *preg, const tre_char_t *regex,
 	    size_t, int);
-int	tre_compile_fast(fastmatch_t *preg, const tre_char_t *regex, size_t, int);
+int	tre_proc_fast(fastmatch_t *preg, const tre_char_t *regex, size_t, int);
 int	tre_match_fast(const fastmatch_t *fg, const void *data, size_t len,
 	    tre_str_type_t type, int nmatch, regmatch_t pmatch[], int eflags);
 void	tre_free_fast(fastmatch_t *preg);

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c	Wed Feb 15 10:33:29 2012	(r231754)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c	Wed Feb 15 11:11:58 2012	(r231755)
@@ -124,7 +124,7 @@
  * heuristic cannot be constructed.
  */
 int
-tre_compile_heur(heur_t *h, const tre_char_t *regex, size_t len, int cflags)
+tre_proc_heur(heur_t *h, const tre_char_t *regex, size_t len, int cflags)
 {
   tre_char_t **arr, *heur;
   tre_char_t **farr;
@@ -513,7 +513,7 @@ ok:
 	    errcode = REG_ESPACE;
 	    goto err;
 	  }
-	ret = tre_compile_literal(h->heurs[i], farr[i], fsiz[i], 0);
+	ret = tre_proc_literal(h->heurs[i], farr[i], fsiz[i], 0);
 	if (ret != REG_OK)
 	  {
 	    errcode = REG_BADPAT;

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h	Wed Feb 15 10:33:29 2012	(r231754)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h	Wed Feb 15 11:11:58 2012	(r231755)
@@ -23,8 +23,8 @@ typedef struct {
   int type;
 } heur_t;
 
-extern int tre_compile_heur(heur_t *h, const tre_char_t *regex,
-			    size_t len, int cflags);
+extern int tre_proc_heur(heur_t *h, const tre_char_t *regex,
+			 size_t len, int cflags);
 extern void tre_free_heur(heur_t *h);
 
 #endif	/* TRE_HEURISTIC_H */


More information about the svn-src-user mailing list