svn commit: r225123 - user/gabor/tre-integration/contrib/tre/lib

Gabor Kovesdan gabor at FreeBSD.org
Tue Aug 23 21:10:23 UTC 2011


Author: gabor
Date: Tue Aug 23 21:10:23 2011
New Revision: 225123
URL: http://svn.freebsd.org/changeset/base/225123

Log:
  - Add some debug output

Modified:
  user/gabor/tre-integration/contrib/tre/lib/regexec.c
  user/gabor/tre-integration/contrib/tre/lib/tre-compile.c
  user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c
  user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c

Modified: user/gabor/tre-integration/contrib/tre/lib/regexec.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/regexec.c	Tue Aug 23 20:25:11 2011	(r225122)
+++ user/gabor/tre-integration/contrib/tre/lib/regexec.c	Tue Aug 23 21:10:23 2011	(r225123)
@@ -159,8 +159,11 @@ tre_match(const tre_tnfa_t *tnfa, const 
 
   /* Check if we can cheat with a faster algorithm. */
   if (shortcut != NULL)
-    return tre_match_fast(shortcut, string, len, type, nmatch,
-			  pmatch, eflags);
+    {
+      DPRINT("tre_match: using tre_match_fast() instead of the full NFA\n");
+      return tre_match_fast(shortcut, string, len, type, nmatch,
+			    pmatch, eflags);
+    }
 
 #define FIX_OFFSETS							\
   if (ret == REG_NOMATCH)						\
@@ -188,6 +191,9 @@ tre_match(const tre_tnfa_t *tnfa, const 
       const char *data_byte = string;
       const tre_char_t *data_wide = string;
 
+      DPRINT(("tre_match: using a heuristic [%s/%s] to speed up the "
+	     "search\n", heur->start->pattern, heur->end->pattern));
+
       while (st < len)
 	{
 	  SEEK_TO(st);
@@ -208,6 +214,9 @@ tre_match(const tre_tnfa_t *tnfa, const 
 	    {
 	      SEEK_TO(st);
 
+	      DPRINT(("tre_match: calling NFA with offsets [%u/%u]\n",
+		     st, heur->prefix ? len : n + st));
+
 	      ret = tre_match(tnfa, string,
 			      heur->prefix ? (len - st) :
 			      n, type, nmatch,
@@ -228,6 +237,9 @@ tre_match(const tre_tnfa_t *tnfa, const 
 
 	  SEEK_TO(st);
 
+	  DPRINT(("tre_match: calling NFA with offsets [%u/%u]\n",
+		 st, st + n));
+
 	  ret = tre_match(tnfa, string, n,
 			  type, nmatch, pmatch, eflags, NULL, NULL);
 

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-compile.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-compile.c	Tue Aug 23 20:25:11 2011	(r225122)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-compile.c	Tue Aug 23 21:10:23 2011	(r225123)
@@ -1884,12 +1884,14 @@ tre_compile(regex_t *preg, const tre_cha
     {
       preg->shortcut = shortcut;
       preg->re_nsub = 0;
+      DPRINT("tre_compile: pattern compiled for fast matcher\n");
       return REG_OK;
     }
   else
     {
       xfree(shortcut);
       preg->shortcut = NULL;
+      DPRINT("tre_compile: pattern compilation failed for fast matcher\n");
     }
 
   /* Allocate a stack used throughout the compilation process for various
@@ -2186,7 +2188,6 @@ tre_compile(regex_t *preg, const tre_cha
    * If we reach here, the regex is parsed and legal. Now we try to construct
    * a heuristic to speed up matching.
    */
-
   heur = xmalloc(sizeof(heur_t));
   if (!heur)
     {
@@ -2199,9 +2200,14 @@ tre_compile(regex_t *preg, const tre_cha
     {
       xfree(heur);
       preg->heur = NULL;
+      DPRINT("tre_compile: heuristic compilation failed, NFA will be used "
+	     "entirely\n");
     }
   else
-    preg->heur = heur;
+    {
+      preg->heur = heur;
+      DPRINT("tre_compile: heuristic compiled to speed up the search\n");
+    }
 
   return REG_OK;
 

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c	Tue Aug 23 20:25:11 2011	(r225122)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c	Tue Aug 23 21:10:23 2011	(r225123)
@@ -134,6 +134,9 @@ static int	fastcmp(const void *, const v
 	      gs = fg->bmGs[mismatch];					\
 	    }								\
 	    bc = (r == 0) ? bc : fg->defBc;				\
+	    DPRINT(("tre_fast_match: mismatch on character %lc,"	\
+		    "BC %d, GS %d\n",					\
+		    ((tre_char_t *)startptr)[mismatch + 1], bc, gs));	\
             break;							\
 	default:							\
 	  if (!fg->hasdot)						\
@@ -144,6 +147,9 @@ static int	fastcmp(const void *, const v
 	      gs = fg->sbmGs[mismatch];					\
 	    }								\
 	  bc = fg->qsBc[((unsigned char *)startptr)[mismatch + 1]];	\
+	  DPRINT(("tre_fast_match: mismatch on character %c,"		\
+		 "BC %d, GS %d\n",					\
+		 ((unsigned char *)startptr)[mismatch + 1], bc, gs));	\
       }									\
     if (fg->hasdot)							\
       shift = bc;							\
@@ -161,6 +167,7 @@ static int	fastcmp(const void *, const v
 	    u = 0;							\
 	  }								\
       }									\
+      DPRINT(("tre_fast_match: shifting %d characters\n", shift));	\
       j += shift;							\
   }
 
@@ -190,6 +197,8 @@ static int	fastcmp(const void *, const v
   for (int i = fg->hasdot + 1; i < fg->len; i++)			\
     {									\
       fg->qsBc[(unsigned)fg->pattern[i]] = fg->len - i;			\
+      DPRINT(("BC shift for char %c is %d\n", fg->pattern[i],		\
+	     fg->len - i));						\
       if (fg->icase)							\
         {								\
           char c = islower(fg->pattern[i]) ? toupper(fg->pattern[i])	\
@@ -218,6 +227,8 @@ static int	fastcmp(const void *, const v
     {									\
       int k = fg->wlen - i;						\
       hashtable_put(fg->qsBc_table, &fg->wpattern[i], &k);		\
+      DPRINT(("BC shift for wide char %lc is %d\n", fg->wpattern[i],	\
+	     fg->wlen - i));						\
       if (fg->icase)							\
 	{								\
 	  tre_char_t wc = iswlower(fg->wpattern[i]) ?			\
@@ -375,6 +386,10 @@ tre_compile_literal(fastmatch_t *fg, con
   SAVE_PATTERN(fg->pattern, fg->len);
 #endif
 
+  DPRINT(("tre_compile_literal: pattern: %s, icase: %c, word: %c, "
+	 "newline %c\n", fg->pattern, fg->icase ? 'y' : 'n',
+	 fg->word ? 'y' : 'n', fg->newline ? 'y' : 'n'));
+
   FILL_QSBC;
   FILL_BMGS;
 #ifdef TRE_WCHAR
@@ -452,6 +467,12 @@ tre_compile_fast(fastmatch_t *fg, const 
   SAVE_PATTERN(fg->pattern, fg->len);
 #endif
 
+  DPRINT(("tre_compile_fast: pattern: %s, bol %c, eol %c, "
+	 "icase: %c, word: %c, newline %c\n", fg->pattern,
+	 fg->bol ? 'y' : 'n', fg->eol ? 'y' : 'n',
+	 fg->icase ? 'y' : 'n', fg->word ? 'y' : 'n',
+	 fg->newline ? 'y' : 'n'));
+
   FILL_QSBC;
   FILL_BMGS;
 #ifdef TRE_WCHAR
@@ -635,6 +656,9 @@ void
 tre_free_fast(fastmatch_t *fg)
 {
 
+  DPRINT(("tre_fast_free: freeing structures for pattern %s\n",
+	 fg->pattern));
+
 #ifdef TRE_WCHAR
   hashtable_free(fg->qsBc_table);
   if (!fg->hasdot)
@@ -688,6 +712,7 @@ fastcmp(const void *pat, const void *dat
 		    : (pat_byte[i] == str_byte[i]))
 	  continue;
       }
+    DPRINT(("fastcmp: mismatch at position %d\n", i));
     ret = -(i + 1);
     break;
   }

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c	Tue Aug 23 20:25:11 2011	(r225122)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c	Tue Aug 23 21:10:23 2011	(r225123)
@@ -255,6 +255,8 @@ end_segment:
 	  if (pos <= 1)
 	    {
 	      errcode = REG_BADPAT;
+	      DPRINT("tre_compile_heur: pattern does not have a "
+		     " fixed-length prefix that is long enough\n");
 	      goto badpat1;
 	    }
 
@@ -271,6 +273,8 @@ end_segment:
 	      errcode = REG_BADPAT;
 	      goto badpat2;
 	    }
+	  DPRINT(("tre_compile_heur: fixed-length prefix is %s\n",
+		 h->start->pattern));
 	}
 
       /*
@@ -285,6 +289,8 @@ end_segment:
 	    {
 	      h->prefix = true;
 	      errcode = REG_OK;
+	      DPRINT("tre-compile_heur: using only a fixed-length prefix; "
+		     "no fixed-length suffix is available\n");
 	      goto ok;
 	    }
 
@@ -302,6 +308,8 @@ end_segment:
 	      h->prefix = true;
 	    }
 	  errcode = REG_OK;
+	  DPRINT(("tre_compile_heur: fixed-length suffix is %s\n",
+		 h->end->pattern));
 	  goto ok;
 	}
 
@@ -315,6 +323,7 @@ space2:
     xfree(h->start);
 badpat1:
 space1:
+  DPRINT("tre_compile_heur: compiling a heuristic failed\n");
 ok:
   xfree(heur);
   return errcode;
@@ -330,4 +339,6 @@ tre_free_heur(heur_t *h)
     xfree(h->start);
   if (h->end != NULL)
     xfree(h->end);
+
+  DPRINT("tre_free_heur: resources are freed\n");
 }


More information about the svn-src-user mailing list