svn commit: r231782 - user/gabor/tre-integration/contrib/tre/lib

Gabor Kovesdan gabor at FreeBSD.org
Wed Feb 15 21:48:30 UTC 2012


Author: gabor
Date: Wed Feb 15 21:48:29 2012
New Revision: 231782
URL: http://svn.freebsd.org/changeset/base/231782

Log:
  - Provide MBS patterns to lower layers for better flexibility and to
    avoid converting back and forth, except for calculated results, where
    it is probably cheaper to convert back than to calculate the same in
    MBS.
  - Fix a bug in converting calculated heuristics back to MBS.

Modified:
  user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
  user/gabor/tre-integration/contrib/tre/lib/mregcomp.c
  user/gabor/tre-integration/contrib/tre/lib/regcomp.c
  user/gabor/tre-integration/contrib/tre/lib/tre-compile.c
  user/gabor/tre-integration/contrib/tre/lib/tre-compile.h
  user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c
  user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h
  user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c
  user/gabor/tre-integration/contrib/tre/lib/tre-internal.h

Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/fastmatch.c	Wed Feb 15 21:32:05 2012	(r231781)
+++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.c	Wed Feb 15 21:48:29 2012	(r231782)
@@ -47,12 +47,12 @@ tre_fixncomp(fastmatch_t *preg, const ch
 
   if (n != 0)
     {
-      ret = tre_convert_pattern(regex, n, &wregex, &wlen);
+      ret = tre_convert_pattern_to_wcs(regex, n, &wregex, &wlen);
       if (ret != REG_OK)
 	return ret;
       else 
 	ret = tre_proc_literal(preg, wregex, wlen, cflags);
-      tre_free_pattern(wregex);
+      tre_free_wcs_pattern(wregex);
       return ret;
     }
   else
@@ -68,14 +68,14 @@ tre_fastncomp(fastmatch_t *preg, const c
 
   if (n != 0)
     {
-      ret = tre_convert_pattern(regex, n, &wregex, &wlen);
+      ret = tre_convert_pattern_to_wcs(regex, n, &wregex, &wlen);
       if (ret != REG_OK)
 	return ret;
       else
 	ret = (cflags & REG_LITERAL)
 	      ? tre_proc_literal(preg, wregex, wlen, cflags)
 	      : tre_proc_fast(preg, wregex, wlen, cflags);
-      tre_free_pattern(wregex);
+      tre_free_wcs_pattern(wregex);
       return ret;
     }
   else

Modified: user/gabor/tre-integration/contrib/tre/lib/mregcomp.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/mregcomp.c	Wed Feb 15 21:32:05 2012	(r231781)
+++ user/gabor/tre-integration/contrib/tre/lib/mregcomp.c	Wed Feb 15 21:48:29 2012	(r231782)
@@ -80,7 +80,7 @@ tre_mregncomp(mregex_t *preg, size_t nr,
 
   for (i = 0; i++; i < nr)
     {
-      ret = tre_convert_pattern(regex[i], n[i], &wregex[i], &wlen[i]);
+      ret = tre_convert_pattern_to_wcs(regex[i], n[i], &wregex[i], &wlen[i]);
       if (ret != REG_OK)
 	goto fail;
     }
@@ -89,7 +89,7 @@ tre_mregncomp(mregex_t *preg, size_t nr,
 
 fail:
   for (int j = 0; j++; j < i)
-    tre_free_pattern(wregex[j]);
+    tre_free_wcs_pattern(wregex[j]);
   return ret;
 }
 
@@ -117,7 +117,30 @@ int
 tre_mregwncomp(mregex_t *preg, size_t nr, const wchar_t *regex[],
 	       size_t n[], int cflags)
 {
-  return tre_compile(preg, nr, regex, n, cflags);
+  int i, ret;
+  char **sregex;
+  size_t *slen;
+
+  sregex = xmalloc(nr * sizeof(char *);
+  if (!sregex)
+    return REG_ENOMEM;
+  slen = xmalloc(nr * sizeof(size_t);
+  if (!slen)
+    return REG_ENOMEM;
+
+  for (i = 0; i++; i < nr)
+    {
+      ret = tre_convert_pattern_to_mbs(regex[i], n[i], &sregex[i], &slen[i]);
+      if (ret != REG_OK)
+        goto fail;
+    }
+
+  ret = tre_mcompile(preg, nr, regex, n, cflags);
+
+fail:
+  for (int j = 0; j++; j < i)
+    tre_free_mbs_pattern(wregex[j]);
+  return ret;
 }
 
 int

Modified: user/gabor/tre-integration/contrib/tre/lib/regcomp.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/regcomp.c	Wed Feb 15 21:32:05 2012	(r231781)
+++ user/gabor/tre-integration/contrib/tre/lib/regcomp.c	Wed Feb 15 21:48:29 2012	(r231782)
@@ -35,12 +35,12 @@ tre_regncomp(regex_t *preg, const char *
   tre_char_t *wregex;
   size_t wlen;
 
-  ret = tre_convert_pattern(regex, n, &wregex, &wlen);
+  ret = tre_convert_pattern_to_wcs(regex, n, &wregex, &wlen);
   if (ret != REG_OK)
     return ret;
   else
-    ret = tre_compile(preg, wregex, wlen, cflags);
-  tre_free_pattern(wregex);
+    ret = tre_compile(preg, wregex, wlen, regex, n, cflags);
+  tre_free_wcs_pattern(wregex);
   return ret;
 }
 
@@ -58,16 +58,26 @@ tre_regcomp(regex_t *preg, const char *r
 int
 tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t n, int cflags)
 {
-  return tre_compile(preg, regex, n, cflags);
+  int ret;
+  char *sregex;
+  size_t slen;
+
+  ret = tre_convert_pattern_to_mbs(regex, n, &sregex, &slen);
+  if (ret != REG_OK)
+    return ret;
+  else
+    ret = tre_compile(preg, regex, n, sregex, slen, cflags);
+  tre_free_mbs_pattern(sregex);
+  return ret;
 }
 
 int
 tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags)
 {
   if ((cflags & REG_PEND) && (preg->re_wendp >= regex))
-    return tre_compile(preg, regex, preg->re_wendp - regex, cflags);
+    return tre_regwncomp(preg, regex, preg->re_wendp - regex, cflags);
   else
-    return tre_compile(preg, regex, regex ? wcslen(regex) : 0, cflags);
+    return tre_regwncomp(preg, regex, regex ? wcslen(regex) : 0, cflags);
 }
 #endif /* TRE_WCHAR */
 

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-compile.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-compile.c	Wed Feb 15 21:32:05 2012	(r231781)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-compile.c	Wed Feb 15 21:48:29 2012	(r231782)
@@ -1844,8 +1844,8 @@ tre_ast_to_tnfa(tre_ast_node_t *node, tr
 }
 
 int
-tre_convert_pattern(const char *regex, size_t n, tre_char_t **w,
-		    size_t *wn)
+tre_convert_pattern_to_wcs(const char *regex, size_t n, tre_char_t **w,
+			   size_t *wn)
 {
 #if TRE_WCHAR
   tre_char_t *wregex;
@@ -1926,14 +1926,50 @@ tre_convert_pattern(const char *regex, s
 #endif /* !TRE_WCHAR */
 }
 
+int
+tre_convert_pattern_to_mbs(const tre_char_t *wregex, size_t n, char **s,
+			   size_t *sn)
+{
+#ifdef TRE_WCHAR
+  size_t siz;
+  char *mbs;
+
+  siz = wcstombs(NULL, wregex, 0);
+  if (siz == (size_t)-1)
+    return REG_BADPAT;
+
+  mbs = xmalloc(siz + 1);
+  if (!mbs)
+    return REG_ESPACE;
+
+  wcstombs(mbs, wregex, siz);
+  mbs[siz] = '\0';
+  *s = mbs;
+  *sn = siz;
+  return REG_OK;
+#else /* !TRE_WCHAR */
+  *s = (char * const *)wregex;
+  *sn = n;
+  return REG_OK;
+#endif
+}
+
 void
-tre_free_pattern(tre_char_t *wregex)
+tre_free_wcs_pattern(tre_char_t *wregex)
 {
 #if TRE_WCHAR
   xfree(wregex);
 #endif
 }
 
+void
+tre_free_mbs_pattern(char *regex)
+{
+#if TRE_WCHAR
+  xfree(regex);
+#endif
+}
+
 #define ERROR_EXIT(err)		  \
   do				  \
     {				  \
@@ -1945,7 +1981,8 @@ tre_free_pattern(tre_char_t *wregex)
 
 
 int
-tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
+tre_compile(regex_t *preg, const tre_char_t *wregex, size_t wn,
+	    const char *regex, size_t n, int cflags)
 {
   int ret;
 
@@ -1954,7 +1991,7 @@ tre_compile(regex_t *preg, const tre_cha
    * pattern validation.  In this way, validation is not
    * scattered through the code.
    */
-  ret = tre_compile_nfa(preg, regex, n, cflags);
+  ret = tre_compile_nfa(preg, wregex, wn, cflags);
   if (ret != REG_OK)
     return ret;
 
@@ -1962,11 +1999,11 @@ tre_compile(regex_t *preg, const tre_cha
    * Check if we can cheat with a fixed string algorithm
    * if the pattern is long enough.
    */
-  ret = tre_compile_bm(preg, regex, n, cflags);
+  ret = tre_compile_bm(preg, wregex, wn, regex, n, cflags);
 
   /* Only try to compile heuristic if the fast matcher failed. */
   if (ret != REG_OK)
-    ret = tre_compile_heur(preg, regex, n, cflags);
+    ret = tre_compile_heur(preg, wregex, wn, cflags);
   else
     preg->heur = NULL;
 
@@ -1975,7 +2012,8 @@ tre_compile(regex_t *preg, const tre_cha
 }
 
 int
-tre_compile_bm(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
+tre_compile_bm(regex_t *preg, const tre_char_t *wregex, size_t wn,
+	       const char *regex, size_t n, int cflags)
 {
   fastmatch_t *shortcut;
   int ret;
@@ -1986,8 +2024,8 @@ tre_compile_bm(regex_t *preg, const tre_
   if (!shortcut)
     return REG_ESPACE;
   ret = (cflags & REG_LITERAL)
-	 ? tre_proc_literal(shortcut, regex, n, cflags)
-	 : tre_proc_fast(shortcut, regex, n, cflags);
+	 ? tre_proc_literal(shortcut, wregex, wn, regex, n, cflags)
+	 : tre_proc_fast(shortcut, wregex, wn, regex, n, cflags);
   if (ret == REG_OK)
     {
       preg->shortcut = shortcut;

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-compile.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-compile.h	Wed Feb 15 21:32:05 2012	(r231781)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-compile.h	Wed Feb 15 21:48:29 2012	(r231782)
@@ -24,8 +24,8 @@ typedef struct {
   int *params;
 } tre_pos_and_tags_t;
 
-int tre_compile_bm(regex_t *preg, const tre_char_t *regex, size_t n,
-		   int cflags);
+int tre_compile_bm(regex_t *preg, const tre_char_t *wregex, size_t wn,
+		   const char *regex, size_t n, int cflags);
 int tre_compile_heur(regex_t *preg, const tre_char_t *regex, size_t n,
 		     int cflags);
 int tre_compile_nfa(regex_t *preg, const tre_char_t *regex, size_t n,

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c	Wed Feb 15 21:32:05 2012	(r231781)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c	Wed Feb 15 21:48:29 2012	(r231782)
@@ -344,13 +344,13 @@ static int	fastcmp(const fastmatch_t *fg
  * Copies the pattern pat having lenght n to p and stores
  * the size in l.
  */
-#define SAVE_PATTERN(src, srclen, dst, dstlen)				\
+#define SAVE_PATTERN(src, srclen, dst, dstlen, l)			\
   dstlen = srclen;							\
-  dst = xmalloc((dstlen + 1) * sizeof(tre_char_t));			\
+  dst = xmalloc((dstlen + 1) * sizeof(l));				\
   if (dst == NULL)							\
     return REG_ESPACE;							\
   if (dstlen > 0)							\
-    memcpy(dst, src, dstlen * sizeof(tre_char_t));			\
+    memcpy(dst, src, dstlen * sizeof(l));				\
   dst[dstlen] = TRE_CHAR('\0');
 
 /*
@@ -402,8 +402,8 @@ static int	fastcmp(const fastmatch_t *fg
  * Returns: REG_OK on success, error code otherwise
  */
 int
-tre_proc_literal(fastmatch_t *fg, const tre_char_t *pat, size_t n,
-		 int cflags)
+tre_proc_literal(fastmatch_t *fg, const tre_char_t *wpat, size_t wn,
+		 const char *pat, size_t n, int cflags)
 {
 
   INIT_COMP;
@@ -415,10 +415,10 @@ tre_proc_literal(fastmatch_t *fg, const 
     return REG_BADPAT;
 
 #ifdef TRE_WCHAR
-  SAVE_PATTERN(pat, n, fg->wpattern, fg->wlen);
-  STORE_MBS_PAT;
+  SAVE_PATTERN(wpat, wn, fg->wpattern, fg->wlen, tre_char_t);
+  SAVE_PATTERN(pat, n, fg->pattern, fg->len, char);
 #else
-  SAVE_PATTERN(pat, n, fg->pattern, fg->len);
+  SAVE_PATTERN(pat, n, fg->pattern, fg->len, char);
 #endif
 
   DPRINT(("tre_proc_literal: pattern: %s, len %zu, icase: %c, word: %c, "
@@ -439,8 +439,8 @@ tre_proc_literal(fastmatch_t *fg, const 
  * Returns: REG_OK on success, error code otherwise
  */
 int
-tre_proc_fast(fastmatch_t *fg, const tre_char_t *pat, size_t n,
-	      int cflags)
+tre_proc_fast(fastmatch_t *fg, const tre_char_t *wpat, size_t wn,
+	      const char *pat, size_t n, int cflags)
 {
   tre_char_t *tmp;
   size_t pos = 0;
@@ -449,23 +449,23 @@ tre_proc_fast(fastmatch_t *fg, const tre
   INIT_COMP;
 
   /* Remove beginning-of-line character ('^'). */
-  if (pat[0] == TRE_CHAR('^'))
+  if (wpat[0] == TRE_CHAR('^'))
     {
       fg->bol = true;
-      n--;
-      pat++;
+      wn--;
+      wpat++;
     }
 
   CHECK_MATCHALL(false);
 
   /* Handle word-boundary matching when GNU extensions are enabled */
-  if ((cflags & REG_GNU) && (n >= 14) &&
-      (memcmp(pat, TRE_CHAR("[[:<:]]"), 7 * sizeof(tre_char_t)) == 0) &&
-      (memcmp(pat + n - 7, TRE_CHAR("[[:>:]]"),
+  if ((cflags & REG_GNU) && (wn >= 14) &&
+      (memcmp(wpat, TRE_CHAR("[[:<:]]"), 7 * sizeof(tre_char_t)) == 0) &&
+      (memcmp(wpat + wn - 7, TRE_CHAR("[[:>:]]"),
 	      7 * sizeof(tre_char_t)) == 0))
     {
-      n -= 14;
-      pat += 7;
+      wn -= 14;
+      wpat += 7;
       fg->word = true;
     }
 
@@ -473,7 +473,7 @@ tre_proc_fast(fastmatch_t *fg, const tre
   if (fg->word && (TRE_MB_CUR_MAX > 1))
     return REG_BADPAT;
 
-  tmp = xmalloc((n + 1) * sizeof(tre_char_t));
+  tmp = xmalloc((wn + 1) * sizeof(tre_char_t));
   if (tmp == NULL)
     return REG_ESPACE;
 
@@ -481,15 +481,15 @@ tre_proc_fast(fastmatch_t *fg, const tre
 #define STORE_CHAR							\
   do									\
     {									\
-      tmp[pos++] = pat[i];						\
+      tmp[pos++] = wpat[i];						\
       escaped = false;							\
       continue;								\
     } while (0)
 
   /* Traverse the input pattern for processing */
-  for (unsigned int i = 0; i < n; i++)
+  for (unsigned int i = 0; i < wn; i++)
     {
-      switch (pat[i])
+      switch (wpat[i])
 	{
 	  case TRE_CHAR('\\'):
 	    if (escaped)
@@ -574,10 +574,12 @@ badpat:
    * classes stripped out.
    */
 #ifdef TRE_WCHAR
-  SAVE_PATTERN(tmp, pos, fg->wpattern, fg->wlen);
+  SAVE_PATTERN(tmp, pos, fg->wpattern, fg->wlen, tre_char_t);
+
+  /* Convert back to MBS instead of processing again */
   STORE_MBS_PAT;
 #else
-  SAVE_PATTERN(tmp, pos, fg->pattern, fg->len);
+  SAVE_PATTERN(tmp, pos, fg->pattern, fg->len, char);
 #endif
 
   xfree(tmp);

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h	Wed Feb 15 21:32:05 2012	(r231781)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h	Wed Feb 15 21:48:29 2012	(r231782)
@@ -9,9 +9,10 @@
 #include "hashtable.h"
 #include "tre-internal.h"
 
-int	tre_proc_literal(fastmatch_t *preg, const tre_char_t *regex,
-	    size_t, int);
-int	tre_proc_fast(fastmatch_t *preg, const tre_char_t *regex, size_t, int);
+int	tre_proc_literal(fastmatch_t *, const tre_char_t *, size_t,
+	    const char *, size_t, int);
+int	tre_proc_fast(fastmatch_t *, const tre_char_t *, size_t,
+	    const char *, size_t, int);
 int	tre_match_fast(const fastmatch_t *fg, const void *data, size_t len,
 	    tre_str_type_t type, int nmatch, regmatch_t pmatch[], int eflags);
 void	tre_free_fast(fastmatch_t *preg);

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c	Wed Feb 15 21:32:05 2012	(r231781)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c	Wed Feb 15 21:48:29 2012	(r231782)
@@ -475,14 +475,14 @@ ok:
 
     for (i = 0; farr[i] != NULL; i++)
       {
-	bsiz[i] = mbstowcs(farr[i], NULL, 0);
+	bsiz[i] = wcstombs(NULL, farr[i], 0);
 	barr[i] = xmalloc(bsiz[i] + 1);
 	if (!barr[i])
 	  {
 	    errcode = REG_ESPACE;
 	    goto err;
 	  }
-	mbstowcs(farr[i], barr[i], bsiz[i]);
+	wcstombs(barr[i], farr[i], bsiz[i]);
 	barr[i][bsiz[i]] = '\0';
       }
     barr[i] = NULL;
@@ -513,7 +513,13 @@ ok:
 	    errcode = REG_ESPACE;
 	    goto err;
 	  }
-	ret = tre_proc_literal(h->heurs[i], farr[i], fsiz[i], 0);
+#ifdef TRE_WCHAR
+	ret = tre_proc_literal(h->heurs[i], farr[i], fsiz[i],
+			       barr[i], bsiz[i], 0);
+#else
+	ret = tre_proc_literal(h->heurs[i], farr[i], fsiz[i],
+			       farr[i], fsiz[i], 0);
+#endif
 	if (ret != REG_OK)
 	  {
 	    errcode = REG_BADPAT;

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-internal.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-internal.h	Wed Feb 15 21:32:05 2012	(r231781)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-internal.h	Wed Feb 15 21:48:29 2012	(r231782)
@@ -277,14 +277,22 @@ struct tnfa {
     } while (0 /*CONSTCOND*/)
 
 int
-tre_convert_pattern(const char *regex, size_t n, tre_char_t **w,
-		    size_t *wn);
+tre_convert_pattern_to_wcs(const char *regex, size_t n, tre_char_t **w,
+			  size_t *wn);
 
 void
-tre_free_pattern(tre_char_t *wregex);
+tre_free_wcs_pattern(tre_char_t *wregex);
 
 int
-tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags);
+tre_convert_pattern_to_mbs(const tre_char_t *wregex, size_t n, char **s,
+			   size_t *sn);
+
+void
+tre_free_mbs_pattern(char *wregex);
+
+int
+tre_compile(regex_t *preg, const tre_char_t *wregex, size_t wn,
+	    const char *regex, size_t n, int cflags);
 
 void
 tre_free(regex_t *preg);


More information about the svn-src-user mailing list