svn commit: r231094 - in user/gabor/tre-integration: contrib/tre/lib include

Gabor Kovesdan gabor at FreeBSD.org
Mon Feb 6 18:13:35 UTC 2012


Author: gabor
Date: Mon Feb  6 18:13:34 2012
New Revision: 231094
URL: http://svn.freebsd.org/changeset/base/231094

Log:
  Implement some missing parts of the Wu-Manber algorithm. It is almost complete
  now.

Modified:
  user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.c
  user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.h
  user/gabor/tre-integration/include/regex.h

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.c	Mon Feb  6 18:11:01 2012	(r231093)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.c	Mon Feb  6 18:13:34 2012	(r231094)
@@ -27,9 +27,18 @@
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif /* HAVE_CONFIG_H */
+#include <mregex.h>
+#include <regex.h>
 #include "tre-mfastmatch.h"
 #include "xmalloc.h"
 
+/* TODO:
+ *
+ * - REG_ICASE
+ * - Store pattern and sizes in pat/wpat/siz/wsiz
+ * - Test
+ */
+
 #define WM_B 2
 
 #define ALLOC(var, siz)							\
@@ -40,6 +49,13 @@
       goto fail;							\
     }
 
+#define FAIL								\
+  do									\
+    {									\
+      err = REG_BADPAT;							\
+      goto fail;							\
+    } while (0)
+
 #define _PROC_WM(pat_arr, siz_arr, char_size, sh_field, m_field)	\
   /* Determine shortest pattern length */				\
   wm->m_field = size_arr[0];						\
@@ -71,13 +87,13 @@
             entry->pref_list[0] = i;					\
             ret = hashtable_put(wm->sh_field, pat_arr[i], entry);	\
             if (ret != HASH_OK)						\
-              ; // XXX: error handling					\
+              FAIL;							\
             break;							\
           case HASH_OK:							\
             entry->shift = entry->shift < sh ? entry->shift : sh;	\
             entry->pref_list[entry->pref++] = i;			\
             if (ret != HASH_UPDATED)					\
-              ; // XXX: error handling					\
+              FAIL;							\
         }								\
       /* Intermediate fragments, only shift calculated */		\
       for (int j = 1; j < size_arr[i] - WB_M; j++)			\
@@ -93,12 +109,12 @@
                 ret = hashtable_put(wm->sh_field, &pat_arr[i][j],	\
 				    entry);				\
                 if (ret != HASH_OK)					\
-                  ;     // XXX: error handling				\
+                  FAIL;							\
                 break;							\
               case HASH_OK:						\
                 entry->shift = entry->shift < sh ? entry->shift : sh;	\
                 if (ret != HASH_UPDATED)				\
-                  ; // XXX: error handling				\
+                  FAIL;							\
         }								\
       ret = hashtable_get(wm->sh_field, &pat_arr[i][n[i] - WB_M],	\
 			  entry);					\
@@ -112,12 +128,12 @@
             ret = hashtable_put(wm->sh_field, &pat_arr[i][n[i] - WB_M],	\
 				entry);					\
             if (ret != HASH_OK)						\
-              ; // XXX: error handling					\
+              FAIL;							\
           case HASH_OK:							\
             entry->shift = entry->shift < sh ? entry->shift : sh;	\
             entry->suff_list[entry->suff++] = i;			\
             if (ret != HASH_UPDATED)					\
-              ; // XXX: error handling					\
+              FAIL;							\
         }								\
     }									\
   xfree(entry);
@@ -204,16 +220,75 @@ fail:
   return err;
 }
 
+#define MATCH(beg, end, p)						\
+  do									\
+    {									\
+      match->rm_so = beg;						\
+      match->rm_eo = end;						\
+      match->p = p;							\
+      err = REG_OK;							\
+      goto finish;							\
+    } while (0);
+
+#define _WMSEARCH(data, pats, sizes, mlen, tbl, dshift)			\
+  do									\
+    {									\
+      ret = hashtable_get(tbl, data[pos - WM_B], s_entry);		\
+      shift = (ret == HASH_OK) ? s_entry->shift : dshift;		\
+      if (shift == 0)							\
+	{								\
+	  ret = hashtable_get(tbl, data[pos - mlen, p_entry);		\
+	  if (ret == HASH_NOTFOUND)					\
+	    {								\
+	      pos += 1;							\
+	      continue;							\
+	    }								\
+	  else								\
+	    {								\
+	      for (int i = 0; i < p_entry->pref; i++)			\
+		for (int j = 0; (j < s_entry->suff) &&			\
+		    (s_entry->suff_list[j] <= p_entry->pref_list[i]);	\
+		     j++)						\
+		  if (s_entry->suff_list[j] == p_entry->pref_list[i])	\
+                    {							\
+		      size_t idx = s_entry->suff_list[j];		\
+		      int k;						\
+									\
+		      if (len - pos > sizes[idx] - mlen)		\
+			{						\
+			  for (k = WM_B; k < sizes[idx]; k++)		\
+			    if (pats[idx][k] != data[pos - mlen + k])	\
+			      break;					\
+			  if (k == sizes[idx])				\
+			    // XXX: match				\
+			}						\
+		    }							\
+		  else							\
+		     continue;						\
+	      pos += 1;							\
+            }								\
+	}								\
+      else								\
+	pos += shift;							\
+    } while (0);
+
+#define WMSEARCH							\
+  _WMSEARCH(byte_str, wm->pat, wm->siz, wm->m, wm->hash,		\
+	    wm->defsh)
+#define WMSEARCH_WIDE							\
+   _WMSEARCH(wide_str, wm->wpats, wm->wsiz, wm->wm, wm->whash,		\
+	     wm->wdefsh)
+
 int
 tre_wmexec(const void *str, size_t len, tre_str_type_t type,
 	   size_t nmatch, regmatch_t pmatch[], int eflags,
-	   const mregex_t *preg)
+	   const mregex_t *preg, regmatch_t *match)
 {
   wmsearch_t *wm = preg->wm;
   wmentry_t *s_entry, *p_entry;
   tre_char_t *wide_str = str;
   char *byte_str = str;
-  size_t pos = preg->n;
+  size_t pos = preg->m;
   size_T shift;
   int ret;
   int err = REG_NOMATCH;
@@ -224,47 +299,43 @@ tre_wmexec(const void *str, size_t len, 
   while (pos < len)
     {
       if (type == STR_WIDE)
-	{
-	  ret = hashtable_get(wm->wshift, wide_str[pos - WM_B], s_entry);
-	  shift = (ret == HASH_OK) ? s_entry->shift : wm->wdefshift;
-	  if (shift == 0)
-	    {
-	      ret = hashtable_get(wm->wshift, wide_str[pos - wm->wm, p_entry);
-	      if (ret == HASH_NOTFOUND)
-		continue;
-	      else
-		{
-		  for (int i = 0; i < p_entry->pref; i++)
-		    for (int j = 0; (j < s_entry->suff) &&
-			(s_entry->suff_list[j] <= p_entry->pref_list[i]); j++)
-		      if (s_entry->suff_list[j] == p_entry->pref_list[i])
-			{
-			  // XXX: compare
-			}
-		      else
-			continue;
-		}
-	    }
-	  else
-	    pos += shift;
-	}
+	WMSEARCH;
       else
-	{
-	  ret = hashtable_get(wm->shift, byte_str[pos - WM_B], s_entry);
-	  shift = (ret == HASH_OK) ? s_entry->shift : wm->defshift;
-	  if (shift == 0)
-	    {
-	      // XXX: comapre
-	    }
-	  else
-	    pos += shift;
-	}
+	WMSEARCH_WIDE;
     }
 
 fail:
+finish:
   if (s_entry)
     xfree(s_entry);
   if (p_entry)
     xfree(p_entry);
   return err;
 }
+
+void
+wmfree(mregex_t *preg)
+{
+  wmsearch_t wm = preg->wm;
+
+  if (wm->hash)
+    hashtable_free(wm->hash);
+  for (int i = 0; i < wm->n; i++)
+    if (wm->pat[i])
+      xfree(wm->pat[i]);
+  if (wm->pat)
+    xfree(wm->pat);
+  if (wm->siz)
+    xfree(wm->siz);
+#ifdef TRE_WCHAR
+  if (wm->whash)
+    hashtable_free(wm->whash);
+  for (int i = 0; i < wm->wn; i++)
+    if (wm->wpat[i])
+      xfree(wm->wpat[i]);
+  if (wm->wpat)
+    xfree(wm->wpat);
+  if (wm->wsiz)
+    xfree(wm->wsiz);
+#endif
+}

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.h	Mon Feb  6 18:11:01 2012	(r231093)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.h	Mon Feb  6 18:13:34 2012	(r231094)
@@ -10,10 +10,16 @@
 #define WM_MAXPAT 64
 
 typedef struct {
+	char **pat;		/* Patterns */
+	size_t *siz;		/* Pattern sizes */
+	size_t n;		/* No of patterns */
         size_t m;		/* Shortest pattern length */
 	size_t defsh;		/* Default shift */
 	void *hash;		/* Wu-Manber shift table */
 #ifdef TRE_WCHAR
+	tre_char_t **wpat;	/* Patterns (wide) */
+	size_t wsiz;		/* Pattern sizes (wide) */
+	size_t wn;		/* No of patterns (wide) */
 	size_t wm;		/* Shortest pattern length (wide) */
 	size_t wdefsh;		/* Default shift (wide) */
 	void *whash;		/* Wu-Manber shift table (wide) */

Modified: user/gabor/tre-integration/include/regex.h
==============================================================================
--- user/gabor/tre-integration/include/regex.h	Mon Feb  6 18:11:01 2012	(r231093)
+++ user/gabor/tre-integration/include/regex.h	Mon Feb  6 18:13:34 2012	(r231094)
@@ -79,6 +79,7 @@ typedef struct {
 } regex_t;
 
 typedef struct {
+  size_t p;
   regoff_t rm_so;
   regoff_t rm_eo;
 } regmatch_t;


More information about the svn-src-user mailing list