svn commit: r225273 - user/gabor/grep/trunk/regex

Gabor Kovesdan gabor at FreeBSD.org
Tue Aug 30 23:16:55 UTC 2011


Author: gabor
Date: Tue Aug 30 23:16:54 2011
New Revision: 225273
URL: http://svn.freebsd.org/changeset/base/225273

Log:
  - Fix a bug in the hashtable code that caused an infinite loop
  - While here, add some debug output that helped debugging

Modified:
  user/gabor/grep/trunk/regex/fastmatch.c
  user/gabor/grep/trunk/regex/glue.h
  user/gabor/grep/trunk/regex/hashtable.c
  user/gabor/grep/trunk/regex/tre-fastmatch.c

Modified: user/gabor/grep/trunk/regex/fastmatch.c
==============================================================================
--- user/gabor/grep/trunk/regex/fastmatch.c	Tue Aug 30 20:54:55 2011	(r225272)
+++ user/gabor/grep/trunk/regex/fastmatch.c	Tue Aug 30 23:16:54 2011	(r225273)
@@ -28,6 +28,7 @@
 
 #include "glue.h"
 
+#include <errno.h>
 #include <fastmatch.h>
 #include <regex.h>
 #include <string.h>

Modified: user/gabor/grep/trunk/regex/glue.h
==============================================================================
--- user/gabor/grep/trunk/regex/glue.h	Tue Aug 30 20:54:55 2011	(r225272)
+++ user/gabor/grep/trunk/regex/glue.h	Tue Aug 30 23:16:54 2011	(r225273)
@@ -6,6 +6,7 @@
 #include <limits.h>
 #undef RE_DUP_MAX
 #include <regex.h>
+#include <stdio.h>
 
 #define TRE_WCHAR			1
 #define TRE_MULTIBYTE			1
@@ -27,7 +28,12 @@
 
 #define TRE_MB_CUR_MAX			MB_CUR_MAX
 
-#define DPRINT(msg)			
+#ifndef _GREP_DEBUG
+#define DPRINT(msg)
+#else			
+#define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/0)
+#endif
+
 #define MIN(a,b)			((a > b) ? (b) : (a))
 #define MAX(a,b)			((a > b) ? (a) : (b))
 

Modified: user/gabor/grep/trunk/regex/hashtable.c
==============================================================================
--- user/gabor/grep/trunk/regex/hashtable.c	Tue Aug 30 20:54:55 2011	(r225272)
+++ user/gabor/grep/trunk/regex/hashtable.c	Tue Aug 30 23:16:54 2011	(r225273)
@@ -24,6 +24,8 @@
  * SUCH DAMAGE.
  */
 
+#include "glue.h"
+
 #include <errno.h>
 #include <stdlib.h>
 #include <string.h>
@@ -58,6 +60,10 @@ hashtable
 {
   hashtable *tbl;
 
+  DPRINT(("hashtable_init: table_size %lu, key_size %lu, value_size %lu\n",
+	(unsigned long)table_size, (unsigned long)key_size,
+	(unsigned long)value_size));
+
   tbl = malloc(sizeof(hashtable));
   if (tbl == NULL)
     goto mem1;
@@ -76,6 +82,7 @@ hashtable
 mem2:
   free(tbl);
 mem1:
+  DPRINT(("hashtable_init: allocation failed\n"));
   errno = ENOMEM;
   return (NULL);
 }
@@ -98,9 +105,13 @@ hashtable_put(hashtable *tbl, const void
   uint32_t hash = 0;
 
   if (tbl->table_size == tbl->usage)
-    return (HASH_FULL);
+    {
+      DPRINT(("hashtable_put: hashtable is full\n"));
+      return (HASH_FULL);
+    }
 
   hash = hash32_buf(key, tbl->key_size, hash) % tbl->table_size;
+  DPRINT(("hashtable_put: calculated hash %lu\n", hash));
 
   /*
    * On hash collision entries are inserted at the next free space,
@@ -108,13 +119,21 @@ hashtable_put(hashtable *tbl, const void
    * with the same key (and update it) or we find a free space.
    */
   for(;;)
-    if (tbl->entries[hash] == NULL)
-      break;
-    else if (memcmp(tbl->entries[hash]->key, key, tbl->key_size) == 0)
-      {
-	memcpy(tbl->entries[hash]->value, value, tbl->value_size);
-	return (HASH_UPDATED);
-      }
+    {
+      if (tbl->entries[hash] == NULL)
+	break;
+      else if (memcmp(tbl->entries[hash]->key, key, tbl->key_size) == 0)
+	{
+	  memcpy(tbl->entries[hash]->value, value, tbl->value_size);
+	  DPRINT(("hashtable_put: effective location is %lu, "
+		  "entry updated\n", hash));
+	  return (HASH_UPDATED);
+	}
+      if (++hash == tbl->table_size)
+	hash = 0;
+    }
+
+  DPRINT(("hashtable_put: effective location is %lu\n", hash));
 
   tbl->entries[hash] = malloc(sizeof(hashtable_entry));
   if (tbl->entries[hash] == NULL)
@@ -141,6 +160,8 @@ hashtable_put(hashtable *tbl, const void
   memcpy(tbl->entries[hash]->value, value, tbl->value_size);
   tbl->usage++;
 
+  DPRINT(("hashtable_put: entry successfully inserted\n"));
+
   return (HASH_OK);
 
 mem3:
@@ -148,6 +169,7 @@ mem3:
 mem2:
   free(tbl->entries[hash]);
 mem1:
+  DPRINT(("hashtable_put: insertion failed\n"));
   return (HASH_FAIL);
 }
 
@@ -163,9 +185,13 @@ static hashtable_entry
       if (tbl->entries[hash] == NULL)
 	return (NULL);
       else if (memcmp(key, tbl->entries[hash]->key, tbl->key_size) == 0)
-	return (&tbl->entries[hash]);
+	{
+	  DPRINT(("hashtable_lookup: entry found at location %lu\n", hash));
+	  return (&tbl->entries[hash]);
+	}
 
-      hash = (hash == tbl->table_size) ? 0 : hash + 1;
+      if (++hash == tbl->table_size)
+	hash = 0;
     }
 }
 
@@ -182,9 +208,13 @@ hashtable_get(hashtable *tbl, const void
 
   entry = hashtable_lookup(tbl, key);
   if (entry == NULL)
-    return (HASH_NOTFOUND);
+    {
+      DPRINT(("hashtable_get: entry is not available in the hashtable\n"));
+      return (HASH_NOTFOUND);
+    }
 
   memcpy(value, (*entry)->value, tbl->value_size);
+  DPRINT(("hashtable_get: entry successfully copied into output buffer\n"));
   return (HASH_OK);
 }
 
@@ -200,7 +230,10 @@ hashtable_remove(hashtable *tbl, const v
 
   entry = hashtable_lookup(tbl, key);
   if (entry == NULL)
-    return (HASH_NOTFOUND);
+    {
+      DPRINT(("hashtable_remove: entry is not available in the hashtable\n"));
+      return (HASH_NOTFOUND);
+    }
 
   free((*entry)->key);
   free((*entry)->value);
@@ -208,6 +241,7 @@ hashtable_remove(hashtable *tbl, const v
   *entry = NULL;
 
   tbl->usage--;
+  DPRINT(("hashtable_remove: entry successfully removed\n"));
   return (HASH_OK);
 }
 
@@ -228,4 +262,5 @@ hashtable_free(hashtable *tbl)
       }
 
   free(tbl->entries);
+  DPRINT(("hashtable_free: resources are successfully freed\n"));
 }

Modified: user/gabor/grep/trunk/regex/tre-fastmatch.c
==============================================================================
--- user/gabor/grep/trunk/regex/tre-fastmatch.c	Tue Aug 30 20:54:55 2011	(r225272)
+++ user/gabor/grep/trunk/regex/tre-fastmatch.c	Tue Aug 30 23:16:54 2011	(r225273)
@@ -144,7 +144,7 @@ static int	fastcmp(const void *, const v
 	      gs = fg->bmGs[mismatch];					\
 	    }								\
 	    bc = (r == HASH_OK) ? bc : fg->defBc;			\
-	    DPRINT(("tre_fast_match: mismatch on character %lc,"	\
+	    DPRINT(("tre_fast_match: mismatch on character %lc, "	\
 		    "BC %d, GS %d\n",					\
 		    ((tre_char_t *)startptr)[mismatch + 1], bc, gs));	\
             break;							\
@@ -157,7 +157,7 @@ static int	fastcmp(const void *, const v
 	      gs = fg->sbmGs[mismatch];					\
 	    }								\
 	  bc = fg->qsBc[((unsigned char *)startptr)[mismatch + 1]];	\
-	  DPRINT(("tre_fast_match: mismatch on character %c,"		\
+	  DPRINT(("tre_fast_match: mismatch on character %c, "		\
 		 "BC %d, GS %d\n",					\
 		 ((unsigned char *)startptr)[mismatch + 1], bc, gs));	\
       }									\
@@ -214,6 +214,7 @@ static int	fastcmp(const void *, const v
           char c = islower(fg->pattern[i]) ? toupper(fg->pattern[i])	\
             : tolower(fg->pattern[i]);					\
           fg->qsBc[(unsigned)c] = fg->len - i;				\
+	  DPRINT(("BC shift for char %c is %d\n", c, fg->len - i));	\
         }								\
     }
 
@@ -231,7 +232,7 @@ static int	fastcmp(const void *, const v
   fg->defBc = fg->wlen - fg->hasdot;					\
 									\
   /* Preprocess pattern. */						\
-  fg->qsBc_table = hashtable_init(fg->wlen * 4, sizeof(tre_char_t),	\
+  fg->qsBc_table = hashtable_init(fg->wlen * 8, sizeof(tre_char_t),	\
     sizeof(int));							\
   if (!fg->qsBc_table)							\
     FAIL_COMP(REG_ESPACE);						\
@@ -252,6 +253,8 @@ static int	fastcmp(const void *, const v
 	  r = hashtable_put(fg->qsBc_table, &wc, &k);			\
 	  if ((r == HASH_FAIL) || (r == HASH_FULL))			\
 	    FAIL_COMP(REG_ESPACE);					\
+	  DPRINT(("BC shift for wide char %lc is %d\n", wc,		\
+		 fg->wlen - i));					\
 	}								\
     }
 


More information about the svn-src-user mailing list