svn commit: r225266 - user/gabor/grep/trunk/regex
Gabor Kovesdan
gabor at FreeBSD.org
Tue Aug 30 16:40:18 UTC 2011
Author: gabor
Date: Tue Aug 30 16:40:17 2011
New Revision: 225266
URL: http://svn.freebsd.org/changeset/base/225266
Log:
- Merge from TRE code
Modified:
user/gabor/grep/trunk/regex/glue.h
user/gabor/grep/trunk/regex/hashtable.c
user/gabor/grep/trunk/regex/hashtable.h
user/gabor/grep/trunk/regex/tre-fastmatch.c
Modified: user/gabor/grep/trunk/regex/glue.h
==============================================================================
--- user/gabor/grep/trunk/regex/glue.h Tue Aug 30 16:33:59 2011 (r225265)
+++ user/gabor/grep/trunk/regex/glue.h Tue Aug 30 16:40:17 2011 (r225266)
@@ -10,6 +10,8 @@
#define TRE_WCHAR 1
#define TRE_MULTIBYTE 1
+#define TRE_CHAR(n) L##n
+
#define tre_char_t wchar_t
#define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps)))
#define tre_strlen wcslen
@@ -19,6 +21,7 @@
#define REG_LITERAL 0020
#define REG_WORD 0100
#define REG_GNU 0400
+#define _REG_HEUR 1000
#define REG_OK 0
Modified: user/gabor/grep/trunk/regex/hashtable.c
==============================================================================
--- user/gabor/grep/trunk/regex/hashtable.c Tue Aug 30 16:33:59 2011 (r225265)
+++ user/gabor/grep/trunk/regex/hashtable.c Tue Aug 30 16:40:17 2011 (r225266)
@@ -1,5 +1,3 @@
-/* $FreeBSD$ */
-
/*-
* Copyright (C) 2011 Gabor Kovesdan <gabor at FreeBSD.org>
* All rights reserved.
@@ -26,134 +24,208 @@
* SUCH DAMAGE.
*/
-#include <sys/hash.h>
-#include <hashtable.h>
+#include <errno.h>
#include <stdlib.h>
#include <string.h>
-hashtable
-*hashtable_init(size_t table_size, size_t key_size, size_t value_size)
+#include "hashtable.h"
+
+
+/*
+ * Return a 32-bit hash of the given buffer. The init
+ * value should be 0, or the previous hash value to extend
+ * the previous hash.
+ */
+static uint32_t
+hash32_buf(const void *buf, size_t len, uint32_t hash)
{
- hashtable *tbl;
+ const unsigned char *p = buf;
- tbl = malloc(sizeof(hashtable));
- if (tbl == NULL)
- return (NULL);
-
- tbl->entries = calloc(sizeof(hashtable_entry *), table_size);
- if (tbl->entries == NULL) {
- free(tbl);
- return (NULL);
- }
-
- tbl->table_size = table_size;
- tbl->usage = 0;
- tbl->key_size = key_size;
- tbl->value_size = value_size;
+ while (len--)
+ hash = HASHSTEP(hash, *p++);
- return (tbl);
+ return hash;
}
+/*
+ * Initializes a hash table that can hold table_size number of entries,
+ * each of which has a key of key_size bytes and a value of value_size
+ * bytes. On successful allocation returns a pointer to the hash table.
+ * Otherwise, returns NULL and sets errno to indicate the error.
+ */
+hashtable
+*hashtable_init(size_t table_size, size_t key_size, size_t value_size)
+{
+ hashtable *tbl;
+
+ tbl = malloc(sizeof(hashtable));
+ if (tbl == NULL)
+ goto mem1;
+
+ tbl->entries = calloc(sizeof(hashtable_entry *), table_size);
+ if (tbl->entries == NULL)
+ goto mem2;
+
+ tbl->table_size = table_size;
+ tbl->usage = 0;
+ tbl->key_size = key_size;
+ tbl->value_size = value_size;
+
+ return (tbl);
+
+mem2:
+ free(tbl);
+mem1:
+ errno = ENOMEM;
+ return (NULL);
+}
+
+/*
+ * Places the key-value pair to the hashtable tbl.
+ * Returns:
+ * HASH_OK: if the key was not present in the hash table yet
+ * but the kay-value pair has been successfully added.
+ * HASH_UPDATED: if the value for the key has been updated with the
+ * new value.
+ * HASH_FULL: if the hash table is full and the entry could not
+ * be added.
+ * HASH_FAIL: if an error has occurred and errno has been set to
+ * indicate the error.
+ */
int
hashtable_put(hashtable *tbl, const void *key, const void *value)
{
- uint32_t hash = 0;
-
- if (tbl->table_size == tbl->usage)
- return (-1);
+ uint32_t hash = 0;
- hash = hash32_buf(key, tbl->key_size, hash);
- hash %= tbl->table_size;
+ if (tbl->table_size == tbl->usage)
+ return (HASH_FULL);
- while (tbl->entries[hash] != NULL)
- hash = (hash >= tbl->table_size) ? 0 : hash + 1;
-
- tbl->entries[hash] = malloc(sizeof(hashtable_entry));
- if (tbl->entries[hash] == NULL)
- return (-1);
-
- tbl->entries[hash]->key = malloc(tbl->key_size);
- if (tbl->entries[hash]->key == NULL) {
- free(tbl->entries[hash]);
- return (-1);
- }
-
- tbl->entries[hash]->value = malloc(tbl->value_size);
- if (tbl->entries[hash]->value == NULL) {
- free(tbl->entries[hash]->key);
- free(tbl->entries[hash]);
- return (-1);
- }
-
- memcpy(&tbl->entries[hash]->key, key, tbl->key_size);
- memcpy(&tbl->entries[hash]->value, value, tbl->value_size);
- tbl->usage++;
+ hash = hash32_buf(key, tbl->key_size, hash) % tbl->table_size;
- return (0);
+ /*
+ * On hash collision entries are inserted at the next free space,
+ * so we have to increase the index until we either find an entry
+ * with the same key (and update it) or we find a free space.
+ */
+ for(;;)
+ if (tbl->entries[hash] == NULL)
+ break;
+ else if (memcmp(tbl->entries[hash]->key, key, tbl->key_size) == 0)
+ {
+ memcpy(tbl->entries[hash]->value, value, tbl->value_size);
+ return (HASH_UPDATED);
+ }
+
+ tbl->entries[hash] = malloc(sizeof(hashtable_entry));
+ if (tbl->entries[hash] == NULL)
+ {
+ errno = ENOMEM;
+ goto mem1;
+ }
+
+ tbl->entries[hash]->key = malloc(tbl->key_size);
+ if (tbl->entries[hash]->key == NULL)
+ {
+ errno = ENOMEM;
+ goto mem2;
+ }
+
+ tbl->entries[hash]->value = malloc(tbl->value_size);
+ if (tbl->entries[hash]->value == NULL)
+ {
+ errno = ENOMEM;
+ goto mem3;
+ }
+
+ memcpy(tbl->entries[hash]->key, key, tbl->key_size);
+ memcpy(tbl->entries[hash]->value, value, tbl->value_size);
+ tbl->usage++;
+
+ return (HASH_OK);
+
+mem3:
+ free(tbl->entries[hash]->key);
+mem2:
+ free(tbl->entries[hash]);
+mem1:
+ return (HASH_FAIL);
}
static hashtable_entry
-*hashtable_lookup(const hashtable *tbl, const void *key)
+**hashtable_lookup(const hashtable *tbl, const void *key)
{
- uint32_t hash = 0;
+ uint32_t hash = 0;
- hash = hash32_buf(key, tbl->key_size, hash);
- hash %= tbl->table_size;
+ hash = hash32_buf(key, tbl->key_size, hash) % tbl->table_size;
- for (;;) {
- if (tbl->entries[hash] == NULL)
- return (NULL);
- else if (memcmp(key, &tbl->entries[hash]->key,
- tbl->key_size) == 0)
- return (tbl->entries[hash]);
+ for (;;)
+ {
+ if (tbl->entries[hash] == NULL)
+ return (NULL);
+ else if (memcmp(key, tbl->entries[hash]->key, tbl->key_size) == 0)
+ return (&tbl->entries[hash]);
- hash = (hash == tbl->table_size) ? 0 : hash + 1;
- }
+ hash = (hash == tbl->table_size) ? 0 : hash + 1;
+ }
}
+/*
+ * Retrieves the value for key from the hash table tbl and places
+ * it to the space indicated by the value argument.
+ * Returns HASH_OK if the value has been found and retrieved or
+ * HASH_NOTFOUND otherwise.
+ */
int
hashtable_get(hashtable *tbl, const void *key, void *value)
{
- hashtable_entry *entry;
+ hashtable_entry **entry;
- entry = hashtable_lookup(tbl, key);
- if (entry == NULL)
- return (-1);
+ entry = hashtable_lookup(tbl, key);
+ if (entry == NULL)
+ return (HASH_NOTFOUND);
- memcpy(value, &entry->value, tbl->value_size);
- return (0);
+ memcpy(value, (*entry)->value, tbl->value_size);
+ return (HASH_OK);
}
+/*
+ * Removes the entry with the specifified key from the hash table
+ * tbl. Returns HASH_OK if the entry has been found and removed
+ * or HASH_NOTFOUND otherwise.
+ */
int
hashtable_remove(hashtable *tbl, const void *key)
{
- hashtable_entry *entry;
+ hashtable_entry **entry;
- entry = hashtable_lookup(tbl, key);
- if (entry == NULL)
- return (-1);
+ entry = hashtable_lookup(tbl, key);
+ if (entry == NULL)
+ return (HASH_NOTFOUND);
-// free(entry->key);
-// free(entry->value);
- free(entry);
+ free((*entry)->key);
+ free((*entry)->value);
+ free(*entry);
+ *entry = NULL;
- tbl->usage--;
-
- return (0);
+ tbl->usage--;
+ return (HASH_OK);
}
+/*
+ * Frees the resources associated with the hash table tbl.
+ */
void
hashtable_free(hashtable *tbl)
{
+ if (tbl == NULL)
+ return;
- if (tbl == NULL)
- return;
+ for (unsigned int i = 0; i < tbl->table_size; i++)
+ if ((tbl->entries[i] != NULL))
+ {
+ free(tbl->entries[i]->key);
+ free(tbl->entries[i]->value);
+ }
- for (unsigned int i = 0; i < tbl->table_size; i++)
- if (tbl->entries[i] != NULL) {
-// free(tbl->entries[i]->key);
-// free(tbl->entries[i]->value);
-// free(tbl->entries[i]);
- }
- free(tbl->entries);
+ free(tbl->entries);
}
Modified: user/gabor/grep/trunk/regex/hashtable.h
==============================================================================
--- user/gabor/grep/trunk/regex/hashtable.h Tue Aug 30 16:33:59 2011 (r225265)
+++ user/gabor/grep/trunk/regex/hashtable.h Tue Aug 30 16:40:17 2011 (r225266)
@@ -5,23 +5,31 @@
#include <sys/types.h>
+#define HASH_OK 0
+#define HASH_UPDATED 1
+#define HASH_FAIL 2
+#define HASH_FULL 3
+#define HASH_NOTFOUND 4
+
+#define HASHSTEP(x,c) (((x << 5) + x) + (c))
+
typedef struct {
- void *key;
- void *value;
+ void *key;
+ void *value;
} hashtable_entry;
typedef struct {
- size_t key_size;
- size_t table_size;
- size_t usage;
- size_t value_size;
- hashtable_entry **entries;
+ size_t key_size;
+ size_t table_size;
+ size_t usage;
+ size_t value_size;
+ hashtable_entry **entries;
} hashtable;
-void hashtable_free(hashtable *);
-int hashtable_get(hashtable *, const void *, void *);
-hashtable *hashtable_init(size_t, size_t, size_t);
-int hashtable_put(hashtable *, const void *, const void *);
-int hashtable_remove(hashtable *, const void *);
+void hashtable_free(hashtable *);
+int hashtable_get(hashtable *, const void *, void *);
+hashtable *hashtable_init(size_t, size_t, size_t);
+int hashtable_put(hashtable *, const void *, const void *);
+int hashtable_remove(hashtable *, const void *);
#endif /* HASHTABLE.H */
Modified: user/gabor/grep/trunk/regex/tre-fastmatch.c
==============================================================================
--- user/gabor/grep/trunk/regex/tre-fastmatch.c Tue Aug 30 16:33:59 2011 (r225265)
+++ user/gabor/grep/trunk/regex/tre-fastmatch.c Tue Aug 30 16:40:17 2011 (r225266)
@@ -1,5 +1,3 @@
-/* $FreeBSD$ */
-
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* Copyright (C) 2008-2011 Gabor Kovesdan <gabor at FreeBSD.org>
@@ -30,7 +28,6 @@
#include "glue.h"
#include <ctype.h>
-#include <hashtable.h>
#include <limits.h>
#include <regex.h>
#include <stdbool.h>
@@ -48,14 +45,17 @@
static int fastcmp(const void *, const void *, size_t,
tre_str_type_t, bool, bool);
-/*
- * We will work with wide characters if they are supported
- */
-#ifdef TRE_WCHAR
-#define TRE_CHAR(n) L##n
-#else
-#define TRE_CHAR(n) n
-#endif
+#define FAIL_COMP(errcode) \
+ { \
+ if (fg->pattern) \
+ xfree(fg->pattern); \
+ if (fg->wpattern) \
+ xfree(fg->wpattern); \
+ if (fg->qsBc_table) \
+ hashtable_free(fg->qsBc_table); \
+ fg = NULL; \
+ return errcode; \
+ }
/*
* Skips n characters in the input string and assigns the start
@@ -143,7 +143,10 @@ static int fastcmp(const void *, const v
&((tre_char_t *)startptr)[mismatch + 1], &bc); \
gs = fg->bmGs[mismatch]; \
} \
- bc = (r == 0) ? bc : fg->defBc; \
+ bc = (r == HASH_OK) ? bc : fg->defBc; \
+ DPRINT(("tre_fast_match: mismatch on character %lc," \
+ "BC %d, GS %d\n", \
+ ((tre_char_t *)startptr)[mismatch + 1], bc, gs)); \
break; \
default: \
if (!fg->hasdot) \
@@ -154,6 +157,9 @@ static int fastcmp(const void *, const v
gs = fg->sbmGs[mismatch]; \
} \
bc = fg->qsBc[((unsigned char *)startptr)[mismatch + 1]]; \
+ DPRINT(("tre_fast_match: mismatch on character %c," \
+ "BC %d, GS %d\n", \
+ ((unsigned char *)startptr)[mismatch + 1], bc, gs)); \
} \
if (fg->hasdot) \
shift = bc; \
@@ -171,6 +177,7 @@ static int fastcmp(const void *, const v
u = 0; \
} \
} \
+ DPRINT(("tre_fast_match: shifting %d characters\n", shift)); \
j += shift; \
}
@@ -200,6 +207,8 @@ static int fastcmp(const void *, const v
for (int i = fg->hasdot + 1; i < fg->len; i++) \
{ \
fg->qsBc[(unsigned)fg->pattern[i]] = fg->len - i; \
+ DPRINT(("BC shift for char %c is %d\n", fg->pattern[i], \
+ fg->len - i)); \
if (fg->icase) \
{ \
char c = islower(fg->pattern[i]) ? toupper(fg->pattern[i]) \
@@ -224,15 +233,25 @@ static int fastcmp(const void *, const v
/* Preprocess pattern. */ \
fg->qsBc_table = hashtable_init(fg->wlen * 4, sizeof(tre_char_t), \
sizeof(int)); \
+ if (!fg->qsBc_table) \
+ FAIL_COMP(REG_ESPACE); \
for (unsigned int i = fg->hasdot + 1; i < fg->wlen; i++) \
{ \
int k = fg->wlen - i; \
- hashtable_put(fg->qsBc_table, &fg->wpattern[i], &k); \
+ int r; \
+ \
+ r = hashtable_put(fg->qsBc_table, &fg->wpattern[i], &k); \
+ if ((r == HASH_FAIL) || (r == HASH_FULL)) \
+ FAIL_COMP(REG_ESPACE); \
+ DPRINT(("BC shift for wide char %lc is %d\n", fg->wpattern[i], \
+ fg->wlen - i)); \
if (fg->icase) \
{ \
tre_char_t wc = iswlower(fg->wpattern[i]) ? \
towupper(fg->wpattern[i]) : towlower(fg->wpattern[i]); \
- hashtable_put(fg->qsBc_table, &wc, &k); \
+ r = hashtable_put(fg->qsBc_table, &wc, &k); \
+ if ((r == HASH_FAIL) || (r == HASH_FULL)) \
+ FAIL_COMP(REG_ESPACE); \
} \
}
@@ -245,7 +264,10 @@ static int fastcmp(const void *, const v
fg->sbmGs = xmalloc(fg->len * sizeof(int)); \
if (!fg->sbmGs) \
return REG_ESPACE; \
- _FILL_BMGS(fg->sbmGs, fg->pattern, fg->len, false); \
+ if (fg->len == 1) \
+ fg->sbmGs[0] = 1; \
+ else \
+ _FILL_BMGS(fg->sbmGs, fg->pattern, fg->len, false); \
}
/*
@@ -257,7 +279,10 @@ static int fastcmp(const void *, const v
fg->bmGs = xmalloc(fg->wlen * sizeof(int)); \
if (!fg->bmGs) \
return REG_ESPACE; \
- _FILL_BMGS(fg->bmGs, fg->wpattern, fg->wlen, true); \
+ if (fg->wlen == 1) \
+ fg->bmGs[0] = 1; \
+ else \
+ _FILL_BMGS(fg->bmGs, fg->wpattern, fg->wlen, true); \
}
#define _FILL_BMGS(arr, pat, plen, wide) \
@@ -385,6 +410,10 @@ tre_compile_literal(fastmatch_t *fg, con
SAVE_PATTERN(fg->pattern, fg->len);
#endif
+ DPRINT(("tre_compile_literal: pattern: %s, icase: %c, word: %c, "
+ "newline %c\n", fg->pattern, fg->icase ? 'y' : 'n',
+ fg->word ? 'y' : 'n', fg->newline ? 'y' : 'n'));
+
FILL_QSBC;
FILL_BMGS;
#ifdef TRE_WCHAR
@@ -438,10 +467,11 @@ tre_compile_fast(fastmatch_t *fg, const
for (unsigned int i = 0; i < n; i++)
{
/* Can still cheat? */
- if ((tre_isalnum(pat[i])) || tre_isspace(pat[i]) ||
+ if (!(cflags & _REG_HEUR) &&
+ ((tre_isalnum(pat[i])) || tre_isspace(pat[i]) ||
(pat[i] == TRE_CHAR('_')) || (pat[i] == TRE_CHAR(',')) ||
(pat[i] == TRE_CHAR('=')) || (pat[i] == TRE_CHAR('-')) ||
- (pat[i] == TRE_CHAR(':')) || (pat[i] == TRE_CHAR('/')))
+ (pat[i] == TRE_CHAR(':')) || (pat[i] == TRE_CHAR('/'))))
continue;
else if (pat[i] == TRE_CHAR('.'))
fg->hasdot = i;
@@ -461,6 +491,12 @@ tre_compile_fast(fastmatch_t *fg, const
SAVE_PATTERN(fg->pattern, fg->len);
#endif
+ DPRINT(("tre_compile_fast: pattern: %s, bol %c, eol %c, "
+ "icase: %c, word: %c, newline %c\n", fg->pattern,
+ fg->bol ? 'y' : 'n', fg->eol ? 'y' : 'n',
+ fg->icase ? 'y' : 'n', fg->word ? 'y' : 'n',
+ fg->newline ? 'y' : 'n'));
+
FILL_QSBC;
FILL_BMGS;
#ifdef TRE_WCHAR
@@ -644,6 +680,9 @@ void
tre_free_fast(fastmatch_t *fg)
{
+ DPRINT(("tre_fast_free: freeing structures for pattern %s\n",
+ fg->pattern));
+
#ifdef TRE_WCHAR
hashtable_free(fg->qsBc_table);
if (!fg->hasdot)
@@ -697,6 +736,7 @@ fastcmp(const void *pat, const void *dat
: (pat_byte[i] == str_byte[i]))
continue;
}
+ DPRINT(("fastcmp: mismatch at position %d\n", i));
ret = -(i + 1);
break;
}
More information about the svn-src-user
mailing list