svn commit: r231755 - user/gabor/tre-integration/contrib/tre/lib
Gabor Kovesdan
gabor at FreeBSD.org
Wed Feb 15 11:11:59 UTC 2012
Author: gabor
Date: Wed Feb 15 11:11:58 2012
New Revision: 231755
URL: http://svn.freebsd.org/changeset/base/231755
Log:
- Separate the different compilation steps into their own functions for easier later reuse and better readability
- Rename some functions that belong to a lower layer for clarity
Modified:
user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
user/gabor/tre-integration/contrib/tre/lib/tre-compile.c
user/gabor/tre-integration/contrib/tre/lib/tre-compile.h
user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c
user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h
user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c
user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h
Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/fastmatch.c Wed Feb 15 10:33:29 2012 (r231754)
+++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.c Wed Feb 15 11:11:58 2012 (r231755)
@@ -51,12 +51,12 @@ tre_fixncomp(fastmatch_t *preg, const ch
if (ret != REG_OK)
return ret;
else
- ret = tre_compile_literal(preg, wregex, wlen, cflags);
+ ret = tre_proc_literal(preg, wregex, wlen, cflags);
tre_free_pattern(wregex);
return ret;
}
else
- return tre_compile_literal(preg, NULL, 0, cflags);
+ return tre_proc_literal(preg, NULL, 0, cflags);
}
int
@@ -73,13 +73,13 @@ tre_fastncomp(fastmatch_t *preg, const c
return ret;
else
ret = (cflags & REG_LITERAL)
- ? tre_compile_literal(preg, wregex, wlen, cflags)
- : tre_compile_fast(preg, wregex, wlen, cflags);
+ ? tre_proc_literal(preg, wregex, wlen, cflags)
+ : tre_proc_fast(preg, wregex, wlen, cflags);
tre_free_pattern(wregex);
return ret;
}
else
- return tre_compile_literal(preg, NULL, 0, cflags);
+ return tre_proc_literal(preg, NULL, 0, cflags);
}
@@ -98,15 +98,15 @@ tre_fastcomp(fastmatch_t *preg, const ch
int
tre_fixwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags)
{
- return tre_compile_literal(preg, regex, n, cflags);
+ return tre_proc_literal(preg, regex, n, cflags);
}
int
tre_fastwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags)
{
return (cflags & REG_LITERAL) ?
- tre_compile_literal(preg, regex, n, cflags) :
- tre_compile_fast(preg, regex, n, cflags);
+ tre_proc_literal(preg, regex, n, cflags) :
+ tre_proc_fast(preg, regex, n, cflags);
}
int
Modified: user/gabor/tre-integration/contrib/tre/lib/tre-compile.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-compile.c Wed Feb 15 10:33:29 2012 (r231754)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-compile.c Wed Feb 15 11:11:58 2012 (r231755)
@@ -1947,49 +1947,107 @@ tre_free_pattern(tre_char_t *wregex)
int
tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
{
+ int ret;
+
+ /*
+ * First, we always compile the NFA and it also serves as
+ * pattern validation. In this way, validation is not
+ * scattered through the code.
+ */
+ ret = tre_compile_nfa(preg, regex, n, cflags);
+ if (ret != REG_OK)
+ return ret;
+
+ /*
+ * Check if we can cheat with a fixed string algorithm
+ * if the pattern is long enough.
+ */
+ ret = tre_compile_bm(preg, regex, n, cflags);
+
+ /* Only try to compile heuristic if the fast matcher failed. */
+ if (ret != REG_OK)
+ ret = tre_compile_heur(preg, regex, n, cflags);
+ else
+ preg->heur = NULL;
+
+ /* When here, at least NFA surely succeeded. */
+ return REG_OK;
+}
+
+int
+tre_compile_bm(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
+{
+ fastmatch_t *shortcut;
+ int ret;
+
+ if (n < 2)
+ goto too_short;
+ shortcut = xmalloc(sizeof(fastmatch_t));
+ if (!shortcut)
+ return REG_ESPACE;
+ ret = (cflags & REG_LITERAL)
+ ? tre_proc_literal(shortcut, regex, n, cflags)
+ : tre_proc_fast(shortcut, regex, n, cflags);
+ if (ret == REG_OK)
+ {
+ preg->shortcut = shortcut;
+ DPRINT("tre_compile_bm: pattern compiled for fast matcher\n");
+ }
+ else
+ {
+too_short:
+ xfree(shortcut);
+ preg->shortcut = NULL;
+ DPRINT("tre_compile_bm: pattern compilation failed for fast matcher\n");
+ }
+ return ret;
+}
+
+int
+tre_compile_heur(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
+{
+ heur_t *heur;
+ int ret;
+
+ heur = xmalloc(sizeof(heur_t));
+ if (!heur)
+ return REG_ESPACE;
+
+ ret = tre_proc_heur(heur, regex, n, cflags);
+ if (ret != REG_OK)
+ {
+ xfree(heur);
+ preg->heur = NULL;
+ DPRINT("tre_compile_heur: heuristic compilation failed, NFA will be used "
+ "entirely\n");
+ }
+ else
+ {
+ preg->heur = heur;
+ DPRINT("tre_compile_heur: heuristic compiled to speed up the search\n");
+ }
+
+ return ret;
+}
+
+int
+tre_compile_nfa(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
+{
tre_stack_t *stack;
tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
tre_pos_and_tags_t *p;
int *counts = NULL, *offs = NULL;
- int i, add = 0, ret;
+ int i, add = 0;
tre_tnfa_transition_t *transitions, *initial;
tre_tnfa_t *tnfa = NULL;
tre_submatch_data_t *submatch_data;
tre_tag_direction_t *tag_directions = NULL;
reg_errcode_t errcode;
tre_mem_t mem;
- fastmatch_t *shortcut;
- heur_t *heur;
/* Parse context. */
tre_parse_ctx_t parse_ctx;
- /*
- * Check if we can cheat with a fixed string algorithm
- * if the pattern is long enough.
- */
- if (n >= 2)
- {
- shortcut = xmalloc(sizeof(fastmatch_t));
- if (!shortcut)
- return REG_ESPACE;
- ret = (cflags & REG_LITERAL)
- ? tre_compile_literal(shortcut, regex, n, cflags)
- : tre_compile_fast(shortcut, regex, n, cflags);
- if (ret == REG_OK)
- {
- preg->shortcut = shortcut;
- preg->re_nsub = 0;
- DPRINT("tre_compile: pattern compiled for fast matcher\n");
- }
- else
- {
- xfree(shortcut);
- preg->shortcut = NULL;
- DPRINT("tre_compile: pattern compilation failed for fast matcher\n");
- }
- }
-
/* Allocate a stack used throughout the compilation process for various
purposes. */
stack = tre_stack_new(512, 10240, 128);
@@ -2008,7 +2066,7 @@ tre_compile(regex_t *preg, const tre_cha
parse_ctx.len = n;
parse_ctx.cflags = cflags;
parse_ctx.max_backref = -1;
- DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
+ DPRINT(("tre_compile_nfa: parsing '%.*" STRF "'\n", (int)n, regex));
errcode = tre_parse(&parse_ctx);
if (errcode != REG_OK)
ERROR_EXIT(errcode);
@@ -2040,7 +2098,7 @@ tre_compile(regex_t *preg, const tre_cha
regexp does not have back references, this can be skipped. */
if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
{
- DPRINT(("tre_compile: setting up tags\n"));
+ DPRINT(("tre_compile_nfa: setting up tags\n"));
/* Figure out how many tags we will need. */
errcode = tre_add_tags(NULL, stack, tree, tnfa);
@@ -2277,42 +2335,10 @@ tre_compile(regex_t *preg, const tre_cha
preg->TRE_REGEX_T_FIELD = (void *)tnfa;
- /*
- * If we reach here, the regex is parsed and legal. Now we try to construct
- * a heuristic to speed up matching if we do not already have a shortcut
- * pattern.
- */
- if (!preg->shortcut)
- {
- heur = xmalloc(sizeof(heur_t));
- if (!heur)
- ERROR_EXIT(REG_ESPACE);
-
- ret = tre_compile_heur(heur, regex, n, cflags);
- if (ret != REG_OK)
- {
- xfree(heur);
- preg->heur = NULL;
- DPRINT("tre_compile: heuristic compilation failed, NFA will be used "
- "entirely\n");
- }
- else
- {
- preg->heur = heur;
- DPRINT("tre_compile: heuristic compiled to speed up the search\n");
- }
- }
- else
- preg->heur = NULL;
-
return REG_OK;
error_exit:
/* Free everything that was allocated and return the error code. */
- if (shortcut != NULL)
- xfree(shortcut);
- if (heur != NULL)
- xfree(heur);
if (mem != NULL)
tre_mem_destroy(mem);
if (stack != NULL)
Modified: user/gabor/tre-integration/contrib/tre/lib/tre-compile.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-compile.h Wed Feb 15 10:33:29 2012 (r231754)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-compile.h Wed Feb 15 11:11:58 2012 (r231755)
@@ -10,6 +10,8 @@
#ifndef TRE_COMPILE_H
#define TRE_COMPILE_H 1
+#include <regex.h>
+
typedef struct {
int position;
int code_min;
@@ -22,6 +24,13 @@ typedef struct {
int *params;
} tre_pos_and_tags_t;
+int tre_compile_bm(regex_t *preg, const tre_char_t *regex, size_t n,
+ int cflags);
+int tre_compile_heur(regex_t *preg, const tre_char_t *regex, size_t n,
+ int cflags);
+int tre_compile_nfa(regex_t *preg, const tre_char_t *regex, size_t n,
+ int cflags);
+
#endif /* TRE_COMPILE_H */
/* EOF */
Modified: user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c Wed Feb 15 10:33:29 2012 (r231754)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c Wed Feb 15 11:11:58 2012 (r231755)
@@ -402,8 +402,8 @@ static int fastcmp(const fastmatch_t *fg
* Returns: REG_OK on success, error code otherwise
*/
int
-tre_compile_literal(fastmatch_t *fg, const tre_char_t *pat, size_t n,
- int cflags)
+tre_proc_literal(fastmatch_t *fg, const tre_char_t *pat, size_t n,
+ int cflags)
{
INIT_COMP;
@@ -421,7 +421,7 @@ tre_compile_literal(fastmatch_t *fg, con
SAVE_PATTERN(pat, n, fg->pattern, fg->len);
#endif
- DPRINT(("tre_compile_literal: pattern: %s, len %zu, icase: %c, word: %c, "
+ DPRINT(("tre_proc_literal: pattern: %s, len %zu, icase: %c, word: %c, "
"newline %c\n", fg->pattern, fg->len, fg->icase ? 'y' : 'n',
fg->word ? 'y' : 'n', fg->newline ? 'y' : 'n'));
@@ -439,8 +439,8 @@ tre_compile_literal(fastmatch_t *fg, con
* Returns: REG_OK on success, error code otherwise
*/
int
-tre_compile_fast(fastmatch_t *fg, const tre_char_t *pat, size_t n,
- int cflags)
+tre_proc_fast(fastmatch_t *fg, const tre_char_t *pat, size_t n,
+ int cflags)
{
tre_char_t *tmp;
size_t pos = 0;
@@ -563,7 +563,7 @@ tre_compile_fast(fastmatch_t *fg, const
continue;
badpat:
xfree(tmp);
- DPRINT(("tre_compile_fast: compilation of pattern failed, falling"
+ DPRINT(("tre_proc_fast: compilation of pattern failed, falling"
"back to NFA\n"));
return REG_BADPAT;
}
@@ -582,7 +582,7 @@ badpat:
xfree(tmp);
- DPRINT(("tre_compile_fast: pattern: %s, len %zu, bol %c, eol %c, "
+ DPRINT(("tre_proc_fast: pattern: %s, len %zu, bol %c, eol %c, "
"icase: %c, word: %c, newline %c\n", fg->pattern, fg->len,
fg->bol ? 'y' : 'n', fg->eol ? 'y' : 'n',
fg->icase ? 'y' : 'n', fg->word ? 'y' : 'n',
Modified: user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h Wed Feb 15 10:33:29 2012 (r231754)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h Wed Feb 15 11:11:58 2012 (r231755)
@@ -9,9 +9,9 @@
#include "hashtable.h"
#include "tre-internal.h"
-int tre_compile_literal(fastmatch_t *preg, const tre_char_t *regex,
+int tre_proc_literal(fastmatch_t *preg, const tre_char_t *regex,
size_t, int);
-int tre_compile_fast(fastmatch_t *preg, const tre_char_t *regex, size_t, int);
+int tre_proc_fast(fastmatch_t *preg, const tre_char_t *regex, size_t, int);
int tre_match_fast(const fastmatch_t *fg, const void *data, size_t len,
tre_str_type_t type, int nmatch, regmatch_t pmatch[], int eflags);
void tre_free_fast(fastmatch_t *preg);
Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Wed Feb 15 10:33:29 2012 (r231754)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Wed Feb 15 11:11:58 2012 (r231755)
@@ -124,7 +124,7 @@
* heuristic cannot be constructed.
*/
int
-tre_compile_heur(heur_t *h, const tre_char_t *regex, size_t len, int cflags)
+tre_proc_heur(heur_t *h, const tre_char_t *regex, size_t len, int cflags)
{
tre_char_t **arr, *heur;
tre_char_t **farr;
@@ -513,7 +513,7 @@ ok:
errcode = REG_ESPACE;
goto err;
}
- ret = tre_compile_literal(h->heurs[i], farr[i], fsiz[i], 0);
+ ret = tre_proc_literal(h->heurs[i], farr[i], fsiz[i], 0);
if (ret != REG_OK)
{
errcode = REG_BADPAT;
Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h Wed Feb 15 10:33:29 2012 (r231754)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h Wed Feb 15 11:11:58 2012 (r231755)
@@ -23,8 +23,8 @@ typedef struct {
int type;
} heur_t;
-extern int tre_compile_heur(heur_t *h, const tre_char_t *regex,
- size_t len, int cflags);
+extern int tre_proc_heur(heur_t *h, const tre_char_t *regex,
+ size_t len, int cflags);
extern void tre_free_heur(heur_t *h);
#endif /* TRE_HEURISTIC_H */
More information about the svn-src-user mailing list