svn commit: r226590 - user/gabor/tre-integration/contrib/tre/lib
Gabor Kovesdan
gabor at FreeBSD.org
Thu Oct 20 22:38:24 UTC 2011
Author: gabor
Date: Thu Oct 20 22:38:24 2011
New Revision: 226590
URL: http://svn.freebsd.org/changeset/base/226590
Log:
- Fix return value in heuristic compiler
- Implement longest fragment heuristic for REG_NEWLINE behavior
Modified:
user/gabor/tre-integration/contrib/tre/lib/regexec.c
user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c
user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h
Modified: user/gabor/tre-integration/contrib/tre/lib/regexec.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/regexec.c Thu Oct 20 21:49:26 2011 (r226589)
+++ user/gabor/tre-integration/contrib/tre/lib/regexec.c Thu Oct 20 22:38:24 2011 (r226590)
@@ -165,10 +165,10 @@ tre_match(const tre_tnfa_t *tnfa, const
pmatch, eflags);
}
-#define FIX_OFFSETS \
+#define FIX_OFFSETS(adj) \
if (ret == REG_NOMATCH) \
{ \
- st += n; \
+ adj; \
continue; \
} \
else if ((ret == REG_OK) && !(tnfa->cflags & REG_NOSUB)) \
@@ -191,57 +191,94 @@ tre_match(const tre_tnfa_t *tnfa, const
const char *data_byte = string;
const tre_char_t *data_wide = string;
- DPRINT(("tre_match: using a heuristic [%s/%s] to speed up the "
- "search\n", heur->start->pattern, heur->end->pattern));
-
- while (st < len)
+ if (heur->type == HEUR_LONGEST)
{
- SEEK_TO(st);
-
- /* Prefix heuristic */
- ret = tre_match_fast(heur->heurs[0], string, len - st, type, nmatch,
- pmatch, eflags);
- if (ret != REG_OK)
- return ret;
- st += pmatch[0].rm_so;
- n = pmatch[0].rm_eo;
-
- /* Intermediate heuristics */
- while (!((heur->heurs[i] == NULL) ||
- (heur->prefix && heur->heurs[i + 1] == NULL)))
+ while (st < len)
{
- SEEK_TO(st + n);
- ret = tre_match_fast(heur->heurs[i], string, len - st - n, type,
- nmatch, pmatch, eflags);
- if (ret != REG_OK)
- return ret;
- n += pmatch[0].rm_eo;
- i++;
- }
+ size_t eo, so;
- /* Suffix heuristic available */
- if (heur->prefix && heur->heurs[i] != NULL)
- {
- SEEK_TO(st + n);
- ret = tre_match_fast(heur->heurs[i], string, len - st - n, type,
- nmatch, pmatch, eflags);
+ SEEK_TO(st);
+ ret = tre_match_fast(heur->heurs[0], string, len - st, type, nmatch,
+ pmatch, eflags);
if (ret != REG_OK)
return ret;
- n += pmatch[0].rm_eo;
- SEEK_TO(st);
- ret = tre_match(tnfa, string, n, type, nmatch, pmatch,
- eflags, NULL, NULL);
- FIX_OFFSETS;
- }
- /* Suffix heuristic not available */
- else
+ for (so = st + pmatch[0].rm_so - 1; ; so--)
+ {
+ if ((type == STR_WIDE) ? (data_wide[so] == TRE_CHAR('\n')) :
+ (data_byte[so] == '\n'))
+ break;
+ if (so == 0)
+ break;
+ }
+
+ for (eo = st + pmatch[0].rm_eo; st + eo < len; eo++)
+ {
+ if ((type == STR_WIDE) ? (data_wide[eo] == TRE_CHAR('\n')) :
+ (data_byte[eo] == '\n'))
+ break;
+ }
+
+ SEEK_TO(so);
+ ret = tre_match(tnfa, string, eo - so, type, nmatch, pmatch, eflags, NULL, NULL);
+ FIX_OFFSETS(st = eo);
+
+ }
+ return REG_NOMATCH;
+ }
+ else
+ {
+ while (st < len)
{
SEEK_TO(st);
- ret = tre_match(tnfa, string, len - st, type, nmatch, pmatch,
- eflags, NULL, NULL);
- FIX_OFFSETS;
- }
+
+ /* Prefix heuristic */
+ ret = tre_match_fast(heur->heurs[0], string, len - st,
+ type, nmatch, pmatch, eflags);
+ if (ret != REG_OK)
+ return ret;
+ st += pmatch[0].rm_so;
+ n = pmatch[0].rm_eo;
+
+ /* Intermediate heuristics */
+ while (!((heur->heurs[i] == NULL) ||
+ ((heur->type == HEUR_PREFIX_ARRAY) &&
+ heur->heurs[i + 1] == NULL)))
+ {
+ SEEK_TO(st + n);
+ ret = tre_match_fast(heur->heurs[i], string, len - st - n,
+ type, nmatch, pmatch, eflags);
+ if (ret != REG_OK)
+ return ret;
+ n += pmatch[0].rm_eo;
+ i++;
+ }
+
+ /* Suffix heuristic available */
+ if ((heur->type == HEUR_ARRAY) && heur->heurs[i] != NULL)
+ {
+ SEEK_TO(st + n);
+ ret = tre_match_fast(heur->heurs[i], string, len - st - n,
+ type, nmatch, pmatch, eflags);
+ if (ret != REG_OK)
+ return ret;
+ n += pmatch[0].rm_eo;
+
+ SEEK_TO(st);
+ ret = tre_match(tnfa, string, n, type, nmatch, pmatch,
+ eflags, NULL, NULL);
+ FIX_OFFSETS(st += n);
+ }
+ /* Suffix heuristic not available */
+ else
+ {
+ SEEK_TO(st);
+ ret = tre_match(tnfa, string, len - st, type, nmatch,
+ pmatch, eflags, NULL, NULL);
+ FIX_OFFSETS(st += n);
+ }
+ }
+ return REG_NOMATCH;
}
}
Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Thu Oct 20 21:49:26 2011 (r226589)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Thu Oct 20 22:38:24 2011 (r226590)
@@ -133,6 +133,8 @@ tre_compile_heur(heur_t *h, const tre_ch
if (!heur)
return REG_ESPACE;
+ h->type = HEUR_ARRAY;
+
while (true)
{
@@ -316,7 +318,7 @@ end_segment:
errcode = REG_BADPAT;
goto err;
}
- h->prefix = true;
+ h->type = HEUR_PREFIX_ARRAY;
goto ok;
}
@@ -340,7 +342,6 @@ end_segment:
}
ok:
-
{
size_t m = 1;
int ret;
@@ -348,12 +349,6 @@ ok:
for (int i = 1; i < j; i++)
m = (length[i] > length[m]) ? i : m;
- if (!h->heurs)
- {
- errcode = REG_ESPACE;
- goto err;
- }
-
for (int i = 0; i < MIN(3, j + 1); i++)
{
h->heurs[i] = xmalloc(sizeof(fastmatch_t));
@@ -371,30 +366,41 @@ ok:
goto err2; \
}
- ret = tre_compile_fast(h->heurs[0], arr[0], length[0], 0);
- CHECK_ERR
- if (j == 1)
+ if (cflags & REG_NEWLINE)
{
- xfree(h->heurs[1]);
- h->heurs[1] = NULL;
- goto finish;
+ ret = tre_compile_fast(h->heurs[0], arr[m], length[m], 0);
+ CHECK_ERR
+ h->type = HEUR_LONGEST;
}
else
- ret = tre_compile_fast(h->heurs[1], arr[m], length[m], 0);
- CHECK_ERR
- if (h->prefix || (m == j - 1))
{
- xfree(h->heurs[2]);
- h->heurs[2] = NULL;
- goto finish;
+ ret = tre_compile_fast(h->heurs[0], arr[0], length[0], 0);
+ CHECK_ERR
+ if (j == 1)
+ {
+ free(h->heurs[1]);
+ h->heurs[1] = NULL;
+ errcode = REG_OK;
+ goto finish;
+ }
+ else
+ ret = tre_compile_fast(h->heurs[1], arr[m], length[m], 0);
+ CHECK_ERR
+ if ((h->type == HEUR_PREFIX_ARRAY) || (m == j - 1))
+ {
+ xfree(h->heurs[2]);
+ h->heurs[2] = NULL;
+ errcode = REG_OK;
+ goto finish;
+ }
+ else
+ ret = tre_compile_fast(h->heurs[2], arr[j - 1], length[j - 1], 0);
+ CHECK_ERR
+ h->heurs[3] = NULL;
}
- else
- ret = tre_compile_fast(h->heurs[2], arr[j - 1], length[j - 1], 0);
- CHECK_ERR
- h->heurs[3] = NULL;
- errcode = REG_OK;
- goto finish;
+ errcode = REG_OK;
+ goto finish;
}
err2:
Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h Thu Oct 20 21:49:26 2011 (r226589)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.h Thu Oct 20 22:38:24 2011 (r226590)
@@ -7,10 +7,13 @@
#include "tre-fastmatch.h"
#include "tre-internal.h"
+#define HEUR_ARRAY 0
+#define HEUR_PREFIX_ARRAY 1
+#define HEUR_LONGEST 2
+
typedef struct {
fastmatch_t *heurs[4];
- bool prefix;
- bool newline;
+ int type;
} heur_t;
extern int tre_compile_heur(heur_t *h, const tre_char_t *regex,
More information about the svn-src-user
mailing list