svn commit: r223693 - in user/gabor/tre-integration:
contrib/tre/lib include
Gabor Kovesdan
gabor at FreeBSD.org
Thu Jun 30 14:10:49 UTC 2011
Author: gabor
Date: Thu Jun 30 14:10:49 2011
New Revision: 223693
URL: http://svn.freebsd.org/changeset/base/223693
Log:
- Plug in the fixed-string matching code
Modified:
user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
user/gabor/tre-integration/contrib/tre/lib/fastmatch.h
user/gabor/tre-integration/contrib/tre/lib/regexec.c
user/gabor/tre-integration/contrib/tre/lib/tre-compile.c
user/gabor/tre-integration/contrib/tre/lib/tre.h
user/gabor/tre-integration/include/regex.h
Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/fastmatch.c Thu Jun 30 10:56:02 2011 (r223692)
+++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.c Thu Jun 30 14:10:49 2011 (r223693)
@@ -235,7 +235,7 @@ tre_fastcomp(fastmatch_t *fg, const tre_
int
tre_fastexec(const fastmatch_t *fg, const tre_char_t *data, size_t len,
- int nmatch, regmatch_t *pmatch)
+ int nmatch, regmatch_t pmatch[])
{
unsigned int j;
int cnt = 0;
@@ -253,8 +253,10 @@ tre_fastexec(const fastmatch_t *fg, cons
j = fg->eol ? len - fg->len : 0;
if (fastcmp(fg->pattern, data + j,
fg->len) == -1) {
- pmatch->rm_so = j;
- pmatch->rm_eo = j + fg->len;
+ if (!(fg->cflags & REG_NOSUB) || (nmatch < 1))
+ return 0;
+ pmatch[cnt].rm_so = j;
+ pmatch[cnt].rm_eo = j + fg->len;
ret = 0;
}
}
@@ -264,7 +266,7 @@ tre_fastexec(const fastmatch_t *fg, cons
do {
if (fastcmp(fg->pattern, data + j - fg->len,
fg->len) == -1) {
- if (!(fg->cflags & REG_NOSUB))
+ if (!(fg->cflags & REG_NOSUB) || (nmatch < 1))
return (0);
pmatch[cnt++].rm_so = j - fg->len;
pmatch[cnt++].rm_eo = j;
@@ -296,7 +298,7 @@ tre_fastexec(const fastmatch_t *fg, cons
j = 0;
do {
if (fastcmp(fg->pattern, data + j, fg->len) == -1) {
- if (!(fg->cflags & REG_NOSUB))
+ if (!(fg->cflags & REG_NOSUB) || (nmatch < 1))
return (0);
pmatch[cnt++].rm_so = j;
pmatch[cnt++].rm_eo = j + fg->len;
Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/fastmatch.h Thu Jun 30 10:56:02 2011 (r223692)
+++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.h Thu Jun 30 14:10:49 2011 (r223693)
@@ -28,6 +28,8 @@
#ifndef FASTMATCH_H
#define FASTMATCH_H 1
+#include <stdbool.h>
+
#include "hashtable.h"
#include "tre.h"
@@ -53,7 +55,7 @@ int tre_fastcomp_literal(fastmatch_t *pr
int tre_fastcomp(fastmatch_t *preg, const tre_char_t *regex, size_t,
int cflags);
int tre_fastexec(const fastmatch_t *fg, const tre_char_t *data,
- size_t len, int nmatch, regmatch_t *pmatch);
+ size_t len, int nmatch, regmatch_t pmatch[]);
void tre_fastfree(fastmatch_t *preg);
#endif /* FASTMATCH_H */
Modified: user/gabor/tre-integration/contrib/tre/lib/regexec.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/regexec.c Thu Jun 30 10:56:02 2011 (r223692)
+++ user/gabor/tre-integration/contrib/tre/lib/regexec.c Thu Jun 30 14:10:49 2011 (r223693)
@@ -44,6 +44,7 @@ char *alloca ();
#endif /* HAVE_MALLOC_H */
#include <limits.h>
+#include "fastmatch.h"
#include "tre-internal.h"
#include "tre.h"
#include "xmalloc.h"
@@ -150,10 +151,16 @@ tre_have_approx(const regex_t *preg)
static int
tre_match(const tre_tnfa_t *tnfa, const void *string, size_t len,
tre_str_type_t type, size_t nmatch, regmatch_t pmatch[],
- int eflags)
+ int eflags, void *shortcut)
{
reg_errcode_t status;
int *tags = NULL, eo;
+
+ /* Check if we can cheat with a fixed string */
+ if (shortcut != NULL)
+ return tre_fastexec((fastmatch_t *)shortcut, (const tre_char_t *)string,
+ len, nmatch, pmatch);
+
if (tnfa->num_tags > 0 && nmatch > 0)
{
#ifdef TRE_USE_ALLOCA
@@ -222,7 +229,8 @@ tre_regnexec(const regex_t *preg, const
size_t slen = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so);
size_t offset = pmatch[0].rm_so;
str = &str[offset];
- int ret = tre_match(tnfa, str, slen, type, nmatch, pmatch, eflags);
+ int ret = tre_match(tnfa, str, slen, type, nmatch, pmatch, eflags,
+ preg->shortcut);
if (!(eflags & REG_NOSUB))
{
for (unsigned i = 0; i < nmatch; i++)
@@ -235,7 +243,8 @@ tre_regnexec(const regex_t *preg, const
}
else
{
- return tre_match(tnfa, str, len, type, nmatch, pmatch, eflags);
+ return tre_match(tnfa, str, len, type, nmatch, pmatch, eflags,
+ preg->shortcut);
}
}
@@ -260,7 +269,8 @@ tre_regwnexec(const regex_t *preg, const
size_t slen = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so);
size_t offset = pmatch[0].rm_so;
str = &str[offset];
- int ret = tre_match(tnfa, str, slen, STR_WIDE, nmatch, pmatch, eflags);
+ int ret = tre_match(tnfa, str, slen, STR_WIDE, nmatch, pmatch, eflags,
+ preg->shortcut);
if (!(eflags & REG_NOSUB))
{
for (unsigned i = 0; i < nmatch; i++)
@@ -273,7 +283,8 @@ tre_regwnexec(const regex_t *preg, const
}
else
{
- return tre_match(tnfa, str, len, STR_WIDE, nmatch, pmatch, eflags);
+ return tre_match(tnfa, str, len, STR_WIDE, nmatch, pmatch, eflags,
+ preg->shortcut);
}
}
@@ -291,7 +302,8 @@ tre_reguexec(const regex_t *preg, const
size_t nmatch, regmatch_t pmatch[], int eflags)
{
tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
- return tre_match(tnfa, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags);
+ return tre_match(tnfa, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags,
+ preg->shortcut);
}
@@ -315,7 +327,7 @@ tre_match_approx(const tre_tnfa_t *tnfa,
if (params.max_cost == 0 && !tnfa->have_approx
&& !(eflags & REG_APPROX_MATCHER))
return tre_match(tnfa, string, len, type, match->nmatch, match->pmatch,
- eflags);
+ eflags, NULL);
/* Back references are not supported by the approximate matcher. */
if (tnfa->have_backrefs)
Modified: user/gabor/tre-integration/contrib/tre/lib/tre-compile.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-compile.c Thu Jun 30 10:56:02 2011 (r223692)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-compile.c Thu Jun 30 14:10:49 2011 (r223693)
@@ -20,6 +20,7 @@
#include <assert.h>
#include <string.h>
+#include "fastmatch.h"
#include "tre-internal.h"
#include "tre-mem.h"
#include "tre-stack.h"
@@ -1858,17 +1859,30 @@ tre_compile(regex_t *preg, const tre_cha
tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
tre_pos_and_tags_t *p;
int *counts = NULL, *offs = NULL;
- int i, add = 0;
+ int i, add = 0, ret;
tre_tnfa_transition_t *transitions, *initial;
tre_tnfa_t *tnfa = NULL;
tre_submatch_data_t *submatch_data;
tre_tag_direction_t *tag_directions = NULL;
reg_errcode_t errcode;
tre_mem_t mem;
+ fastmatch_t shortcut;
/* Parse context. */
tre_parse_ctx_t parse_ctx;
+ /* Check if we can cheat with a fixed string algorithm. */
+ ret = (cflags & REG_LITERAL)
+ ? tre_fastcomp_literal(&shortcut, regex, n, cflags)
+ : tre_fastcomp(&shortcut, regex, n, cflags);
+ if (!ret)
+ {
+ preg->shortcut = &shortcut;
+ return REG_OK;
+ }
+ else
+ preg->shortcut = NULL;
+
/* Allocate a stack used throughout the compilation process for various
purposes. */
stack = tre_stack_new(512, 10240, 128);
Modified: user/gabor/tre-integration/contrib/tre/lib/tre.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre.h Thu Jun 30 10:56:02 2011 (r223692)
+++ user/gabor/tre-integration/contrib/tre/lib/tre.h Thu Jun 30 14:10:49 2011 (r223693)
@@ -48,6 +48,7 @@ typedef int regoff_t;
typedef struct {
size_t re_nsub; /* Number of parenthesized subexpressions. */
void *value; /* For internal use only. */
+ void *shortcut; /* For internal use only. */
const char *re_endp;
} regex_t;
Modified: user/gabor/tre-integration/include/regex.h
==============================================================================
--- user/gabor/tre-integration/include/regex.h Thu Jun 30 10:56:02 2011 (r223692)
+++ user/gabor/tre-integration/include/regex.h Thu Jun 30 14:10:49 2011 (r223693)
@@ -40,6 +40,7 @@ typedef int regoff_t;
typedef struct {
size_t re_nsub; /* Number of parenthesized subexpressions. */
void *value; /* For internal use only. */
+ void *shortcut; /* For internal use only. */
const char *re_endp;
} regex_t;
More information about the svn-src-user
mailing list