svn commit: r223726 - user/gabor/tre-integration/contrib/tre/lib
Gabor Kovesdan
gabor at FreeBSD.org
Sat Jul 2 20:14:40 UTC 2011
Author: gabor
Date: Sat Jul 2 20:14:40 2011
New Revision: 223726
URL: http://svn.freebsd.org/changeset/base/223726
Log:
- Fix some bugs
- Refactor to support single-byte, multi-byte and wide character strings;
at the moment still not complete
- Be more consistent to TRE coding style
Modified:
user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
user/gabor/tre-integration/contrib/tre/lib/fastmatch.h
user/gabor/tre-integration/contrib/tre/lib/regcomp.c
user/gabor/tre-integration/contrib/tre/lib/regexec.c
user/gabor/tre-integration/contrib/tre/lib/tre-compile.c
Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/fastmatch.c Sat Jul 2 18:43:35 2011 (r223725)
+++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.c Sat Jul 2 20:14:40 2011 (r223726)
@@ -38,9 +38,10 @@
#include "fastmatch.h"
#include "hashtable.h"
#include "tre.h"
+#include "tre-internal.h"
#include "xmalloc.h"
-static int fastcmp(const tre_char_t *, const tre_char_t *, size_t);
+static int fastcmp(const tre_char_t *, const void *, size_t, tre_str_type_t);
static void revstr(tre_char_t *, int);
#ifdef TRE_WCHAR
@@ -49,6 +50,30 @@ static void revstr(tre_char_t *, int);
#define TRE_CHAR(n) n
#endif
+#define SKIP_CHARS(n) \
+ do { \
+ switch (type) \
+ { \
+ case STR_BYTE: \
+ startptr = str_byte + n; \
+ break; \
+ case STR_MBS: \
+ for (skip = j = 0; j < n; j++) \
+ { \
+ siz = mbrlen(str_byte, MB_CUR_MAX, NULL); \
+ skip += siz; \
+ } \
+ startptr = str_byte + skip; \
+ break; \
+ case STR_WIDE: \
+ startptr = str_wide + n; \
+ break; \
+ default: \
+ /* XXX */ \
+ break; \
+ } \
+ } while (0); \
+
/*
* Returns: -1 on failure, 0 on success
*/
@@ -57,14 +82,16 @@ tre_fastcomp_literal(fastmatch_t *fg, co
{
/* Initialize. */
- fg->len = n;
+ fg->len = (n == 0) ? tre_strlen(pat) : n;
fg->bol = false;
fg->eol = false;
fg->reversed = false;
fg->cflags = cflags;
- fg->pattern = xmalloc((n + 1) * sizeof(tre_char_t));
- memcpy(&fg->pattern, pat, n * sizeof(tre_char_t));
- fg->pattern[n] = TRE_CHAR('\0');
+ fg->pattern = xmalloc((fg->len + 1) * sizeof(tre_char_t));
+ if (fg->pattern == NULL)
+ return -1;
+ memcpy(fg->pattern, pat, fg->len * sizeof(tre_char_t));
+ fg->pattern[fg->len] = TRE_CHAR('\0');
/* Preprocess pattern. */
#ifdef TRE_WCHAR
@@ -84,7 +111,7 @@ tre_fastcomp_literal(fastmatch_t *fg, co
fg->qsBc[fg->pattern[i]] = fg->len - i;
#endif
- return 0;
+ return REG_OK;
}
/*
@@ -99,7 +126,7 @@ tre_fastcomp(fastmatch_t *fg, const tre_
int lastHalfDot = 0;
/* Initialize. */
- fg->len = n;
+ fg->len = (n == 0) ? tre_strlen(pat) : n;
fg->bol = false;
fg->eol = false;
fg->reversed = false;
@@ -107,7 +134,7 @@ tre_fastcomp(fastmatch_t *fg, const tre_
fg->cflags = cflags;
/* Remove end-of-line character ('$'). */
- if (fg->len > 0 && pat[fg->len - 1] == TRE_CHAR('$'))
+ if ((fg->len > 0) && (pat[fg->len - 1] == TRE_CHAR('$')))
{
fg->eol = true;
fg->len--;
@@ -121,9 +148,9 @@ tre_fastcomp(fastmatch_t *fg, const tre_
pat++;
}
- if (fg->len >= 14 &&
- memcmp(pat, TRE_CHAR("[[:<:]]"), 7 * sizeof(tre_char_t)) == 0 &&
- memcmp(pat + fg->len - 7, TRE_CHAR("[[:>:]]"), 7 * sizeof(tre_char_t)) == 0)
+ if ((fg->len >= 14) &&
+ (memcmp(pat, TRE_CHAR("[[:<:]]"), 7 * sizeof(tre_char_t)) == 0) &&
+ (memcmp(pat + fg->len - 7, TRE_CHAR("[[:>:]]"), 7 * sizeof(tre_char_t)) == 0))
{
fg->len -= 14;
pat += 7;
@@ -167,7 +194,7 @@ tre_fastcomp(fastmatch_t *fg, const tre_
/* Free memory and let others know this is empty. */
free(fg->pattern);
fg->pattern = NULL;
- return (-1);
+ return -1;
}
}
@@ -230,20 +257,43 @@ tre_fastcomp(fastmatch_t *fg, const tre_
if (fg->reversed)
revstr(fg->pattern, fg->len);
- return (0);
+ return REG_OK;
}
int
-tre_fastexec(const fastmatch_t *fg, const tre_char_t *data, size_t len,
- int nmatch, regmatch_t pmatch[])
+tre_fastexec(const fastmatch_t *fg, const void *data, size_t len,
+ tre_str_type_t type, int nmatch, regmatch_t pmatch[])
{
unsigned int j;
+ size_t siz, skip;
int cnt = 0;
int ret = REG_NOMATCH;
+ const char *str_byte = data;
+ const void *startptr;
+#ifdef TRE_WCHAR
+ const wchar_t *str_wide = data;
+#endif
+
+ if (len == (unsigned)-1)
+ {
+ switch (type)
+ {
+ case STR_BYTE:
+ case STR_MBS:
+ len = strlen(str_byte);
+ break;
+ case STR_WIDE:
+ len = wcslen(str_wide);
+ break;
+ default:
+ /* XXX */
+ break;
+ }
+ }
/* No point in going farther if we do not have enough data. */
if (len < fg->len)
- return (ret);
+ return ret;
/* Only try once at the beginning or ending of the line. */
if (fg->bol || fg->eol) {
@@ -251,28 +301,29 @@ tre_fastexec(const fastmatch_t *fg, cons
if (!((fg->bol && fg->eol) && (len != fg->len))) {
/* Determine where in data to start search at. */
j = fg->eol ? len - fg->len : 0;
- if (fastcmp(fg->pattern, data + j,
- fg->len) == -1) {
+ SKIP_CHARS(j);
+ if (fastcmp(fg->pattern, startptr, fg->len, type) == -1) {
if (!(fg->cflags & REG_NOSUB) || (nmatch < 1))
- return 0;
+ return REG_OK;
pmatch[cnt].rm_so = j;
pmatch[cnt].rm_eo = j + fg->len;
- ret = 0;
+ return REG_OK;
}
}
} else if (fg->reversed) {
/* Quick Search algorithm. */
j = len;
do {
- if (fastcmp(fg->pattern, data + j - fg->len,
- fg->len) == -1) {
+ SKIP_CHARS(j - fg->len);
+ if (fastcmp(fg->pattern, startptr, fg->len, type) == -1) {
if (!(fg->cflags & REG_NOSUB) || (nmatch < 1))
- return (0);
+ return REG_OK;
pmatch[cnt++].rm_so = j - fg->len;
pmatch[cnt++].rm_eo = j;
nmatch--;
+ ret = REG_OK;
if (nmatch < 1)
- return (0);
+ return ret;
else {
j -= 2 * fg->len;
continue;
@@ -297,14 +348,16 @@ tre_fastexec(const fastmatch_t *fg, cons
/* Quick Search algorithm. */
j = 0;
do {
- if (fastcmp(fg->pattern, data + j, fg->len) == -1) {
+ SKIP_CHARS(j);
+ if (fastcmp(fg->pattern, startptr, fg->len, type) == -1) {
if (!(fg->cflags & REG_NOSUB) || (nmatch < 1))
- return (0);
+ return REG_OK;
pmatch[cnt++].rm_so = j;
pmatch[cnt++].rm_eo = j + fg->len;
nmatch--;
+ ret = REG_OK;
if (nmatch < 1)
- return (0);
+ return ret;
else {
j += fg->len;
continue;
@@ -327,7 +380,7 @@ tre_fastexec(const fastmatch_t *fg, cons
#endif
} while (j <= (len - fg->len));
}
- return (ret);
+ return ret;
}
void
@@ -345,15 +398,45 @@ tre_fastfree(fastmatch_t *fg)
* -1 on success
*/
static inline int
-fastcmp(const tre_char_t *pat, const tre_char_t *data, size_t len)
+fastcmp(const tre_char_t *pat, const void *data, size_t len,
+ tre_str_type_t type)
{
+ const char *str_byte = data;
+#ifdef TRE_WCHAR
+ const wchar_t *str_wide = data;
+ wint_t wc;
+ size_t s;
+#endif
for (unsigned int i = 0; i < len; i++) {
- if ((pat[i] == data[i]) || (pat[i] == TRE_CHAR('.')))
+ if (pat[i] == TRE_CHAR('.'))
continue;
- return (i);
+ switch (type)
+ {
+ case STR_BYTE:
+ if (pat[i] == btowc(str_byte[i]))
+ continue;
+ break;
+ case STR_MBS:
+ s = mbrtowc(&wc, str_byte, MB_CUR_MAX, NULL);
+ if (s == (size_t)-1)
+ return i;
+ else
+ str_byte += s;
+ if (pat[i] == wc)
+ continue;
+ break;
+ case STR_WIDE:
+ if (pat[i] == str_wide[i])
+ continue;
+ break;
+ default:
+ /* XXX */
+ break;
+ }
+ return i;
}
- return (-1);
+ return -1;
}
static inline void
Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/fastmatch.h Sat Jul 2 18:43:35 2011 (r223725)
+++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.h Sat Jul 2 20:14:40 2011 (r223726)
@@ -32,6 +32,7 @@
#include "hashtable.h"
#include "tre.h"
+#include "tre-internal.h"
typedef struct {
size_t len;
@@ -54,8 +55,8 @@ int tre_fastcomp_literal(fastmatch_t *pr
size_t, int cflags);
int tre_fastcomp(fastmatch_t *preg, const tre_char_t *regex, size_t,
int cflags);
-int tre_fastexec(const fastmatch_t *fg, const tre_char_t *data,
- size_t len, int nmatch, regmatch_t pmatch[]);
+int tre_fastexec(const fastmatch_t *fg, const void *data, size_t len,
+ tre_str_type_t type, int nmatch, regmatch_t pmatch[]);
void tre_fastfree(fastmatch_t *preg);
#endif /* FASTMATCH_H */
Modified: user/gabor/tre-integration/contrib/tre/lib/regcomp.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/regcomp.c Sat Jul 2 18:43:35 2011 (r223725)
+++ user/gabor/tre-integration/contrib/tre/lib/regcomp.c Sat Jul 2 20:14:40 2011 (r223726)
@@ -110,14 +110,13 @@ tre_regncomp(regex_t *preg, const char *
int
tre_regcomp(regex_t *preg, const char *regex, int cflags)
{
- size_t len;
+ size_t len;
- if (cflags & REG_PEND)
- {
- if (preg->re_endp >= regex)
- len = preg->re_endp - regex;
- else
- len = 0;
+ if (cflags & REG_PEND)
+ {
+ len = (preg->re_endp >= regex)
+ ? preg->re_endp - regex
+ : 0;
return tre_regncomp(preg, regex, len, cflags);
}
else
Modified: user/gabor/tre-integration/contrib/tre/lib/regexec.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/regexec.c Sat Jul 2 18:43:35 2011 (r223725)
+++ user/gabor/tre-integration/contrib/tre/lib/regexec.c Sat Jul 2 20:14:40 2011 (r223726)
@@ -151,15 +151,14 @@ tre_have_approx(const regex_t *preg)
static int
tre_match(const tre_tnfa_t *tnfa, const void *string, size_t len,
tre_str_type_t type, size_t nmatch, regmatch_t pmatch[],
- int eflags, void *shortcut)
+ int eflags, fastmatch_t *shortcut)
{
reg_errcode_t status;
int *tags = NULL, eo;
/* Check if we can cheat with a fixed string */
if (shortcut != NULL)
- return tre_fastexec((fastmatch_t *)shortcut, (const tre_char_t *)string,
- len, nmatch, pmatch);
+ return tre_fastexec(shortcut, string, len, nmatch, pmatch);
if (tnfa->num_tags > 0 && nmatch > 0)
{
Modified: user/gabor/tre-integration/contrib/tre/lib/tre-compile.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-compile.c Sat Jul 2 18:43:35 2011 (r223725)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-compile.c Sat Jul 2 20:14:40 2011 (r223726)
@@ -1866,23 +1866,29 @@ tre_compile(regex_t *preg, const tre_cha
tre_tag_direction_t *tag_directions = NULL;
reg_errcode_t errcode;
tre_mem_t mem;
- fastmatch_t shortcut;
+ fastmatch_t *shortcut;
/* Parse context. */
tre_parse_ctx_t parse_ctx;
/* Check if we can cheat with a fixed string algorithm. */
+ shortcut = xmalloc(sizeof(fastmatch_t));
+ if (!shortcut)
+ return REG_ESPACE;
ret = (cflags & REG_LITERAL)
- ? tre_fastcomp_literal(&shortcut, regex, n, cflags)
- : tre_fastcomp(&shortcut, regex, n, cflags);
+ ? tre_fastcomp_literal(shortcut, regex, n, cflags)
+ : tre_fastcomp(shortcut, regex, n, cflags);
if (!ret)
{
- preg->shortcut = &shortcut;
+ preg->shortcut = shortcut;
preg->re_nsub = 0;
return REG_OK;
}
else
- preg->shortcut = NULL;
+ {
+ free(shortcut);
+ preg->shortcut = NULL;
+ }
/* Allocate a stack used throughout the compilation process for various
purposes. */
More information about the svn-src-user
mailing list