svn commit: r224758 - user/gabor/tre-integration/contrib/tre/lib
Gabor Kovesdan
gabor at FreeBSD.org
Wed Aug 10 15:19:14 UTC 2011
Author: gabor
Date: Wed Aug 10 15:19:14 2011
New Revision: 224758
URL: http://svn.freebsd.org/changeset/base/224758
Log:
- Make fg->pattern always hold the SB/MB string and fg->wpattern always hold
the wide string. This is more logical and allows simplifying some parts of
the code.
- Cleanup and style changes to follow TRE's coding style.
Modified:
user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/fastmatch.c Wed Aug 10 13:24:32 2011 (r224757)
+++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.c Wed Aug 10 15:19:14 2011 (r224758)
@@ -43,8 +43,6 @@
static int fastcmp(const void *, const void *, size_t,
tre_str_type_t, bool);
-static void revstr(tre_char_t *, int);
-static void revs(char *str, int len);
#ifdef TRE_WCHAR
#define TRE_CHAR(n) L##n
@@ -56,16 +54,11 @@ static void revs(char *str, int len);
do { \
switch (type) \
{ \
- case STR_BYTE: \
- case STR_MBS: \
- startptr = str_byte + n; \
- break; \
case STR_WIDE: \
startptr = str_wide + n; \
break; \
default: \
- /* XXX */ \
- break; \
+ startptr = str_byte + n; \
} \
} while (0); \
@@ -85,117 +78,72 @@ static void revs(char *str, int len);
} while (0); \
#define COMPARE \
- do { \
- switch (type) \
- { \
- case STR_BYTE: \
- case STR_MBS: \
- mismatch = fastcmp(fg->pattern, startptr, fg->len, type, \
- fg->icase); \
- break; \
- case STR_WIDE: \
- mismatch = fastcmp(fg->wpattern, startptr, fg->wlen, type, \
- fg->icase); \
- default: \
- break; \
+ switch (type) \
+ { \
+ case STR_WIDE: \
+ mismatch = fastcmp(fg->wpattern, startptr, fg->wlen, type, \
+ fg->icase); \
+ break; \
+ default: \
+ mismatch = fastcmp(fg->pattern, startptr, fg->len, type, \
+ fg->icase); \
} \
- } while (0);
-#ifdef TRE_WCHAR
#define IS_OUT_OF_BOUNDS \
((type == STR_WIDE) ? ((j + fg->wlen) > len) : ((j + fg->len) > len))
-#else
-#define IS_OUT_OF_BOUNDS ((j + fg->len) > len)
-#endif
#define CHECKBOUNDS \
if (IS_OUT_OF_BOUNDS) \
break; \
-#ifdef TRE_WCHAR
#define SHIFT \
- CHECKBOUNDS; \
- { \
- int bc = 0, gs = 0, ts, r = -1; \
+ CHECKBOUNDS; \
\
- switch (type) \
- { \
- case STR_BYTE: \
- case STR_MBS: \
- if (!fg->hasdot) \
- { \
- if (u != 0 && mismatch == fg->len - 1 - shift) \
- mismatch -= u; \
- v = fg->len - 1 - mismatch; \
- gs = fg->sbmGs[mismatch]; \
- } \
- bc = fg->qsBc[((unsigned char *)startptr) \
- [mismatch + 1]]; \
- break; \
- case STR_WIDE: \
- if (!fg->hasdot) \
- { \
- if (u != 0 && mismatch == fg->wlen - 1 - shift) \
- mismatch -= u; \
- v = fg->wlen - 1 - mismatch; \
- r = hashtable_get(fg->qsBc_table, \
- &((wchar_t *)startptr)[mismatch + 1], &bc); \
- gs = fg->bmGs[mismatch]; \
- } \
- bc = (r == 0) ? bc : fg->defBc; \
- break; \
- default: \
- /* XXX */ \
- break; \
- } \
- if (fg->hasdot) \
- shift = bc; \
- else \
- { \
- ts = u - v; \
- shift = MAX(ts, bc); \
- shift = MAX(shift, gs); \
- if (shift == gs) \
- u = MIN((type == STR_WIDE ? fg->wlen : fg->len) - \
- shift, v); \
- else \
- { \
- if (ts < bc) \
- shift = MAX(shift, u + 1); \
- u = 0; \
- } \
- } \
- j += shift; \
- }
-#else
-#define SHIFT \
- CHECKBOUNDS; \
+ { \
+ int bc = 0, gs = 0, ts, r = -1; \
+ \
+ switch (type) \
+ { \
+ case STR_WIDE: \
+ if (!fg->hasdot) \
+ { \
+ if (u != 0 && mismatch == fg->wlen - 1 - shift) \
+ mismatch -= u; \
+ v = fg->wlen - 1 - mismatch; \
+ r = hashtable_get(fg->qsBc_table, \
+ &((wchar_t *)startptr)[mismatch + 1], &bc); \
+ gs = fg->bmGs[mismatch]; \
+ } \
+ bc = (r == 0) ? bc : fg->defBc; \
+ break; \
+ default: \
+ if (!fg->hasdot) \
+ { \
+ if (u != 0 && mismatch == fg->len - 1 - shift) \
+ mismatch -= u; \
+ v = fg->len - 1 - mismatch; \
+ gs = fg->sbmGs[mismatch]; \
+ } \
+ bc = fg->qsBc[((unsigned char *)startptr)[mismatch + 1]]; \
+ } \
+ if (fg->hasdot) \
+ shift = bc; \
+ else \
{ \
- int bc, gs; \
- bc = fg->qsBc[((unsigned char *)startptr)[mismatch + 1]]; \
- if (fg->hasdot) \
- shift = bc; \
+ ts = u - v; \
+ shift = MAX(ts, bc); \
+ shift = MAX(shift, gs); \
+ if (shift == gs) \
+ u = MIN((type == STR_WIDE ? fg->wlen : fg->len) - shift, v); \
else \
{ \
- gs = fg->bmGs[mismatch]; \
- if (u != 0 && mismatch == fg->wlen - 1 - shift) \
- mismatch -= u; \
- v = fg->wlen - 1 - mismatch; \
- ts = u - v; \
- shift = MAX(ts, bc); \
- shift = MAX(shift, gs); \
- if (shift == gs) \
- u = MIN(fg->wlen - shift, v); \
- else \
- { \
- if (ts < bc) \
- shift = MAX(shift, u + 1); \
- u = 0; \
- } \
+ if (ts < bc) \
+ shift = MAX(shift, u + 1); \
+ u = 0; \
} \
- j += shift; \
- }
-#endif
+ } \
+ j += shift; \
+ }
/*
* Normal Quick Search would require a shift based on the position the
@@ -214,23 +162,22 @@ static void revs(char *str, int len);
* thi. 1
*/
-#define FILL_ARRAY(pat, plen) \
+#define FILL_QSBC \
for (unsigned int i = 0; i <= UCHAR_MAX; i++) \
- fg->qsBc[i] = plen - fg->hasdot; \
- for (int i = fg->hasdot + 1; i < plen; i++) \
+ fg->qsBc[i] = fg->len - fg->hasdot; \
+ for (int i = fg->hasdot + 1; i < fg->len; i++) \
{ \
- fg->qsBc[(unsigned)pat[i]] = plen - i; \
+ fg->qsBc[(unsigned)fg->pattern[i]] = fg->len - i; \
if (fg->icase) \
{ \
- char c = islower(pat[i]) ? toupper(pat[i]) \
- : tolower(pat[i]); \
- fg->qsBc[(unsigned)c] = plen - i; \
+ char c = islower(fg->pattern[i]) ? toupper(fg->pattern[i]) \
+ : tolower(fg->pattern[i]); \
+ fg->qsBc[(unsigned)c] = fg->len - i; \
} \
}
-#ifdef TRE_WCHAR
-#define FILL_QSBC \
+#define FILL_QSBC_WIDE \
/* Adjust the shift based on location of the last dot ('.'). */ \
fg->defBc = fg->wlen - fg->hasdot; \
\
@@ -248,11 +195,6 @@ static void revs(char *str, int len);
hashtable_put(fg->qsBc_table, &wc, &k); \
} \
} \
- \
- FILL_ARRAY(fg->pattern, fg->len);
-#else
-#define FILL_QSBC FILL_ARRAY(fg->wpattern, fg->wlen);
-#endif
#define FILL_BMGS(arr, pat, plen, wide) \
{ \
@@ -324,51 +266,44 @@ static void revs(char *str, int len);
free(suff); \
}
-#define REVFUNC(name, argtype) \
-static inline void \
-name(argtype *str, int len) \
-{ \
- argtype c; \
- \
- for (int i = 0; i < len / 2; i++) \
- { \
- c = str[i]; \
- str[i] = str[len - i - 1]; \
- str[len - i - 1] = c; \
- } \
-}
-
-REVFUNC(revstr, tre_char_t)
-REVFUNC(revs, char)
-
/*
* Returns: REG_OK on success, error code otherwise
*/
int
-tre_fastcomp_literal(fastmatch_t *fg, const tre_char_t *wpat, size_t n,
+tre_fastcomp_literal(fastmatch_t *fg, const tre_char_t *pat, size_t n,
int cflags)
{
/* Initialize. */
memset(fg, 0, sizeof(*fg));
fg->icase = (cflags & REG_ICASE);
- /* XXX */
+
+ /* Cannot handle REG_ICASE with MB string */
if (fg->icase && (MB_CUR_MAX > 1))
return REG_BADPAT;
- fg->wlen = (n == 0) ? tre_strlen(wpat) : n;
+#ifdef TRE_WCHAR
+ fg->wlen = (n == 0) ? tre_strlen(pat) : n;
fg->wpattern = xmalloc((fg->wlen + 1) * sizeof(tre_char_t));
if (fg->wpattern == NULL)
return REG_ESPACE;
- memcpy(fg->wpattern, wpat, fg->wlen * sizeof(tre_char_t));
+ memcpy(fg->wpattern, pat, fg->wlen * sizeof(tre_char_t));
fg->wpattern[fg->wlen] = TRE_CHAR('\0');
-#ifdef TRE_WCHAR
+
STORE_MBS_PAT;
+#else
+ fg->len = (n == 0) ? tre_strlen(pat) : n;
+ fg->pattern = xmalloc((fg->len + 1) * sizeof(tre_char_t));
+ if (fg->pattern == NULL)
+ return REG_ESPACE;
+ memcpy(fg->pattern, pat, fg->len * sizeof(tre_char_t));
+ fg->pattern[fg->len] = TRE_CHAR('\0');
#endif
FILL_QSBC;
- FILL_BMGS(fg->bmGs, fg->wpattern, fg->wlen, true);
-#ifdef TRE_WCHAR
FILL_BMGS(fg->sbmGs, fg->pattern, fg->len, false);
+#ifdef TRE_WCHAR
+ FILL_QSBC_WIDE;
+ FILL_BMGS(fg->bmGs, fg->wpattern, fg->wlen, true);
#endif
return REG_OK;
@@ -378,52 +313,53 @@ tre_fastcomp_literal(fastmatch_t *fg, co
* Returns: REG_OK on success, error code otherwise
*/
int
-tre_fastcomp(fastmatch_t *fg, const tre_char_t *wpat, size_t n,
+tre_fastcomp(fastmatch_t *fg, const tre_char_t *pat, size_t n,
int cflags)
{
/* Initialize. */
memset(fg, 0, sizeof(*fg));
fg->icase = (cflags & REG_ICASE);
- /* XXX */
+
+ /* Cannot handle REG_ICASE with MB string */
if (fg->icase && (MB_CUR_MAX > 1))
return REG_BADPAT;
- fg->wlen = (n == 0) ? tre_strlen(wpat) : n;
+ fg->wlen = (n == 0) ? tre_strlen(pat) : n;
/* Remove end-of-line character ('$'). */
- if ((fg->wlen > 0) && (wpat[fg->wlen - 1] == TRE_CHAR('$')))
+ if ((fg->wlen > 0) && (pat[fg->wlen - 1] == TRE_CHAR('$')))
{
fg->eol = true;
fg->wlen--;
}
/* Remove beginning-of-line character ('^'). */
- if (wpat[0] == TRE_CHAR('^'))
+ if (pat[0] == TRE_CHAR('^'))
{
fg->bol = true;
fg->wlen--;
- wpat++;
+ pat++;
}
if ((fg->wlen >= 14) &&
- (memcmp(wpat, TRE_CHAR("[[:<:]]"), 7 * sizeof(tre_char_t)) == 0) &&
- (memcmp(wpat + fg->wlen - 7, TRE_CHAR("[[:>:]]"),
+ (memcmp(pat, TRE_CHAR("[[:<:]]"), 7 * sizeof(tre_char_t)) == 0) &&
+ (memcmp(pat + fg->wlen - 7, TRE_CHAR("[[:>:]]"),
7 * sizeof(tre_char_t)) == 0))
{
fg->wlen -= 14;
- wpat += 7;
+ pat += 7;
fg->word = true;
}
/*
- * wpat has been adjusted earlier to not include '^', '$' or
+ * pat has been adjusted earlier to not include '^', '$' or
* the word match character classes at the beginning and ending
* of the string respectively.
*/
fg->wpattern = xmalloc((fg->wlen + 1) * sizeof(tre_char_t));
if (fg->wpattern == NULL)
return REG_ESPACE;
- memcpy(fg->wpattern, wpat, fg->wlen * sizeof(tre_char_t));
+ memcpy(fg->wpattern, pat, fg->wlen * sizeof(tre_char_t));
fg->wpattern[fg->wlen] = TRE_CHAR('\0');
/* Look for ways to cheat...er...avoid the full regex engine. */
@@ -446,14 +382,18 @@ tre_fastcomp(fastmatch_t *fg, const tre_
#ifdef TRE_WCHAR
STORE_MBS_PAT;
+#else
+ fg->len = fg->wlen;
+ fg->patter = fg->wpattern;
#endif
FILL_QSBC;
if (!fg->hasdot)
- FILL_BMGS(fg->bmGs, fg->wpattern, fg->wlen, true);
+ FILL_BMGS(fg->bmGs, fg->pattern, fg->len, false);
#ifdef TRE_WCHAR
+ FILL_QSBC_WIDE;
if (!fg->hasdot)
- FILL_BMGS(fg->sbmGs, fg->pattern, fg->len, false);
+ FILL_BMGS(fg->sbmGs, fg->wpattern, fg->wlen, true);
#endif
return REG_OK;
@@ -473,36 +413,31 @@ tre_fastexec(const fastmatch_t *fg, cons
#endif
if (len == (unsigned)-1)
- {
- switch (type)
- {
- case STR_BYTE:
- case STR_MBS:
- len = strlen(str_byte);
- break;
- case STR_WIDE:
- len = wcslen(str_wide);
- break;
- default:
- /* XXX */
- break;
- }
- }
+ switch (type)
+ {
+ case STR_WIDE:
+ len = wcslen(str_wide);
+ break;
+ default:
+ len = strlen(str_byte);
+ break;
+ }
/* No point in going farther if we do not have enough data. */
- if (len < fg->len)
- return ret;
-
switch (type)
{
case STR_WIDE:
+ if (len < fg->wlen)
+ return ret;
shift = fg->wlen;
break;
default:
+ if (len < fg->len)
+ return ret;
shift = fg->len;
- break;
}
+ /* XXX: make wchar-clean */
/* Only try once at the beginning or ending of the line. */
if (fg->bol || fg->eol) {
/* Simple text comparison. */
@@ -525,7 +460,7 @@ tre_fastexec(const fastmatch_t *fg, cons
COMPARE;
if (mismatch == REG_OK) {
pmatch[0].rm_so = j;
- pmatch[0].rm_eo = j + fg->len;
+ pmatch[0].rm_eo = j + ((type == STR_WIDE) ? fg->wlen : fg->len);
return REG_OK;
} else if (mismatch > 0)
return mismatch;
@@ -542,9 +477,9 @@ tre_fastfree(fastmatch_t *fg)
#ifdef TRE_WCHAR
hashtable_free(fg->qsBc_table);
- free(fg->pattern);
-#endif
free(fg->wpattern);
+#endif
+ free(fg->pattern);
}
/*
More information about the svn-src-user
mailing list