svn commit: r224763 - user/gabor/tre-integration/contrib/tre/lib
Gabor Kovesdan
gabor at FreeBSD.org
Wed Aug 10 19:16:04 UTC 2011
Author: gabor
Date: Wed Aug 10 19:16:04 2011
New Revision: 224763
URL: http://svn.freebsd.org/changeset/base/224763
Log:
- Add comments
- Make FILL_BMGS and such consistent with FILL_QSBC
- Avoid duplicated code by introducing a new macro
- Simplify a switch to be consistent with earlier changes
Modified:
user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/fastmatch.c Wed Aug 10 19:12:21 2011 (r224762)
+++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.c Wed Aug 10 19:16:04 2011 (r224763)
@@ -44,12 +44,22 @@
static int fastcmp(const void *, const void *, size_t,
tre_str_type_t, bool);
+/*
+ * We will work with wide characters if they are supported
+ */
#ifdef TRE_WCHAR
#define TRE_CHAR(n) L##n
#else
#define TRE_CHAR(n) n
#endif
+/*
+ * Skips n characters in the input string and assigns the start
+ * address to startptr. Note: as per IEEE Std 1003.1-2008
+ * matching is based on bit pattern not character representations
+ * so we can handle MB strings as byte sequences just like
+ * SB strings.
+ */
#define SKIP_CHARS(n) \
do { \
switch (type) \
@@ -62,6 +72,11 @@ static int fastcmp(const void *, const v
} \
} while (0); \
+/*
+ * Converts the wide string pattern to SB/MB string and stores
+ * it in fg->pattern. Sets fg->len to the byte length of the
+ * converted string.
+ */
#define STORE_MBS_PAT \
do { \
size_t siz; \
@@ -77,6 +92,10 @@ static int fastcmp(const void *, const v
fg->pattern[siz] = '\0'; \
} while (0); \
+/*
+ * Compares the pattern to the input string at the position
+ * stored in startptr.
+ */
#define COMPARE \
switch (type) \
{ \
@@ -92,10 +111,18 @@ static int fastcmp(const void *, const v
#define IS_OUT_OF_BOUNDS \
((type == STR_WIDE) ? ((j + fg->wlen) > len) : ((j + fg->len) > len))
+/*
+ * Checks whether the new position after shifting in the input string
+ * is out of the bounds and break out from the loop if so.
+ */
#define CHECKBOUNDS \
if (IS_OUT_OF_BOUNDS) \
break; \
+/*
+ * Shifts in the input string after a mismatch. The position of the
+ * mismatch is stored in the mismatch variable.
+ */
#define SHIFT \
CHECKBOUNDS; \
\
@@ -162,6 +189,9 @@ static int fastcmp(const void *, const v
* thi. 1
*/
+/*
+ * Fills in the bad character shift array for SB/MB strings.
+ */
#define FILL_QSBC \
for (unsigned int i = 0; i <= UCHAR_MAX; i++) \
fg->qsBc[i] = fg->len - fg->hasdot; \
@@ -176,7 +206,15 @@ static int fastcmp(const void *, const v
} \
}
-
+/*
+ * Fills in the bad character shifts into a hastable for wide strings.
+ * With wide characters it is not possible any more to use a normal
+ * array because there are too many characters and we could not
+ * provide enough memory. Fortunately, we only have to store distinct
+ * values for so many characters as the number of distinct characters
+ * in the pattern, so we can store them in a hashtable and store a
+ * default shift value for the rest.
+ */
#define FILL_QSBC_WIDE \
/* Adjust the shift based on location of the last dot ('.'). */ \
fg->defBc = fg->wlen - fg->hasdot; \
@@ -196,7 +234,21 @@ static int fastcmp(const void *, const v
} \
} \
-#define FILL_BMGS(arr, pat, plen, wide) \
+/*
+ * Fills in the good suffix table for SB/MB strings.
+ */
+#define FILL_BMGS \
+ if (!fg->hasdot) \
+ _FILL_BMGS(fg->sbmGs, fg->pattern, fg->len, false);
+
+/*
+ * Fills in the good suffix table for wide strings.
+ */
+#define FILL_BMGS_WIDE \
+ if (!fg->hasdot) \
+ _FILL_BMGS(fg->bmGs, fg->wpattern, fg->wlen, true);
+
+#define _FILL_BMGS(arr, pat, plen, wide) \
{ \
char *p; \
wchar_t *wp; \
@@ -208,10 +260,10 @@ static int fastcmp(const void *, const v
wp = alloca(plen * sizeof(wint_t)); \
for (int i = 0; i < plen; i++) \
wp[i] = towlower(pat[i]); \
- _FILL_BMGS(arr, wp, plen); \
+ _CALC_BMGS(arr, wp, plen); \
} \
else \
- _FILL_BMGS(arr, pat, plen); \
+ _CALC_BMGS(arr, pat, plen); \
} \
else \
{ \
@@ -220,14 +272,14 @@ static int fastcmp(const void *, const v
p = alloca(plen); \
for (int i = 0; i < plen; i++) \
p[i] = tolower(pat[i]); \
- _FILL_BMGS(arr, p, plen); \
+ _CALC_BMGS(arr, p, plen); \
} \
else \
- _FILL_BMGS(arr, pat, plen); \
+ _CALC_BMGS(arr, pat, plen); \
} \
}
-#define _FILL_BMGS(arr, pat, plen) \
+#define _CALC_BMGS(arr, pat, plen) \
{ \
int f, g; \
\
@@ -266,6 +318,15 @@ static int fastcmp(const void *, const v
free(suff); \
}
+#define SAVE_PATTERN(p, l) \
+ l = (n == 0) ? tre_strlen(pat) : n; \
+ p = xmalloc((l + 1) * sizeof(tre_char_t)); \
+ if (p == NULL) \
+ return REG_ESPACE; \
+ memcpy(p, pat, l * sizeof(tre_char_t)); \
+ p[l] = TRE_CHAR('\0');
+
+
/*
* Returns: REG_OK on success, error code otherwise
*/
@@ -282,28 +343,17 @@ tre_fastcomp_literal(fastmatch_t *fg, co
return REG_BADPAT;
#ifdef TRE_WCHAR
- fg->wlen = (n == 0) ? tre_strlen(pat) : n;
- fg->wpattern = xmalloc((fg->wlen + 1) * sizeof(tre_char_t));
- if (fg->wpattern == NULL)
- return REG_ESPACE;
- memcpy(fg->wpattern, pat, fg->wlen * sizeof(tre_char_t));
- fg->wpattern[fg->wlen] = TRE_CHAR('\0');
-
+ SAVE_PATTERN(fg->wpattern, fg->wlen);
STORE_MBS_PAT;
#else
- fg->len = (n == 0) ? tre_strlen(pat) : n;
- fg->pattern = xmalloc((fg->len + 1) * sizeof(tre_char_t));
- if (fg->pattern == NULL)
- return REG_ESPACE;
- memcpy(fg->pattern, pat, fg->len * sizeof(tre_char_t));
- fg->pattern[fg->len] = TRE_CHAR('\0');
+ SAVE_PATTERN(fg->pattern, fg->len);
#endif
FILL_QSBC;
- FILL_BMGS(fg->sbmGs, fg->pattern, fg->len, false);
+ FILL_BMGS;
#ifdef TRE_WCHAR
FILL_QSBC_WIDE;
- FILL_BMGS(fg->bmGs, fg->wpattern, fg->wlen, true);
+ FILL_BMGS_WIDE;
#endif
return REG_OK;
@@ -356,11 +406,7 @@ tre_fastcomp(fastmatch_t *fg, const tre_
* the word match character classes at the beginning and ending
* of the string respectively.
*/
- fg->wpattern = xmalloc((fg->wlen + 1) * sizeof(tre_char_t));
- if (fg->wpattern == NULL)
- return REG_ESPACE;
- memcpy(fg->wpattern, pat, fg->wlen * sizeof(tre_char_t));
- fg->wpattern[fg->wlen] = TRE_CHAR('\0');
+ SAVE_PATTERN(fg->wpattern, fg->wlen);
/* Look for ways to cheat...er...avoid the full regex engine. */
for (unsigned int i = 0; i < fg->wlen; i++) {
@@ -388,12 +434,10 @@ tre_fastcomp(fastmatch_t *fg, const tre_
#endif
FILL_QSBC;
- if (!fg->hasdot)
- FILL_BMGS(fg->bmGs, fg->pattern, fg->len, false);
+ FILL_BMGS;
#ifdef TRE_WCHAR
FILL_QSBC_WIDE;
- if (!fg->hasdot)
- FILL_BMGS(fg->sbmGs, fg->wpattern, fg->wlen, true);
+ FILL_BMGS_WIDE;
#endif
return REG_OK;
@@ -502,24 +546,19 @@ fastcmp(const void *pat, const void *dat
for (int i = len - 1; i >= 0; i--) {
switch (type)
{
- case STR_BYTE:
- case STR_MBS:
- if (pat_byte[i] == '.')
- continue;
- if (icase ? (tolower(pat_byte[i]) == tolower(str_byte[i]))
- : (pat_byte[i] == str_byte[i]))
- continue;
- break;
case STR_WIDE:
if (pat_wide[i] == L'.')
continue;
if (icase ? (towlower(pat_wide[i]) == towlower(str_wide[i]))
- : (pat_wide[i] == str_wide[i]))
+ : (pat_wide[i] == str_wide[i]))
continue;
break;
default:
- /* XXX */
- break;
+ if (pat_byte[i] == '.')
+ continue;
+ if (icase ? (tolower(pat_byte[i]) == tolower(str_byte[i]))
+ : (pat_byte[i] == str_byte[i]))
+ continue;
}
ret = -(i + 1);
break;
More information about the svn-src-user
mailing list