kern/169302: [libc] [patch] Applied MidnightBSD regex memory consumption limits
Pedro Giffuni
pfg at freebsd.org
Sun May 4 16:50:01 UTC 2014
The following reply was made to PR kern/169302; it has been noted by GNATS.
From: Pedro Giffuni <pfg at freebsd.org>
To: "bug-followup at FreeBSD.org" <bug-followup at FreeBSD.org>,
"zblacher at sandvine.com" <zblacher at sandvine.com>
Cc:
Subject: Re: kern/169302: [libc] [patch] Applied MidnightBSD regex memory
consumption limits
Date: Sun, 04 May 2014 11:41:45 -0500
This is a multi-part message in MIME format.
--------------020708080805040202010709
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit
Updated patch, based on the NetBSD changes.
Also use calloc(3) as in OpenBSD.
--------------020708080805040202010709
Content-Type: text/plain; charset=us-ascii;
name="patch-regex-pr169302.txt"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
filename="patch-regex-pr169302.txt"
Index: lib/libc/regex/engine.c
===================================================================
--- lib/libc/regex/engine.c (revision 265307)
+++ lib/libc/regex/engine.c (working copy)
@@ -219,7 +219,7 @@
} else {
for (dp = start; dp < stop; dp++)
if (*dp == g->must[0] &&
- stop - dp >= g->mlen &&
+ (size_t)(stop - dp) >= g->mlen &&
memcmp(dp, g->must, (size_t)g->mlen) == 0)
break;
if (dp == stop) /* we didn't find g->must */
Index: lib/libc/regex/regcomp.c
===================================================================
--- lib/libc/regex/regcomp.c (revision 265307)
+++ lib/libc/regex/regcomp.c (working copy)
@@ -86,11 +86,11 @@
#endif
/* === regcomp.c === */
-static void p_ere(struct parse *p, int stop);
-static void p_ere_exp(struct parse *p);
+static void p_ere(struct parse *p, int stop, size_t reclimit);
+static void p_ere_exp(struct parse *p, size_t reclimit);
static void p_str(struct parse *p);
-static void p_bre(struct parse *p, int end1, int end2);
-static int p_simp_re(struct parse *p, int starordinary);
+static void p_bre(struct parse *p, int end1, int end2, size_t reclimit);
+static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
static int p_count(struct parse *p);
static void p_bracket(struct parse *p);
static void p_b_term(struct parse *p, cset *cs);
@@ -102,7 +102,7 @@
static void bothcases(struct parse *p, wint_t ch);
static void ordinary(struct parse *p, wint_t ch);
static void nonnewline(struct parse *p);
-static void repeat(struct parse *p, sopno start, int from, int to);
+static void repeat(struct parse *p, sopno start, int from, int to, size_t reclimit);
static int seterr(struct parse *p, int e);
static cset *allocset(struct parse *p);
static void freeset(struct parse *p, cset *cs);
@@ -167,6 +167,13 @@
#define never 0 /* some <assert.h>s have bugs too */
#endif
+#define MEMLIMIT 0x8000000
+#define MEMSIZE(p) \
+ ((p)->ncsalloc / CHAR_BIT * NC + \
+ (p)->ncsalloc * sizeof(cset) + \
+ (p)->ssize * sizeof(sop))
+#define RECLIMIT 256
+
/* Macro used by computejump()/computematchjump() */
#define MIN(a,b) ((a)<(b)?(a):(b))
@@ -214,7 +221,7 @@
if (g == NULL)
return(REG_ESPACE);
p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
- p->strip = (sop *)malloc(p->ssize * sizeof(sop));
+ p->strip = calloc(p->ssize, sizeof(sop));
p->slen = 0;
if (p->strip == NULL) {
free((char *)g);
@@ -249,11 +256,11 @@
EMIT(OEND, 0);
g->firststate = THERE();
if (cflags&REG_EXTENDED)
- p_ere(p, OUT);
+ p_ere(p, OUT, 0);
else if (cflags&REG_NOSPEC)
p_str(p);
else
- p_bre(p, OUT, OUT);
+ p_bre(p, OUT, OUT, 0);
EMIT(OEND, 0);
g->laststate = THERE();
@@ -294,7 +301,8 @@
*/
static void
p_ere(struct parse *p,
- int stop) /* character this ERE should end at */
+ int stop, /* character this ERE should end at */
+ size_t reclimit)
{
char c;
sopno prevback;
@@ -302,11 +310,16 @@
sopno conc;
int first = 1; /* is this the first alternative? */
+ if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
+ p->error = REG_ESPACE;
+ return;
+ }
+
for (;;) {
/* do a bunch of concatenated expressions */
conc = HERE();
while (MORE() && (c = PEEK()) != '|' && c != stop)
- p_ere_exp(p);
+ p_ere_exp(p, reclimit);
(void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
if (!EAT('|'))
@@ -335,10 +348,10 @@
/*
- p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
- == static void p_ere_exp(struct parse *p);
+ == static void p_ere_exp(struct parse *p, size_t reclimit);
*/
static void
-p_ere_exp(struct parse *p)
+p_ere_exp(struct parse *p, size_t reclimit)
{
char c;
wint_t wc;
@@ -361,7 +374,7 @@
p->pbegin[subno] = HERE();
EMIT(OLPAREN, subno);
if (!SEE(')'))
- p_ere(p, ')');
+ p_ere(p, ')', reclimit);
if (subno < NPAREN) {
p->pend[subno] = HERE();
assert(p->pend[subno] != 0);
@@ -465,7 +478,7 @@
count2 = INFINITY;
} else /* just a single number */
count2 = count;
- repeat(p, pos, count, count2);
+ repeat(p, pos, count, count2, 0);
if (!EAT('}')) { /* error heuristics */
while (MORE() && PEEK() != '}')
NEXT();
@@ -499,7 +512,7 @@
/*
- p_bre - BRE parser top level, anchoring and concatenation
== static void p_bre(struct parse *p, int end1, \
- == int end2);
+ == int end2, size_t reclimit);
* Giving end1 as OUT essentially eliminates the end1/end2 check.
*
* This implementation is a bit of a kludge, in that a trailing $ is first
@@ -509,8 +522,14 @@
static void
p_bre(struct parse *p,
int end1, /* first terminating character */
- int end2) /* second terminating character */
+ int end2, /* second terminating character */
+ size_t reclimit)
{
+ if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
+ p->error = REG_ESPACE;
+ return;
+ }
+
sopno start = HERE();
int first = 1; /* first subexpression? */
int wasdollar = 0;
@@ -521,7 +540,7 @@
p->g->nbol++;
}
while (MORE() && !SEETWO(end1, end2)) {
- wasdollar = p_simp_re(p, first);
+ wasdollar = p_simp_re(p, first, reclimit);
first = 0;
}
if (wasdollar) { /* oops, that was a trailing anchor */
@@ -536,11 +555,12 @@
/*
- p_simp_re - parse a simple RE, an atom possibly followed by a repetition
- == static int p_simp_re(struct parse *p, int starordinary);
+ == static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
*/
static int /* was the simple RE an unbackslashed $? */
p_simp_re(struct parse *p,
- int starordinary) /* is a leading * an ordinary character? */
+ int starordinary, /* is a leading * an ordinary character? */
+ size_t reclimit)
{
int c;
int count;
@@ -580,7 +600,7 @@
EMIT(OLPAREN, subno);
/* the MORE here is an error heuristic */
if (MORE() && !SEETWO('\\', ')'))
- p_bre(p, '\\', ')');
+ p_bre(p, '\\', ')', reclimit);
if (subno < NPAREN) {
p->pend[subno] = HERE();
assert(p->pend[subno] != 0);
@@ -641,7 +661,7 @@
count2 = INFINITY;
} else /* just a single number */
count2 = count;
- repeat(p, pos, count, count2);
+ repeat(p, pos, count, count2, 0);
if (!EATTWO('\\', '}')) { /* error heuristics */
while (MORE() && !SEETWO('\\', '}'))
NEXT();
@@ -996,13 +1016,15 @@
/*
- repeat - generate code for a bounded repetition, recursively if needed
- == static void repeat(struct parse *p, sopno start, int from, int to);
+ == static void repeat(struct parse *p, sopno start, int from, int to,
+ == size_t reclimit );
*/
static void
repeat(struct parse *p,
sopno start, /* operand from here to end of strip */
int from, /* repeated from this number */
- int to) /* to this number of times (maybe INFINITY) */
+ int to, /* to this number of times (maybe INFINITY) */
+ size_t reclimit)
{
sopno finish = HERE();
# define N 2
@@ -1011,7 +1033,9 @@
# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
sopno copy;
- if (p->error != 0) /* head off possible runaway recursion */
+ if (reclimit++ > RECLIMIT)
+ p->error = REG_ESPACE;
+ if (p->error)
return;
assert(from <= to);
@@ -1025,7 +1049,7 @@
case REP(0, INF): /* as x{1,}? */
/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
INSERT(OCH_, start); /* offset is wrong... */
- repeat(p, start+1, 1, to);
+ repeat(p, start+1, 1, to, reclimit);
ASTERN(OOR1, start);
AHEAD(start); /* ... fix it */
EMIT(OOR2, 0);
@@ -1045,7 +1069,7 @@
ASTERN(O_CH, THERETHERE());
copy = dupl(p, start+1, finish+1);
assert(copy == finish+4);
- repeat(p, copy, 1, to-1);
+ repeat(p, copy, 1, to-1, reclimit);
break;
case REP(1, INF): /* as x+ */
INSERT(OPLUS_, start);
@@ -1053,11 +1077,11 @@
break;
case REP(N, N): /* as xx{m-1,n-1} */
copy = dupl(p, start, finish);
- repeat(p, copy, from-1, to-1);
+ repeat(p, copy, from-1, to-1, reclimit);
break;
case REP(N, INF): /* as xx{n-1,INF} */
copy = dupl(p, start, finish);
- repeat(p, copy, from-1, to);
+ repeat(p, copy, from-1, to, reclimit);
break;
default: /* "can't happen" */
SETERROR(REG_ASSERT); /* just in case */
@@ -1112,8 +1136,13 @@
{
cset *cs, *ncs;
+
+ if (MEMSIZE(p) > MEMLIMIT)
+ goto oomem;
+
ncs = realloc(p->g->sets, (p->g->ncsets + 1) * sizeof(*ncs));
if (ncs == NULL) {
+oomem:
SETERROR(REG_ESPACE);
return (NULL);
}
@@ -1347,17 +1376,22 @@
enlarge(struct parse *p, sopno size)
{
sop *sp;
+ sopno osize;
if (p->ssize >= size)
return 1;
-
+ osize = p->ssize;
+ p->ssize = size;
+ if (MEMSIZE(p) > MEMLIMIT)
+ goto oomem;
sp = (sop *)realloc(p->strip, size*sizeof(sop));
if (sp == NULL) {
+oomem:
+ p->ssize = osize;
SETERROR(REG_ESPACE);
return 0;
}
p->strip = sp;
- p->ssize = size;
return 1;
}
Index: lib/libc/regex/regex2.h
===================================================================
--- lib/libc/regex/regex2.h (revision 265307)
+++ lib/libc/regex/regex2.h (working copy)
@@ -73,7 +73,7 @@
* immediately *preceding* "execution" of that operator.
*/
typedef unsigned long sop; /* strip operator */
-typedef long sopno;
+typedef size_t sopno;
#define OPRMASK 0xf8000000L
#define OPDMASK 0x07ffffffL
#define OPSHIFT ((unsigned)27)
@@ -165,7 +165,7 @@
int magic;
# define MAGIC2 ((('R'^0200)<<8)|'E')
sop *strip; /* malloced area for strip */
- int ncsets; /* number of csets in use */
+ size_t ncsets; /* number of csets in use */
cset *sets; /* -> cset [ncsets] */
int cflags; /* copy of regcomp() cflags argument */
sopno nstates; /* = number of sops */
@@ -175,13 +175,13 @@
# define USEBOL 01 /* used ^ */
# define USEEOL 02 /* used $ */
# define BAD 04 /* something wrong */
- int nbol; /* number of ^ used */
- int neol; /* number of $ used */
+ size_t nbol; /* number of ^ used */
+ size_t neol; /* number of $ used */
char *must; /* match must contain this string */
int moffset; /* latest point at which must may be located */
int *charjump; /* Boyer-Moore char jump table */
int *matchjump; /* Boyer-Moore match jump table */
- int mlen; /* length of must */
+ size_t mlen; /* length of must */
size_t nsub; /* copy of re_nsub */
int backrefs; /* does it use back references? */
sopno nplus; /* how deep does it nest +s? */
--------------020708080805040202010709--
More information about the freebsd-bugs
mailing list