git: f39dd6a97844 - main - one-true-awk: import 20210221 (1e4bc42c53a1) which fixes a number of bugs

Warner Losh imp at FreeBSD.org
Thu Jul 8 01:27:44 UTC 2021


The branch main has been updated by imp:

URL: https://cgit.FreeBSD.org/src/commit/?id=f39dd6a9784467f0db5886012b3f4b13899be6b8

commit f39dd6a9784467f0db5886012b3f4b13899be6b8
Merge: 7cd22ac43418 746b7396bb3e
Author:     Warner Losh <imp at FreeBSD.org>
AuthorDate: 2021-07-07 23:30:35 +0000
Commit:     Warner Losh <imp at FreeBSD.org>
CommitDate: 2021-07-08 01:25:43 +0000

    one-true-awk: import 20210221 (1e4bc42c53a1) which fixes a number of bugs
    
    Import the latest bsd-features branch of the one-true-awk upstream:
    
    o Move to bison for $YACC
    o Set close-on-exec flag for file and pipe redirects that aren't std*
    o lots of little fixes to modernize code base
    o free sval member before setting it
    o fix a bug where a{0,3} could match aaaa
    o pull in systime and strftime from NetBSD awk
    o pull in fixes from {Net,Free,Open}BSD (normalized our code with them)
    o add BSD extensions and, or, xor, compl, lshift, rshift (mostly a nop)
    
    Also revert a few of the trivial FreeBSD changes that were done slightly
    differently in the upstreaming process. Also, our PR database may have
    been mined by upstream for these fixes, and Mikolaj Golub may deserve
    credit for some of the fixes in this update.
    
    Suggested by:           Mikolaj Golub <to.my.trociny at gmail.com>
    PR:                     143363,143365,143368,143369,143373,143375,214782
    Sponsored by:           Netflix

 contrib/one-true-awk/ChangeLog                     | 108 +++
 contrib/one-true-awk/FIXES                         | 261 +++++-
 contrib/one-true-awk/REGRESS                       |   4 +
 contrib/one-true-awk/awk.1                         | 159 +++-
 contrib/one-true-awk/awk.h                         |  67 +-
 contrib/one-true-awk/awkgram.y                     |  48 +-
 contrib/one-true-awk/b.c                           | 438 +++++++---
 .../one-true-awk/bugs-fixed/missing-precision.ok   |   2 +-
 contrib/one-true-awk/bugs-fixed/negative-nf.ok     |   2 +-
 contrib/one-true-awk/lex.c                         |  77 +-
 contrib/one-true-awk/lib.c                         | 327 +++++---
 contrib/one-true-awk/main.c                        | 163 ++--
 contrib/one-true-awk/makefile                      |  74 +-
 contrib/one-true-awk/maketab.c                     |  66 +-
 contrib/one-true-awk/parse.c                       |  29 +-
 contrib/one-true-awk/proctab.c                     | 202 ++---
 contrib/one-true-awk/proto.h                       |  41 +-
 contrib/one-true-awk/run.c                         | 918 ++++++++++++++-------
 contrib/one-true-awk/tran.c                        | 164 ++--
 usr.bin/awk/Makefile                               |  10 +-
 20 files changed, 2192 insertions(+), 968 deletions(-)

diff --cc contrib/one-true-awk/awk.h
index 31d070aecddc,51c00df9f279..230eac41548c
--- a/contrib/one-true-awk/awk.h
+++ b/contrib/one-true-awk/awk.h
@@@ -217,9 -235,8 +235,10 @@@ extern	int	pairstack[], paircnt
  
  #define NCHARS	(256+3)		/* 256 handles 8-bit chars; 128 does 7-bit */
  				/* watch out in match(), etc. */
+ #define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
  #define NSTATES	32
 +#define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
 +				/* NCHARS is 2**n */
  
  typedef struct rrow {
  	long	ltype;	/* long avoids pointer warnings on 64-bit */
diff --cc contrib/one-true-awk/b.c
index 0cdcf30a972e,000000000000..20f4a8bee7f9
mode 100644,000000..100644
--- a/contrib/one-true-awk/b.c
+++ b/contrib/one-true-awk/b.c
@@@ -1,1225 -1,0 +1,1391 @@@
 +/****************************************************************
 +Copyright (C) Lucent Technologies 1997
 +All Rights Reserved
 +
 +Permission to use, copy, modify, and distribute this software and
 +its documentation for any purpose and without fee is hereby
 +granted, provided that the above copyright notice appear in all
 +copies and that both that the copyright notice and this
 +permission notice and warranty disclaimer appear in supporting
 +documentation, and that the name Lucent Technologies or any of
 +its entities not be used in advertising or publicity pertaining
 +to distribution of the software without specific, written prior
 +permission.
 +
 +LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
 +IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
 +SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
 +IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 +ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 +THIS SOFTWARE.
 +****************************************************************/
 +
 +/* lasciate ogne speranza, voi ch'intrate. */
 +
 +#include <sys/cdefs.h>
 +__FBSDID("$FreeBSD$");
 +
 +#define	DEBUG
 +
 +#include <ctype.h>
 +#include <limits.h>
 +#include <stdio.h>
 +#include <string.h>
 +#include <stdlib.h>
 +#include "awk.h"
- #include "ytab.h"
++#include "awkgram.tab.h"
 +
 +#define MAXLIN 22
 +
 +#define type(v)		(v)->nobj	/* badly overloaded here */
 +#define info(v)		(v)->ntype	/* badly overloaded here */
 +#define left(v)		(v)->narg[0]
 +#define right(v)	(v)->narg[1]
 +#define parent(v)	(v)->nnext
 +
 +#define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
 +#define ELEAF	case EMPTYRE:		/* empty string in regexp */
 +#define UNARY	case STAR: case PLUS: case QUEST:
 +
 +/* encoding in tree Nodes:
 +	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
 +		left is index, right contains value or pointer to value
 +	unary (STAR, PLUS, QUEST): left is child, right is null
 +	binary (CAT, OR): left and right are children
 +	parent contains pointer to parent
 +*/
 +
 +
 +int	*setvec;
 +int	*tmpset;
 +int	maxsetvec = 0;
 +
 +int	rtok;		/* next token in current re */
 +int	rlxval;
- static uschar	*rlxstr;
- static uschar	*prestr;	/* current position in current re */
- static uschar	*lastre;	/* origin of last re */
- static uschar	*lastatom;	/* origin of last Atom */
- static uschar	*starttok;
- static uschar 	*basestr;	/* starts with original, replaced during
++static const uschar	*rlxstr;
++static const uschar	*prestr;	/* current position in current re */
++static const uschar	*lastre;	/* origin of last re */
++static const uschar	*lastatom;	/* origin of last Atom */
++static const uschar	*starttok;
++static const uschar 	*basestr;	/* starts with original, replaced during
 +				   repetition processing */
- static uschar 	*firstbasestr;
++static const uschar 	*firstbasestr;
 +
 +static	int setcnt;
 +static	int poscnt;
 +
- char	*patbeg;
++const char	*patbeg;
 +int	patlen;
 +
- #define	NFA	20	/* cache this many dynamic fa's */
++#define	NFA	128	/* cache this many dynamic fa's */
 +fa	*fatab[NFA];
 +int	nfatab	= 0;	/* entries in fatab */
 +
- fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
++static int *
++intalloc(size_t n, const char *f)
++{
++	int *p = (int *) calloc(n, sizeof(int));
++	if (p == NULL)
++		overflo(f);
++	return p;
++}
++
++static void
++resizesetvec(const char *f)
++{
++	if (maxsetvec == 0)
++		maxsetvec = MAXLIN;
++	else
++		maxsetvec *= 4;
++	setvec = (int *) realloc(setvec, maxsetvec * sizeof(*setvec));
++	tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(*tmpset));
++	if (setvec == NULL || tmpset == NULL)
++		overflo(f);
++}
++
++static void
++resize_state(fa *f, int state)
++{
++	unsigned int **p;
++	uschar *p2;
++	int **p3;
++	int i, new_count;
++
++	if (++state < f->state_count)
++		return;
++
++	new_count = state + 10; /* needs to be tuned */
++
++	p = (unsigned int **) realloc(f->gototab, new_count * sizeof(f->gototab[0]));
++	if (p == NULL)
++		goto out;
++	f->gototab = p;
++
++	p2 = (uschar *) realloc(f->out, new_count * sizeof(f->out[0]));
++	if (p2 == NULL)
++		goto out;
++	f->out = p2;
++
++	p3 = (int **) realloc(f->posns, new_count * sizeof(f->posns[0]));
++	if (p3 == NULL)
++		goto out;
++	f->posns = p3;
++
++	for (i = f->state_count; i < new_count; ++i) {
++		f->gototab[i] = (unsigned int *) calloc(NCHARS, sizeof(**f->gototab));
++		if (f->gototab[i] == NULL)
++			goto out;
++		f->out[i]  = 0;
++		f->posns[i] = NULL;
++	}
++	f->state_count = new_count;
++	return;
++out:
++	overflo(__func__);
++}
++
++fa *makedfa(const char *s, bool anchor)	/* returns dfa for reg expr s */
 +{
 +	int i, use, nuse;
 +	fa *pfa;
 +	static int now = 1;
 +
 +	if (setvec == NULL) {	/* first time through any RE */
- 		maxsetvec = MAXLIN;
- 		setvec = (int *) malloc(maxsetvec * sizeof(int));
- 		tmpset = (int *) malloc(maxsetvec * sizeof(int));
- 		if (setvec == NULL || tmpset == NULL)
- 			overflo("out of space initializing makedfa");
++		resizesetvec(__func__);
 +	}
 +
- 	if (compile_time)	/* a constant for sure */
++	if (compile_time != RUNNING)	/* a constant for sure */
 +		return mkdfa(s, anchor);
 +	for (i = 0; i < nfatab; i++)	/* is it there already? */
 +		if (fatab[i]->anchor == anchor
 +		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
 +			fatab[i]->use = now++;
 +			return fatab[i];
 +		}
 +	pfa = mkdfa(s, anchor);
 +	if (nfatab < NFA) {	/* room for another */
 +		fatab[nfatab] = pfa;
 +		fatab[nfatab]->use = now++;
 +		nfatab++;
 +		return pfa;
 +	}
 +	use = fatab[0]->use;	/* replace least-recently used */
 +	nuse = 0;
 +	for (i = 1; i < nfatab; i++)
 +		if (fatab[i]->use < use) {
 +			use = fatab[i]->use;
 +			nuse = i;
 +		}
 +	freefa(fatab[nuse]);
 +	fatab[nuse] = pfa;
 +	pfa->use = now++;
 +	return pfa;
 +}
 +
- fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
- 				/* anchor = 1 for anchored matches, else 0 */
++fa *mkdfa(const char *s, bool anchor)	/* does the real work of making a dfa */
++				/* anchor = true for anchored matches, else false */
 +{
 +	Node *p, *p1;
 +	fa *f;
 +
- 	firstbasestr = (uschar *) s;
++	firstbasestr = (const uschar *) s;
 +	basestr = firstbasestr;
 +	p = reparse(s);
 +	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
 +		/* put ALL STAR in front of reg.  exp. */
 +	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
 +		/* put FINAL after reg.  exp. */
 +
 +	poscnt = 0;
 +	penter(p1);	/* enter parent pointers and leaf indices */
- 	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
- 		overflo("out of space for fa");
++	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt * sizeof(rrow))) == NULL)
++		overflo(__func__);
 +	f->accept = poscnt-1;	/* penter has computed number of positions in re */
 +	cfoll(f, p1);	/* set up follow sets */
 +	freetr(p1);
- 	if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
- 			overflo("out of space in makedfa");
- 	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
- 		overflo("out of space in makedfa");
++	resize_state(f, 1);
++	f->posns[0] = intalloc(*(f->re[0].lfollow), __func__);
++	f->posns[1] = intalloc(1, __func__);
 +	*f->posns[1] = 0;
 +	f->initstat = makeinit(f, anchor);
 +	f->anchor = anchor;
 +	f->restr = (uschar *) tostring(s);
 +	if (firstbasestr != basestr) {
 +		if (basestr)
 +			xfree(basestr);
 +	}
 +	return f;
 +}
 +
- int makeinit(fa *f, int anchor)
++int makeinit(fa *f, bool anchor)
 +{
 +	int i, k;
 +
 +	f->curstat = 2;
 +	f->out[2] = 0;
- 	f->reset = 0;
 +	k = *(f->re[0].lfollow);
- 	xfree(f->posns[2]);			
- 	if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
- 		overflo("out of space in makeinit");
- 	for (i=0; i <= k; i++) {
++	xfree(f->posns[2]);
++	f->posns[2] = intalloc(k + 1,  __func__);
++	for (i = 0; i <= k; i++) {
 +		(f->posns[2])[i] = (f->re[0].lfollow)[i];
 +	}
 +	if ((f->posns[2])[1] == f->accept)
 +		f->out[2] = 1;
- 	for (i=0; i < NCHARS; i++)
++	for (i = 0; i < NCHARS; i++)
 +		f->gototab[2][i] = 0;
 +	f->curstat = cgoto(f, 2, HAT);
 +	if (anchor) {
 +		*f->posns[2] = k-1;	/* leave out position 0 */
- 		for (i=0; i < k; i++) {
++		for (i = 0; i < k; i++) {
 +			(f->posns[0])[i] = (f->posns[2])[i];
 +		}
 +
 +		f->out[0] = f->out[2];
 +		if (f->curstat != 2)
 +			--(*f->posns[f->curstat]);
 +	}
 +	return f->curstat;
 +}
 +
 +void penter(Node *p)	/* set up parent pointers and leaf indices */
 +{
 +	switch (type(p)) {
 +	ELEAF
 +	LEAF
 +		info(p) = poscnt;
 +		poscnt++;
 +		break;
 +	UNARY
 +		penter(left(p));
 +		parent(left(p)) = p;
 +		break;
 +	case CAT:
 +	case OR:
 +		penter(left(p));
 +		penter(right(p));
 +		parent(left(p)) = p;
 +		parent(right(p)) = p;
 +		break;
++	case ZERO:
++		break;
 +	default:	/* can't happen */
 +		FATAL("can't happen: unknown type %d in penter", type(p));
 +		break;
 +	}
 +}
 +
 +void freetr(Node *p)	/* free parse tree */
 +{
 +	switch (type(p)) {
 +	ELEAF
 +	LEAF
 +		xfree(p);
 +		break;
 +	UNARY
++	case ZERO:
 +		freetr(left(p));
 +		xfree(p);
 +		break;
 +	case CAT:
 +	case OR:
 +		freetr(left(p));
 +		freetr(right(p));
 +		xfree(p);
 +		break;
 +	default:	/* can't happen */
 +		FATAL("can't happen: unknown type %d in freetr", type(p));
 +		break;
 +	}
 +}
 +
 +/* in the parsing of regular expressions, metacharacters like . have */
 +/* to be seen literally;  \056 is not a metacharacter. */
 +
- int hexstr(uschar **pp)	/* find and eval hex string at pp, return new p */
++int hexstr(const uschar **pp)	/* find and eval hex string at pp, return new p */
 +{			/* only pick up one 8-bit byte (2 chars) */
- 	uschar *p;
++	const uschar *p;
 +	int n = 0;
 +	int i;
 +
- 	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
++	for (i = 0, p = *pp; i < 2 && isxdigit(*p); i++, p++) {
 +		if (isdigit(*p))
 +			n = 16 * n + *p - '0';
 +		else if (*p >= 'a' && *p <= 'f')
 +			n = 16 * n + *p - 'a' + 10;
 +		else if (*p >= 'A' && *p <= 'F')
 +			n = 16 * n + *p - 'A' + 10;
 +	}
- 	*pp = (uschar *) p;
++	*pp = p;
 +	return n;
 +}
 +
 +#define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
 +
- int quoted(uschar **pp)	/* pick up next thing after a \\ */
++int quoted(const uschar **pp)	/* pick up next thing after a \\ */
 +			/* and increment *pp */
 +{
- 	uschar *p = *pp;
++	const uschar *p = *pp;
 +	int c;
 +
 +	if ((c = *p++) == 't')
 +		c = '\t';
 +	else if (c == 'n')
 +		c = '\n';
 +	else if (c == 'f')
 +		c = '\f';
 +	else if (c == 'r')
 +		c = '\r';
 +	else if (c == 'b')
 +		c = '\b';
++	else if (c == 'v')
++		c = '\v';
++	else if (c == 'a')
++		c = '\a';
 +	else if (c == '\\')
 +		c = '\\';
 +	else if (c == 'x') {	/* hexadecimal goo follows */
 +		c = hexstr(&p);	/* this adds a null if number is invalid */
 +	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
 +		int n = c - '0';
 +		if (isoctdigit(*p)) {
 +			n = 8 * n + *p++ - '0';
 +			if (isoctdigit(*p))
 +				n = 8 * n + *p++ - '0';
 +		}
 +		c = n;
 +	} /* else */
 +		/* c = c; */
 +	*pp = p;
 +	return c;
 +}
 +
 +static int collate_range_cmp(int a, int b)
 +{
 +	static char s[2][2];
 +
 +	if ((uschar)a == (uschar)b)
 +		return 0;
 +	s[0][0] = a;
 +	s[1][0] = b;
 +	return (strcoll(s[0], s[1]));
 +}
 +
 +char *cclenter(const char *argp)	/* add a character class */
 +{
- 	int i, c, c2;
- 	int j;
- 	uschar *p = (uschar *) argp;
- 	uschar *op, *bp;
++	int i, c, c2, j;
++	const uschar *op, *p = (const uschar *) argp;
++	uschar *bp;
 +	static uschar *buf = NULL;
 +	static int bufsz = 100;
 +
 +	op = p;
 +	if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
 +		FATAL("out of space for character class [%.10s...] 1", p);
 +	bp = buf;
 +	for (i = 0; (c = *p++) != 0; ) {
 +		if (c == '\\') {
 +			c = quoted(&p);
 +		} else if (c == '-' && i > 0 && bp[-1] != 0) {
 +			if (*p != 0) {
 +				c = bp[-1];
 +				c2 = *p++;
 +				if (c2 == '\\')
 +					c2 = quoted(&p);
 +				if (collate_range_cmp(c, c2) > 0) {
 +					bp--;
 +					i--;
 +					continue;
 +				}
 +				for (j = 0; j < NCHARS; j++) {
 +					if ((collate_range_cmp(c, j) > 0) ||
 +					    collate_range_cmp(j, c2) > 0)
 +						continue;
 +					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
 +						FATAL("out of space for character class [%.10s...] 2", p);
 +					*bp++ = j;
 +					i++;
 +				}
 +				continue;
 +			}
 +		}
 +		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
 +			FATAL("out of space for character class [%.10s...] 3", p);
 +		*bp++ = c;
 +		i++;
 +	}
 +	*bp = 0;
- 	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
++	DPRINTF("cclenter: in = |%s|, out = |%s|\n", op, buf);
 +	xfree(op);
 +	return (char *) tostring((char *) buf);
 +}
 +
 +void overflo(const char *s)
 +{
- 	FATAL("regular expression too big: %.30s...", s);
++	FATAL("regular expression too big: out of space in %.30s...", s);
 +}
 +
 +void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
 +{
 +	int i;
 +	int *p;
 +
 +	switch (type(v)) {
 +	ELEAF
 +	LEAF
 +		f->re[info(v)].ltype = type(v);
 +		f->re[info(v)].lval.np = right(v);
 +		while (f->accept >= maxsetvec) {	/* guessing here! */
- 			maxsetvec *= 4;
- 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
- 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
- 			if (setvec == NULL || tmpset == NULL)
- 				overflo("out of space in cfoll()");
++			resizesetvec(__func__);
 +		}
 +		for (i = 0; i <= f->accept; i++)
 +			setvec[i] = 0;
 +		setcnt = 0;
 +		follow(v);	/* computes setvec and setcnt */
- 		if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
- 			overflo("out of space building follow set");
++		p = intalloc(setcnt + 1, __func__);
 +		f->re[info(v)].lfollow = p;
 +		*p = setcnt;
 +		for (i = f->accept; i >= 0; i--)
 +			if (setvec[i] == 1)
 +				*++p = i;
 +		break;
 +	UNARY
 +		cfoll(f,left(v));
 +		break;
 +	case CAT:
 +	case OR:
 +		cfoll(f,left(v));
 +		cfoll(f,right(v));
 +		break;
++	case ZERO:
++		break;
 +	default:	/* can't happen */
 +		FATAL("can't happen: unknown type %d in cfoll", type(v));
 +	}
 +}
 +
 +int first(Node *p)	/* collects initially active leaves of p into setvec */
 +			/* returns 0 if p matches empty string */
 +{
 +	int b, lp;
 +
 +	switch (type(p)) {
 +	ELEAF
 +	LEAF
 +		lp = info(p);	/* look for high-water mark of subscripts */
 +		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
- 			maxsetvec *= 4;
- 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
- 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
- 			if (setvec == NULL || tmpset == NULL)
- 				overflo("out of space in first()");
++			resizesetvec(__func__);
 +		}
 +		if (type(p) == EMPTYRE) {
 +			setvec[lp] = 0;
 +			return(0);
 +		}
 +		if (setvec[lp] != 1) {
 +			setvec[lp] = 1;
 +			setcnt++;
 +		}
 +		if (type(p) == CCL && (*(char *) right(p)) == '\0')
 +			return(0);		/* empty CCL */
- 		else return(1);
++		return(1);
 +	case PLUS:
- 		if (first(left(p)) == 0) return(0);
++		if (first(left(p)) == 0)
++			return(0);
 +		return(1);
 +	case STAR:
 +	case QUEST:
 +		first(left(p));
 +		return(0);
 +	case CAT:
 +		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
 +		return(1);
 +	case OR:
 +		b = first(right(p));
 +		if (first(left(p)) == 0 || b == 0) return(0);
 +		return(1);
++	case ZERO:
++		return 0;
 +	}
 +	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
 +	return(-1);
 +}
 +
 +void follow(Node *v)	/* collects leaves that can follow v into setvec */
 +{
 +	Node *p;
 +
 +	if (type(v) == FINAL)
 +		return;
 +	p = parent(v);
 +	switch (type(p)) {
 +	case STAR:
 +	case PLUS:
 +		first(v);
 +		follow(p);
 +		return;
 +
 +	case OR:
 +	case QUEST:
 +		follow(p);
 +		return;
 +
 +	case CAT:
 +		if (v == left(p)) {	/* v is left child of p */
 +			if (first(right(p)) == 0) {
 +				follow(p);
 +				return;
 +			}
 +		} else		/* v is right child */
 +			follow(p);
 +		return;
 +	}
 +}
 +
 +int member(int c, const char *sarg)	/* is c in s? */
 +{
- 	uschar *s = (uschar *) sarg;
++	const uschar *s = (const uschar *) sarg;
 +
 +	while (*s)
 +		if (c == *s++)
 +			return(1);
 +	return(0);
 +}
 +
 +int match(fa *f, const char *p0)	/* shortest match ? */
 +{
 +	int s, ns;
- 	uschar *p = (uschar *) p0;
++	const uschar *p = (const uschar *) p0;
++
++	s = f->initstat;
++	assert (s < f->state_count);
 +
- 	s = f->reset ? makeinit(f,0) : f->initstat;
 +	if (f->out[s])
 +		return(1);
 +	do {
 +		/* assert(*p < NCHARS); */
 +		if ((ns = f->gototab[s][*p]) != 0)
 +			s = ns;
 +		else
 +			s = cgoto(f, s, *p);
 +		if (f->out[s])
 +			return(1);
 +	} while (*p++ != 0);
 +	return(0);
 +}
 +
 +int pmatch(fa *f, const char *p0)	/* longest match, for sub */
 +{
 +	int s, ns;
- 	uschar *p = (uschar *) p0;
- 	uschar *q;
- 	int i, k;
++	const uschar *p = (const uschar *) p0;
++	const uschar *q;
 +
- 	/* s = f->reset ? makeinit(f,1) : f->initstat; */
- 	if (f->reset) {
- 		f->initstat = s = makeinit(f,1);
- 	} else {
- 		s = f->initstat;
- 	}
- 	patbeg = (char *) p;
++	s = f->initstat;
++	assert(s < f->state_count);
++
++	patbeg = (const char *)p;
 +	patlen = -1;
 +	do {
 +		q = p;
 +		do {
 +			if (f->out[s])		/* final state */
 +				patlen = q-p;
 +			/* assert(*q < NCHARS); */
 +			if ((ns = f->gototab[s][*q]) != 0)
 +				s = ns;
 +			else
 +				s = cgoto(f, s, *q);
++
++			assert(s < f->state_count);
++
 +			if (s == 1) {	/* no transition */
 +				if (patlen >= 0) {
- 					patbeg = (char *) p;
++					patbeg = (const char *) p;
 +					return(1);
 +				}
 +				else
 +					goto nextin;	/* no match */
 +			}
 +		} while (*q++ != 0);
 +		if (f->out[s])
 +			patlen = q-p-1;	/* don't count $ */
 +		if (patlen >= 0) {
- 			patbeg = (char *) p;
++			patbeg = (const char *) p;
 +			return(1);
 +		}
 +	nextin:
 +		s = 2;
++#if 0 /* XXX */
 +		if (f->reset) {
 +			for (i = 2; i <= f->curstat; i++)
- 				xfree(f->posns[i]);
++n				xfree(f->posns[i]);
 +			k = *f->posns[0];			
 +			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
 +				overflo("out of space in pmatch");
 +			for (i = 0; i <= k; i++)
 +				(f->posns[2])[i] = (f->posns[0])[i];
 +			f->initstat = f->curstat = 2;
 +			f->out[2] = f->out[0];
 +			for (i = 0; i < NCHARS; i++)
 +				f->gototab[2][i] = 0;
 +		}
++#endif
 +	} while (*p++ != 0);
 +	return (0);
 +}
 +
 +int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
 +{
 +	int s, ns;
- 	uschar *p = (uschar *) p0;
- 	uschar *q;
- 	int i, k;
++	const uschar *p = (const uschar *) p0;
++	const uschar *q;
 +
- 	/* s = f->reset ? makeinit(f,1) : f->initstat; */
- 	if (f->reset) {
- 		f->initstat = s = makeinit(f,1);
- 	} else {
- 		s = f->initstat;
- 	}
++	s = f->initstat;
++	assert(s < f->state_count);
++
++	patbeg = (const char *)p;
 +	patlen = -1;
 +	while (*p) {
 +		q = p;
 +		do {
 +			if (f->out[s])		/* final state */
 +				patlen = q-p;
 +			/* assert(*q < NCHARS); */
 +			if ((ns = f->gototab[s][*q]) != 0)
 +				s = ns;
 +			else
 +				s = cgoto(f, s, *q);
 +			if (s == 1) {	/* no transition */
 +				if (patlen > 0) {
- 					patbeg = (char *) p;
++					patbeg = (const char *) p;
 +					return(1);
 +				} else
 +					goto nnextin;	/* no nonempty match */
 +			}
 +		} while (*q++ != 0);
 +		if (f->out[s])
 +			patlen = q-p-1;	/* don't count $ */
 +		if (patlen > 0 ) {
- 			patbeg = (char *) p;
++			patbeg = (const char *) p;
 +			return(1);
 +		}
 +	nnextin:
 +		s = 2;
++#if 0 /* XXX */
 +		if (f->reset) {
 +			for (i = 2; i <= f->curstat; i++)
 +				xfree(f->posns[i]);
 +			k = *f->posns[0];			
 +			if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
 +				overflo("out of state space");
 +			for (i = 0; i <= k; i++)
 +				(f->posns[2])[i] = (f->posns[0])[i];
 +			f->initstat = f->curstat = 2;
 +			f->out[2] = f->out[0];
 +			for (i = 0; i < NCHARS; i++)
 +				f->gototab[2][i] = 0;
 +		}
++#endif
 +		p++;
 +	}
 +	return (0);
 +}
 +
++
++/*
++ * NAME
++ *     fnematch
++ *
++ * DESCRIPTION
++ *     A stream-fed version of nematch which transfers characters to a
++ *     null-terminated buffer. All characters up to and including the last
++ *     character of the matching text or EOF are placed in the buffer. If
++ *     a match is found, patbeg and patlen are set appropriately.
++ *
++ * RETURN VALUES
++ *     false    No match found.
++ *     true     Match found.
++ */
++
++bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
++{
++	char *buf = *pbuf;
++	int bufsize = *pbufsize;
++	int c, i, j, k, ns, s;
++
++	s = pfa->initstat;
++	patlen = 0;
++
++	/*
++	 * All indices relative to buf.
++	 * i <= j <= k <= bufsize
++	 *
++	 * i: origin of active substring
++	 * j: current character
++	 * k: destination of next getc()
++	 */
++	i = -1, k = 0;
++        do {
++		j = i++;
++		do {
++			if (++j == k) {
++				if (k == bufsize)
++					if (!adjbuf((char **) &buf, &bufsize, bufsize+1, quantum, 0, "fnematch"))
++						FATAL("stream '%.30s...' too long", buf);
++				buf[k++] = (c = getc(f)) != EOF ? c : 0;
++			}
++			c = (uschar)buf[j];
++			/* assert(c < NCHARS); */
++
++			if ((ns = pfa->gototab[s][c]) != 0)
++				s = ns;
++			else
++				s = cgoto(pfa, s, c);
++
++			if (pfa->out[s]) {	/* final state */
++				patlen = j - i + 1;
++				if (c == 0)	/* don't count $ */
++					patlen--;
++			}
++		} while (buf[j] && s != 1);
++		s = 2;
++	} while (buf[i] && !patlen);
++
++	/* adjbuf() may have relocated a resized buffer. Inform the world. */
++	*pbuf = buf;
++	*pbufsize = bufsize;
++
++	if (patlen) {
++		patbeg = (char *) buf + i;
++		/*
++		 * Under no circumstances is the last character fed to
++		 * the automaton part of the match. It is EOF's nullbyte,
++		 * or it sent the automaton into a state with no further
++		 * transitions available (s==1), or both. Room for a
++		 * terminating nullbyte is guaranteed.
++		 *
++		 * ungetc any chars after the end of matching text
++		 * (except for EOF's nullbyte, if present) and null
++		 * terminate the buffer.
++		 */
++		do
++			if (buf[--k] && ungetc(buf[k], f) == EOF)
++				FATAL("unable to ungetc '%c'", buf[k]);
++		while (k > i + patlen);
++		buf[k] = '\0';
++		return true;
++	}
++	else
++		return false;
++}
++
 +Node *reparse(const char *p)	/* parses regular expression pointed to by p */
 +{			/* uses relex() to scan regular expression */
 +	Node *np;
 +
- 	dprintf( ("reparse <%s>\n", p) );
- 	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
++	DPRINTF("reparse <%s>\n", p);
++	lastre = prestr = (const uschar *) p;	/* prestr points to string to be parsed */
 +	rtok = relex();
 +	/* GNU compatibility: an empty regexp matches anything */
 +	if (rtok == '\0') {
 +		/* FATAL("empty regular expression"); previous */
 +		return(op2(EMPTYRE, NIL, NIL));
 +	}
 +	np = regexp();
 +	if (rtok != '\0')
 +		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
 +	return(np);
 +}
 +
 +Node *regexp(void)	/* top-level parse of reg expr */
 +{
 +	return (alt(concat(primary())));
 +}
 +
 +Node *primary(void)
 +{
 +	Node *np;
 +	int savelastatom;
 +
 +	switch (rtok) {
 +	case CHAR:
 +		lastatom = starttok;
 +		np = op2(CHAR, NIL, itonp(rlxval));
 +		rtok = relex();
 +		return (unary(np));
 +	case ALL:
 +		rtok = relex();
 +		return (unary(op2(ALL, NIL, NIL)));
 +	case EMPTYRE:
 +		rtok = relex();
 +		return (unary(op2(EMPTYRE, NIL, NIL)));
 +	case DOT:
 +		lastatom = starttok;
*** 3688 LINES SKIPPED ***


More information about the dev-commits-src-all mailing list