git: 02cae85fdb9b - stable/14 - ota: Merge one true awk 20240422 (a3b68e649d2d)
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 20 May 2024 20:06:42 UTC
The branch stable/14 has been updated by imp: URL: https://cgit.FreeBSD.org/src/commit/?id=02cae85fdb9b4d959bad253ce3dbb03e554b6f40 commit 02cae85fdb9b4d959bad253ce3dbb03e554b6f40 Author: Warner Losh <imp@FreeBSD.org> AuthorDate: 2024-05-04 21:50:33 +0000 Commit: Warner Losh <imp@FreeBSD.org> CommitDate: 2024-05-20 19:29:19 +0000 ota: Merge one true awk 20240422 (a3b68e649d2d) Apr 22, 2024: fixed regex engine gototab reallocation issue that was introduced during the Nov 24 rewrite. Thanks to Arnold Robbins. Fixed a scan bug in split in the case the separator is a single character. thanks to Oguz Ismail for spotting the issue. Mar 10, 2024: fixed use-after-free bug in fnematch due to adjbuf invalidating the pointers to buf. thanks to github user caffe3 for spotting the issue and providing a fix, and to Miguel Pineiro Jr. for the alternative fix. MAX_UTF_BYTES in fnematch has been replaced with awk_mb_cur_max. thanks to Miguel Pineiro Jr. Sponsored by: Netflix (cherry picked from commit 1023317ac491090f8d84a62999ffc303cf88528c) --- contrib/one-true-awk/ChangeLog | 24 --- contrib/one-true-awk/FIXES | 14 ++ contrib/one-true-awk/awk.1 | 52 ----- contrib/one-true-awk/awk.h | 8 - contrib/one-true-awk/awkgram.y | 20 +- contrib/one-true-awk/b.c | 34 +-- contrib/one-true-awk/bugs-fixed/REGRESS | 2 +- contrib/one-true-awk/bugs-fixed/system-status.ok2 | 3 + contrib/one-true-awk/lex.c | 9 - contrib/one-true-awk/main.c | 2 +- contrib/one-true-awk/maketab.c | 1 - contrib/one-true-awk/parse.c | 23 -- contrib/one-true-awk/proto.h | 3 - contrib/one-true-awk/run.c | 244 +--------------------- 14 files changed, 44 insertions(+), 395 deletions(-) diff --git a/contrib/one-true-awk/ChangeLog b/contrib/one-true-awk/ChangeLog index dea4ed7e3187..6ce9417c10da 100644 --- a/contrib/one-true-awk/ChangeLog +++ b/contrib/one-true-awk/ChangeLog @@ -47,30 +47,6 @@ * test/T.lilly: Remove gawk warnings from output, improves portability. -2019-10-17 Arnold D. Robbins <arnold@skeeve.com> - - Pull in systime() and strftime() from the NetBSD awk. - - * awk.1: Document the functions. - * run.c (bltin): Implement the functions. - * awk.h: Add defines for systime and strftime. - * lex.c: Add support for systime and strftime. - -2019-10-07 Arnold D. Robbins <arnold@skeeve.com> - - Integrate features from different *BSD versions of awk. - Gensub support from NetBSD. Bitwise functions from OpenBSD. - - * awk.h: Add defines for and, or, xor, compl, lshift and rshift. - * awkgram.y: Add support for gensub. - * maketab.c: Ditto. - * lex.c: Add support for gensub and bitwise functions. - * parse.c (node5, op5): New functions. - * proto.h (node5, op5): New declarations. - * run.c (bltin): Implement the bitwise functions. - (gensub): New function. - * awk.1: Document additional functions. - 2019-10-07 Arnold D. Robbins <arnold@skeeve.com> * b.c (fnematch): Change type of pbuf from unsigned char to char. diff --git a/contrib/one-true-awk/FIXES b/contrib/one-true-awk/FIXES index a043b356fafa..c4eef3bd8ea0 100644 --- a/contrib/one-true-awk/FIXES +++ b/contrib/one-true-awk/FIXES @@ -25,6 +25,20 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the second edition of the AWK book was published in September 2023. +Apr 22, 2024: + fixed regex engine gototab reallocation issue that was + introduced during the Nov 24 rewrite. Thanks to Arnold Robbins. + Fixed a scan bug in split in the case the separator is a single + character. thanks to Oguz Ismail for spotting the issue. + +Mar 10, 2024: + fixed use-after-free bug in fnematch due to adjbuf invalidating + the pointers to buf. thanks to github user caffe3 for spotting + the issue and providing a fix, and to Miguel Pineiro Jr. + for the alternative fix. + MAX_UTF_BYTES in fnematch has been replaced with awk_mb_cur_max. + thanks to Miguel Pineiro Jr. + Jan 22, 2024: Restore the ability to compile with g++. Thanks to Arnold Robbins. diff --git a/contrib/one-true-awk/awk.1 b/contrib/one-true-awk/awk.1 index 496a2a652379..ef40a0104468 100644 --- a/contrib/one-true-awk/awk.1 +++ b/contrib/one-true-awk/awk.1 @@ -305,25 +305,6 @@ and .B gsub return the number of replacements. .TP -\fBgensub(\fIpat\fB, \fIrepl\fB, \fIhow\fR [\fB, \fItarget\fR]\fB)\fR -replaces instances of -.I pat -in -.I target -with -.IR repl . -If -.I how -is \fB"g"\fR or \fB"G"\fR, do so globally. Otherwise, -.I how -is a number indicating which occurrence to replace. If no -.IR target , -use -.BR $0 . -Return the resulting string; -.I target -is not modified. -.TP .BI sprintf( fmt , " expr" , " ...\fB) the string resulting from formatting .I expr ... @@ -332,28 +313,6 @@ according to the format .IR fmt . .TP -.B systime() -returns the current date and time as a standard -``seconds since the epoch'' value. -.TP -.BI strftime( fmt ", " timestamp\^ ) -formats -.I timestamp -(a value in seconds since the epoch) -according to -.IR fmt , -which is a format string as supported by -.IR strftime (3). -Both -.I timestamp -and -.I fmt -may be omitted; if no -.IR timestamp , -the current time of day is used, and if no -.IR fmt , -a default format of \fB"%a %b %e %H:%M:%S %Z %Y"\fR is used. -.TP .BI system( cmd ) executes .I cmd @@ -413,17 +372,6 @@ In all cases, returns 1 for a successful input, 0 for end of file, and \-1 for an error. .PP -The functions -.BR compl , -.BR and , -.BR or , -.BR xor , -.BR lshift , -and -.B rshift -peform the corresponding bitwise operations on their -operands, which are first truncated to integer. -.PP Patterns are arbitrary Boolean combinations (with .BR "! || &&" ) diff --git a/contrib/one-true-awk/awk.h b/contrib/one-true-awk/awk.h index 740447ee2167..76180e47f16a 100644 --- a/contrib/one-true-awk/awk.h +++ b/contrib/one-true-awk/awk.h @@ -154,14 +154,6 @@ extern Cell *symtabloc; /* SYMTAB */ #define FTOUPPER 12 #define FTOLOWER 13 #define FFLUSH 14 -#define FAND 15 -#define FFOR 16 -#define FXOR 17 -#define FCOMPL 18 -#define FLSHIFT 19 -#define FRSHIFT 20 -#define FSYSTIME 21 -#define FSTRFTIME 22 /* Node: parse tree is made of nodes, with Cell's at bottom */ diff --git a/contrib/one-true-awk/awkgram.y b/contrib/one-true-awk/awkgram.y index 233253a4307b..db804e117e19 100644 --- a/contrib/one-true-awk/awkgram.y +++ b/contrib/one-true-awk/awkgram.y @@ -53,7 +53,7 @@ Node *arglist = 0; /* list of args for current function */ %token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO %token <i> AND BOR APPEND EQ GE GT LE LT NE IN %token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC -%token <i> GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE +%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE %token <i> ADD MINUS MULT DIVIDE MOD %token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ %token <i> PRINT PRINTF SPRINTF @@ -377,24 +377,6 @@ term: | INCR var { $$ = op1(PREINCR, $2); } | var DECR { $$ = op1(POSTDECR, $1); } | var INCR { $$ = op1(POSTINCR, $1); } - | GENSUB '(' reg_expr comma pattern comma pattern ')' - { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); } - | GENSUB '(' pattern comma pattern comma pattern ')' - { if (constnode($3)) { - $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode()); - free($3); - } else - $$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode()); - } - | GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')' - { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); } - | GENSUB '(' pattern comma pattern comma pattern comma pattern ')' - { if (constnode($3)) { - $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9); - free($3); - } else - $$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9); - } | GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); } | GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); } | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); } diff --git a/contrib/one-true-awk/b.c b/contrib/one-true-awk/b.c index 4c438fab4cd4..f650269753c7 100644 --- a/contrib/one-true-awk/b.c +++ b/contrib/one-true-awk/b.c @@ -651,8 +651,8 @@ static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab implem if (tab->inuse + 1 >= tab->allocated) resize_gototab(f, state); - f->gototab[state].entries[f->gototab[state].inuse-1].ch = ch; - f->gototab[state].entries[f->gototab[state].inuse-1].state = val; + f->gototab[state].entries[f->gototab[state].inuse].ch = ch; + f->gototab[state].entries[f->gototab[state].inuse].state = val; f->gototab[state].inuse++; return val; } else { @@ -677,9 +677,9 @@ static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab implem gtt *tab = & f->gototab[state]; if (tab->inuse + 1 >= tab->allocated) resize_gototab(f, state); - ++tab->inuse; f->gototab[state].entries[tab->inuse].ch = ch; f->gototab[state].entries[tab->inuse].state = val; + ++tab->inuse; qsort(f->gototab[state].entries, f->gototab[state].inuse, sizeof(gtte), entry_cmp); @@ -830,8 +830,6 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */ } -#define MAX_UTF_BYTES 4 // UTF-8 is up to 4 bytes long - /* * NAME * fnematch @@ -868,16 +866,28 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum) do { /* - * Call u8_rune with at least MAX_UTF_BYTES ahead in + * Call u8_rune with at least awk_mb_cur_max ahead in * the buffer until EOF interferes. */ - if (k - j < MAX_UTF_BYTES) { - if (k + MAX_UTF_BYTES > buf + bufsize) { + if (k - j < awk_mb_cur_max) { + if (k + awk_mb_cur_max > buf + bufsize) { + char *obuf = buf; adjbuf((char **) &buf, &bufsize, - bufsize + MAX_UTF_BYTES, + bufsize + awk_mb_cur_max, quantum, 0, "fnematch"); + + /* buf resized, maybe moved. update pointers */ + *pbufsize = bufsize; + if (obuf != buf) { + i = buf + (i - obuf); + j = buf + (j - obuf); + k = buf + (k - obuf); + *pbuf = buf; + if (patlen) + patbeg = buf + (patbeg - obuf); + } } - for (n = MAX_UTF_BYTES ; n > 0; n--) { + for (n = awk_mb_cur_max ; n > 0; n--) { *k++ = (c = getc(f)) != EOF ? c : 0; if (c == EOF) { if (ferror(f)) @@ -914,10 +924,6 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum) s = 2; } while (1); - /* adjbuf() may have relocated a resized buffer. Inform the world. */ - *pbuf = buf; - *pbufsize = bufsize; - if (patlen) { /* * Under no circumstances is the last character fed to diff --git a/contrib/one-true-awk/bugs-fixed/REGRESS b/contrib/one-true-awk/bugs-fixed/REGRESS index acdbeebb6271..30bdc7cd5c0f 100755 --- a/contrib/one-true-awk/bugs-fixed/REGRESS +++ b/contrib/one-true-awk/bugs-fixed/REGRESS @@ -27,6 +27,6 @@ do then rm -f $OUT else - echo '++++ $i failed!' + echo "+++ $i failed!" fi done diff --git a/contrib/one-true-awk/bugs-fixed/system-status.ok2 b/contrib/one-true-awk/bugs-fixed/system-status.ok2 new file mode 100644 index 000000000000..f1f631e1cb33 --- /dev/null +++ b/contrib/one-true-awk/bugs-fixed/system-status.ok2 @@ -0,0 +1,3 @@ +normal status 42 +death by signal status 257 +death by signal with core dump status 262 diff --git a/contrib/one-true-awk/lex.c b/contrib/one-true-awk/lex.c index 141cc81d2b59..0473a338c906 100644 --- a/contrib/one-true-awk/lex.c +++ b/contrib/one-true-awk/lex.c @@ -47,11 +47,9 @@ const Keyword keywords[] = { /* keep sorted: binary searched */ { "BEGIN", XBEGIN, XBEGIN }, { "END", XEND, XEND }, { "NF", VARNF, VARNF }, - { "and", FAND, BLTIN }, { "atan2", FATAN, BLTIN }, { "break", BREAK, BREAK }, { "close", CLOSE, CLOSE }, - { "compl", FCOMPL, BLTIN }, { "continue", CONTINUE, CONTINUE }, { "cos", FCOS, BLTIN }, { "delete", DELETE, DELETE }, @@ -63,7 +61,6 @@ const Keyword keywords[] = { /* keep sorted: binary searched */ { "for", FOR, FOR }, { "func", FUNC, FUNC }, { "function", FUNC, FUNC }, - { "gensub", GENSUB, GENSUB }, { "getline", GETLINE, GETLINE }, { "gsub", GSUB, GSUB }, { "if", IF, IF }, @@ -72,30 +69,24 @@ const Keyword keywords[] = { /* keep sorted: binary searched */ { "int", FINT, BLTIN }, { "length", FLENGTH, BLTIN }, { "log", FLOG, BLTIN }, - { "lshift", FLSHIFT, BLTIN }, { "match", MATCHFCN, MATCHFCN }, { "next", NEXT, NEXT }, { "nextfile", NEXTFILE, NEXTFILE }, - { "or", FFOR, BLTIN }, { "print", PRINT, PRINT }, { "printf", PRINTF, PRINTF }, { "rand", FRAND, BLTIN }, { "return", RETURN, RETURN }, - { "rshift", FRSHIFT, BLTIN }, { "sin", FSIN, BLTIN }, { "split", SPLIT, SPLIT }, { "sprintf", SPRINTF, SPRINTF }, { "sqrt", FSQRT, BLTIN }, { "srand", FSRAND, BLTIN }, - { "strftime", FSTRFTIME, BLTIN }, { "sub", SUB, SUB }, { "substr", SUBSTR, SUBSTR }, { "system", FSYSTEM, BLTIN }, - { "systime", FSYSTIME, BLTIN }, { "tolower", FTOLOWER, BLTIN }, { "toupper", FTOUPPER, BLTIN }, { "while", WHILE, WHILE }, - { "xor", FXOR, BLTIN }, }; #define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); } diff --git a/contrib/one-true-awk/main.c b/contrib/one-true-awk/main.c index 73af89ec1058..0e70288a92f3 100644 --- a/contrib/one-true-awk/main.c +++ b/contrib/one-true-awk/main.c @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -const char *version = "version 20240122"; +const char *version = "version 20240422"; #define DEBUG #include <stdio.h> diff --git a/contrib/one-true-awk/maketab.c b/contrib/one-true-awk/maketab.c index 3a80c87725ac..3747efa03702 100644 --- a/contrib/one-true-awk/maketab.c +++ b/contrib/one-true-awk/maketab.c @@ -104,7 +104,6 @@ struct xx { ARG, "arg", "arg" }, { VARNF, "getnf", "NF" }, { GETLINE, "awkgetline", "getline" }, - { GENSUB, "gensub", "gensub" }, { 0, "", "" }, }; diff --git a/contrib/one-true-awk/parse.c b/contrib/one-true-awk/parse.c index 2b7fd1928930..14608be7570a 100644 --- a/contrib/one-true-awk/parse.c +++ b/contrib/one-true-awk/parse.c @@ -93,20 +93,6 @@ Node *node4(int a, Node *b, Node *c, Node *d, Node *e) return(x); } -Node *node5(int a, Node *b, Node *c, Node *d, Node *e, Node *f) -{ - Node *x; - - x = nodealloc(5); - x->nobj = a; - x->narg[0] = b; - x->narg[1] = c; - x->narg[2] = d; - x->narg[3] = e; - x->narg[4] = f; - return(x); -} - Node *stat1(int a, Node *b) { Node *x; @@ -179,15 +165,6 @@ Node *op4(int a, Node *b, Node *c, Node *d, Node *e) return(x); } -Node *op5(int a, Node *b, Node *c, Node *d, Node *e, Node *f) -{ - Node *x; - - x = node5(a,b,c,d,e,f); - x->ntype = NEXPR; - return(x); -} - Node *celltonode(Cell *a, int b) { Node *x; diff --git a/contrib/one-true-awk/proto.h b/contrib/one-true-awk/proto.h index b44f9e7a5599..ed63e7875da3 100644 --- a/contrib/one-true-awk/proto.h +++ b/contrib/one-true-awk/proto.h @@ -73,14 +73,12 @@ extern Node *node1(int, Node *); extern Node *node2(int, Node *, Node *); extern Node *node3(int, Node *, Node *, Node *); extern Node *node4(int, Node *, Node *, Node *, Node *); -extern Node *node5(int, Node *, Node *, Node *, Node *, Node *); extern Node *stat3(int, Node *, Node *, Node *); extern Node *op2(int, Node *, Node *); extern Node *op1(int, Node *); extern Node *stat1(int, Node *); extern Node *op3(int, Node *, Node *, Node *); extern Node *op4(int, Node *, Node *, Node *, Node *); -extern Node *op5(int, Node *, Node *, Node *, Node *, Node *); extern Node *stat2(int, Node *, Node *); extern Node *stat4(int, Node *, Node *, Node *, Node *); extern Node *celltonode(Cell *, int); @@ -199,7 +197,6 @@ extern const char *filename(FILE *); extern Cell *closefile(Node **, int); extern void closeall(void); extern Cell *dosub(Node **, int); -extern Cell *gensub(Node **, int); extern FILE *popen(const char *, const char *); extern int pclose(FILE *); diff --git a/contrib/one-true-awk/run.c b/contrib/one-true-awk/run.c index ede3ba7d9827..99306992df41 100644 --- a/contrib/one-true-awk/run.c +++ b/contrib/one-true-awk/run.c @@ -1827,7 +1827,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ for (;;) { n++; t = s; - while (*s != sep && *s != '\n' && *s != '\0') + while (*s != sep && *s != '\0') s++; temp = *s; setptr(s, '\0'); @@ -2062,14 +2062,12 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis { Cell *x, *y; Awkfloat u; - int t, sz; + int t; Awkfloat tmp; - char *buf, *fmt; + char *buf; Node *nextarg; FILE *fp; int status = 0; - time_t tv; - struct tm *tm; int estatus = 0; t = ptoi(a[0]); @@ -2111,64 +2109,6 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis nextarg = nextarg->nnext; } break; - case FCOMPL: - u = ~((int)getfval(x)); - break; - case FAND: - if (nextarg == 0) { - WARNING("and requires two arguments; returning 0"); - u = 0; - break; - } - y = execute(a[1]->nnext); - u = ((int)getfval(x)) & ((int)getfval(y)); - tempfree(y); - nextarg = nextarg->nnext; - break; - case FFOR: - if (nextarg == 0) { - WARNING("or requires two arguments; returning 0"); - u = 0; - break; - } - y = execute(a[1]->nnext); - u = ((int)getfval(x)) | ((int)getfval(y)); - tempfree(y); - nextarg = nextarg->nnext; - break; - case FXOR: - if (nextarg == 0) { - WARNING("xor requires two arguments; returning 0"); - u = 0; - break; - } - y = execute(a[1]->nnext); - u = ((int)getfval(x)) ^ ((int)getfval(y)); - tempfree(y); - nextarg = nextarg->nnext; - break; - case FLSHIFT: - if (nextarg == 0) { - WARNING("lshift requires two arguments; returning 0"); - u = 0; - break; - } - y = execute(a[1]->nnext); - u = ((int)getfval(x)) << ((int)getfval(y)); - tempfree(y); - nextarg = nextarg->nnext; - break; - case FRSHIFT: - if (nextarg == 0) { - WARNING("rshift requires two arguments; returning 0"); - u = 0; - break; - } - y = execute(a[1]->nnext); - u = ((int)getfval(x)) >> ((int)getfval(y)); - tempfree(y); - nextarg = nextarg->nnext; - break; case FSYSTEM: fflush(stdout); /* in case something is buffered already */ estatus = status = system(getsval(x)); @@ -2223,41 +2163,6 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis else u = fflush(fp); break; - case FSYSTIME: - u = time((time_t *) 0); - break; - case FSTRFTIME: - /* strftime([format [,timestamp]]) */ - if (nextarg) { - y = execute(nextarg); - nextarg = nextarg->nnext; - tv = (time_t) getfval(y); - tempfree(y); - } else - tv = time((time_t *) 0); - tm = localtime(&tv); - if (tm == NULL) - FATAL("bad time %ld", (long)tv); - - if (isrec(x)) { - /* format argument not provided, use default */ - fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); - } else - fmt = tostring(getsval(x)); - - sz = 32; - buf = NULL; - do { - if ((buf = realloc(buf, (sz *= 2))) == NULL) - FATAL("out of memory in strftime"); - } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); - - y = gettemp(); - setsval(y, buf); - free(fmt); - free(buf); - - return y; default: /* can't happen */ FATAL("illegal function type %d", t); break; @@ -2501,7 +2406,7 @@ void backsub(char **pb_ptr, const char **sptr_ptr); Cell *dosub(Node **a, int subop) /* sub and gsub */ { fa *pfa; - int tempstat; + int tempstat = 0; char *repl; Cell *x; @@ -2637,147 +2542,6 @@ next_search: return x; } -Cell *gensub(Node **a, int nnn) /* global selective substitute */ - /* XXX incomplete - doesn't support backreferences \0 ... \9 */ -{ - Cell *x, *y, *res, *h; - char *rptr; - const char *sptr; - char *buf, *pb; - const char *t, *q; - fa *pfa; - int mflag, tempstat, num, whichm; - int bufsz = recsize; - - if ((buf = malloc(bufsz)) == NULL) - FATAL("out of memory in gensub"); - mflag = 0; /* if mflag == 0, can replace empty string */ - num = 0; - x = execute(a[4]); /* source string */ - t = getsval(x); - res = copycell(x); /* target string - initially copy of source */ - res->csub = CTEMP; /* result values are temporary */ - if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ - pfa = (fa *) a[1]; /* regular expression */ - else { - y = execute(a[1]); - pfa = makedfa(getsval(y), 1); - tempfree(y); - } - y = execute(a[2]); /* replacement string */ - h = execute(a[3]); /* which matches should be replaced */ - sptr = getsval(h); - if (sptr[0] == 'g' || sptr[0] == 'G') - whichm = -1; - else { - /* - * The specified number is index of replacement, starting - * from 1. GNU awk treats index lower than 0 same as - * 1, we do same for compatibility. - */ - whichm = (int) getfval(h) - 1; - if (whichm < 0) - whichm = 0; - } - tempfree(h); - - if (pmatch(pfa, t)) { - char *sl; - - tempstat = pfa->initstat; - pfa->initstat = 2; - pb = buf; - rptr = getsval(y); - /* - * XXX if there are any backreferences in subst string, - * complain now. - */ - for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { - if (strchr("0123456789", sl[1])) { - FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); - } - } - - do { - if (whichm >= 0 && whichm != num) { - num++; - adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); - - /* copy the part of string up to and including - * match to output buffer */ - while (t < patbeg + patlen) - *pb++ = *t++; - continue; - } - - if (patlen == 0 && *patbeg != 0) { /* matched empty string */ - if (mflag == 0) { /* can replace empty */ - num++; - sptr = rptr; - while (*sptr != 0) { - adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); - if (*sptr == '\\') { - backsub(&pb, &sptr); - } else if (*sptr == '&') { - sptr++; - adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); - for (q = patbeg; q < patbeg+patlen; ) - *pb++ = *q++; - } else - *pb++ = *sptr++; - } - } - if (*t == 0) /* at end */ - goto done; - adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); - *pb++ = *t++; - if (pb > buf + bufsz) /* BUG: not sure of this test */ - FATAL("gensub result0 %.30s too big; can't happen", buf); - mflag = 0; - } - else { /* matched nonempty string */ - num++; - sptr = t; - adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); - while (sptr < patbeg) - *pb++ = *sptr++; - sptr = rptr; - while (*sptr != 0) { - adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); - if (*sptr == '\\') { - backsub(&pb, &sptr); - } else if (*sptr == '&') { - sptr++; - adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); - for (q = patbeg; q < patbeg+patlen; ) - *pb++ = *q++; - } else - *pb++ = *sptr++; - } - t = patbeg + patlen; - if (patlen == 0 || *t == 0 || *(t-1) == 0) - goto done; - if (pb > buf + bufsz) - FATAL("gensub result1 %.30s too big; can't happen", buf); - mflag = 1; - } - } while (pmatch(pfa,t)); - sptr = t; - adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); - while ((*pb++ = *sptr++) != 0) - ; - done: if (pb > buf + bufsz) - FATAL("gensub result2 %.30s too big; can't happen", buf); - *pb = '\0'; - setsval(res, buf); - pfa->initstat = tempstat; - } - tempfree(x); - tempfree(y); - free(buf); - return(res); -} - void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ { /* sptr[0] == '\\' */ char *pb = *pb_ptr;