git: 284244ac3683 - stable/14 - awk: Merge in bsd-feature branch of OTA from 20240422 (31bb33a32f71)
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 20 May 2024 20:06:43 UTC
The branch stable/14 has been updated by imp: URL: https://cgit.FreeBSD.org/src/commit/?id=284244ac36832b71f2e9e128f45ce132a3c293b0 commit 284244ac36832b71f2e9e128f45ce132a3c293b0 Author: Warner Losh <imp@FreeBSD.org> AuthorDate: 2024-05-14 18:15:43 +0000 Commit: Warner Losh <imp@FreeBSD.org> CommitDate: 2024-05-20 19:29:26 +0000 awk: Merge in bsd-feature branch of OTA from 20240422 (31bb33a32f71) In the last 2nd edition import, I mistakenly grabbed from the 'main' branch of upstream rather than the bsd-feature branch. This means that we have a regression in awk from that point forward: all the BSD-specific bit functions (and a few others) were dropped. This restores it at the same level. MFC After: 1 day Sponsored by: Netflix (cherry picked from commit eb690a0576e8cf5412124477eace3bb765068c3d) --- contrib/one-true-awk/ChangeLog | 24 +++++ contrib/one-true-awk/awk.1 | 52 +++++++++ contrib/one-true-awk/awk.h | 8 ++ contrib/one-true-awk/awkgram.y | 20 +++- contrib/one-true-awk/lex.c | 9 ++ contrib/one-true-awk/maketab.c | 1 + contrib/one-true-awk/parse.c | 23 ++++ contrib/one-true-awk/proto.h | 3 + contrib/one-true-awk/run.c | 240 ++++++++++++++++++++++++++++++++++++++++- 9 files changed, 377 insertions(+), 3 deletions(-) diff --git a/contrib/one-true-awk/ChangeLog b/contrib/one-true-awk/ChangeLog index 6ce9417c10da..dea4ed7e3187 100644 --- a/contrib/one-true-awk/ChangeLog +++ b/contrib/one-true-awk/ChangeLog @@ -47,6 +47,30 @@ * test/T.lilly: Remove gawk warnings from output, improves portability. +2019-10-17 Arnold D. Robbins <arnold@skeeve.com> + + Pull in systime() and strftime() from the NetBSD awk. + + * awk.1: Document the functions. + * run.c (bltin): Implement the functions. + * awk.h: Add defines for systime and strftime. + * lex.c: Add support for systime and strftime. + +2019-10-07 Arnold D. Robbins <arnold@skeeve.com> + + Integrate features from different *BSD versions of awk. + Gensub support from NetBSD. Bitwise functions from OpenBSD. + + * awk.h: Add defines for and, or, xor, compl, lshift and rshift. + * awkgram.y: Add support for gensub. + * maketab.c: Ditto. + * lex.c: Add support for gensub and bitwise functions. + * parse.c (node5, op5): New functions. + * proto.h (node5, op5): New declarations. + * run.c (bltin): Implement the bitwise functions. + (gensub): New function. + * awk.1: Document additional functions. + 2019-10-07 Arnold D. Robbins <arnold@skeeve.com> * b.c (fnematch): Change type of pbuf from unsigned char to char. diff --git a/contrib/one-true-awk/awk.1 b/contrib/one-true-awk/awk.1 index ef40a0104468..496a2a652379 100644 --- a/contrib/one-true-awk/awk.1 +++ b/contrib/one-true-awk/awk.1 @@ -305,6 +305,25 @@ and .B gsub return the number of replacements. .TP +\fBgensub(\fIpat\fB, \fIrepl\fB, \fIhow\fR [\fB, \fItarget\fR]\fB)\fR +replaces instances of +.I pat +in +.I target +with +.IR repl . +If +.I how +is \fB"g"\fR or \fB"G"\fR, do so globally. Otherwise, +.I how +is a number indicating which occurrence to replace. If no +.IR target , +use +.BR $0 . +Return the resulting string; +.I target +is not modified. +.TP .BI sprintf( fmt , " expr" , " ...\fB) the string resulting from formatting .I expr ... @@ -313,6 +332,28 @@ according to the format .IR fmt . .TP +.B systime() +returns the current date and time as a standard +``seconds since the epoch'' value. +.TP +.BI strftime( fmt ", " timestamp\^ ) +formats +.I timestamp +(a value in seconds since the epoch) +according to +.IR fmt , +which is a format string as supported by +.IR strftime (3). +Both +.I timestamp +and +.I fmt +may be omitted; if no +.IR timestamp , +the current time of day is used, and if no +.IR fmt , +a default format of \fB"%a %b %e %H:%M:%S %Z %Y"\fR is used. +.TP .BI system( cmd ) executes .I cmd @@ -372,6 +413,17 @@ In all cases, returns 1 for a successful input, 0 for end of file, and \-1 for an error. .PP +The functions +.BR compl , +.BR and , +.BR or , +.BR xor , +.BR lshift , +and +.B rshift +peform the corresponding bitwise operations on their +operands, which are first truncated to integer. +.PP Patterns are arbitrary Boolean combinations (with .BR "! || &&" ) diff --git a/contrib/one-true-awk/awk.h b/contrib/one-true-awk/awk.h index 76180e47f16a..740447ee2167 100644 --- a/contrib/one-true-awk/awk.h +++ b/contrib/one-true-awk/awk.h @@ -154,6 +154,14 @@ extern Cell *symtabloc; /* SYMTAB */ #define FTOUPPER 12 #define FTOLOWER 13 #define FFLUSH 14 +#define FAND 15 +#define FFOR 16 +#define FXOR 17 +#define FCOMPL 18 +#define FLSHIFT 19 +#define FRSHIFT 20 +#define FSYSTIME 21 +#define FSTRFTIME 22 /* Node: parse tree is made of nodes, with Cell's at bottom */ diff --git a/contrib/one-true-awk/awkgram.y b/contrib/one-true-awk/awkgram.y index db804e117e19..233253a4307b 100644 --- a/contrib/one-true-awk/awkgram.y +++ b/contrib/one-true-awk/awkgram.y @@ -53,7 +53,7 @@ Node *arglist = 0; /* list of args for current function */ %token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO %token <i> AND BOR APPEND EQ GE GT LE LT NE IN %token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC -%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE +%token <i> GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE %token <i> ADD MINUS MULT DIVIDE MOD %token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ %token <i> PRINT PRINTF SPRINTF @@ -377,6 +377,24 @@ term: | INCR var { $$ = op1(PREINCR, $2); } | var DECR { $$ = op1(POSTDECR, $1); } | var INCR { $$ = op1(POSTINCR, $1); } + | GENSUB '(' reg_expr comma pattern comma pattern ')' + { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); } + | GENSUB '(' pattern comma pattern comma pattern ')' + { if (constnode($3)) { + $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode()); + free($3); + } else + $$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode()); + } + | GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')' + { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); } + | GENSUB '(' pattern comma pattern comma pattern comma pattern ')' + { if (constnode($3)) { + $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9); + free($3); + } else + $$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9); + } | GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); } | GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); } | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); } diff --git a/contrib/one-true-awk/lex.c b/contrib/one-true-awk/lex.c index 0473a338c906..141cc81d2b59 100644 --- a/contrib/one-true-awk/lex.c +++ b/contrib/one-true-awk/lex.c @@ -47,9 +47,11 @@ const Keyword keywords[] = { /* keep sorted: binary searched */ { "BEGIN", XBEGIN, XBEGIN }, { "END", XEND, XEND }, { "NF", VARNF, VARNF }, + { "and", FAND, BLTIN }, { "atan2", FATAN, BLTIN }, { "break", BREAK, BREAK }, { "close", CLOSE, CLOSE }, + { "compl", FCOMPL, BLTIN }, { "continue", CONTINUE, CONTINUE }, { "cos", FCOS, BLTIN }, { "delete", DELETE, DELETE }, @@ -61,6 +63,7 @@ const Keyword keywords[] = { /* keep sorted: binary searched */ { "for", FOR, FOR }, { "func", FUNC, FUNC }, { "function", FUNC, FUNC }, + { "gensub", GENSUB, GENSUB }, { "getline", GETLINE, GETLINE }, { "gsub", GSUB, GSUB }, { "if", IF, IF }, @@ -69,24 +72,30 @@ const Keyword keywords[] = { /* keep sorted: binary searched */ { "int", FINT, BLTIN }, { "length", FLENGTH, BLTIN }, { "log", FLOG, BLTIN }, + { "lshift", FLSHIFT, BLTIN }, { "match", MATCHFCN, MATCHFCN }, { "next", NEXT, NEXT }, { "nextfile", NEXTFILE, NEXTFILE }, + { "or", FFOR, BLTIN }, { "print", PRINT, PRINT }, { "printf", PRINTF, PRINTF }, { "rand", FRAND, BLTIN }, { "return", RETURN, RETURN }, + { "rshift", FRSHIFT, BLTIN }, { "sin", FSIN, BLTIN }, { "split", SPLIT, SPLIT }, { "sprintf", SPRINTF, SPRINTF }, { "sqrt", FSQRT, BLTIN }, { "srand", FSRAND, BLTIN }, + { "strftime", FSTRFTIME, BLTIN }, { "sub", SUB, SUB }, { "substr", SUBSTR, SUBSTR }, { "system", FSYSTEM, BLTIN }, + { "systime", FSYSTIME, BLTIN }, { "tolower", FTOLOWER, BLTIN }, { "toupper", FTOUPPER, BLTIN }, { "while", WHILE, WHILE }, + { "xor", FXOR, BLTIN }, }; #define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); } diff --git a/contrib/one-true-awk/maketab.c b/contrib/one-true-awk/maketab.c index 3747efa03702..3a80c87725ac 100644 --- a/contrib/one-true-awk/maketab.c +++ b/contrib/one-true-awk/maketab.c @@ -104,6 +104,7 @@ struct xx { ARG, "arg", "arg" }, { VARNF, "getnf", "NF" }, { GETLINE, "awkgetline", "getline" }, + { GENSUB, "gensub", "gensub" }, { 0, "", "" }, }; diff --git a/contrib/one-true-awk/parse.c b/contrib/one-true-awk/parse.c index 14608be7570a..2b7fd1928930 100644 --- a/contrib/one-true-awk/parse.c +++ b/contrib/one-true-awk/parse.c @@ -93,6 +93,20 @@ Node *node4(int a, Node *b, Node *c, Node *d, Node *e) return(x); } +Node *node5(int a, Node *b, Node *c, Node *d, Node *e, Node *f) +{ + Node *x; + + x = nodealloc(5); + x->nobj = a; + x->narg[0] = b; + x->narg[1] = c; + x->narg[2] = d; + x->narg[3] = e; + x->narg[4] = f; + return(x); +} + Node *stat1(int a, Node *b) { Node *x; @@ -165,6 +179,15 @@ Node *op4(int a, Node *b, Node *c, Node *d, Node *e) return(x); } +Node *op5(int a, Node *b, Node *c, Node *d, Node *e, Node *f) +{ + Node *x; + + x = node5(a,b,c,d,e,f); + x->ntype = NEXPR; + return(x); +} + Node *celltonode(Cell *a, int b) { Node *x; diff --git a/contrib/one-true-awk/proto.h b/contrib/one-true-awk/proto.h index ed63e7875da3..b44f9e7a5599 100644 --- a/contrib/one-true-awk/proto.h +++ b/contrib/one-true-awk/proto.h @@ -73,12 +73,14 @@ extern Node *node1(int, Node *); extern Node *node2(int, Node *, Node *); extern Node *node3(int, Node *, Node *, Node *); extern Node *node4(int, Node *, Node *, Node *, Node *); +extern Node *node5(int, Node *, Node *, Node *, Node *, Node *); extern Node *stat3(int, Node *, Node *, Node *); extern Node *op2(int, Node *, Node *); extern Node *op1(int, Node *); extern Node *stat1(int, Node *); extern Node *op3(int, Node *, Node *, Node *); extern Node *op4(int, Node *, Node *, Node *, Node *); +extern Node *op5(int, Node *, Node *, Node *, Node *, Node *); extern Node *stat2(int, Node *, Node *); extern Node *stat4(int, Node *, Node *, Node *, Node *); extern Node *celltonode(Cell *, int); @@ -197,6 +199,7 @@ extern const char *filename(FILE *); extern Cell *closefile(Node **, int); extern void closeall(void); extern Cell *dosub(Node **, int); +extern Cell *gensub(Node **, int); extern FILE *popen(const char *, const char *); extern int pclose(FILE *); diff --git a/contrib/one-true-awk/run.c b/contrib/one-true-awk/run.c index 99306992df41..9c61b1a1c558 100644 --- a/contrib/one-true-awk/run.c +++ b/contrib/one-true-awk/run.c @@ -2062,12 +2062,14 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis { Cell *x, *y; Awkfloat u; - int t; + int t, sz; Awkfloat tmp; - char *buf; + char *buf, *fmt; Node *nextarg; FILE *fp; int status = 0; + time_t tv; + struct tm *tm; int estatus = 0; t = ptoi(a[0]); @@ -2109,6 +2111,64 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis nextarg = nextarg->nnext; } break; + case FCOMPL: + u = ~((int)getfval(x)); + break; + case FAND: + if (nextarg == 0) { + WARNING("and requires two arguments; returning 0"); + u = 0; + break; + } + y = execute(a[1]->nnext); + u = ((int)getfval(x)) & ((int)getfval(y)); + tempfree(y); + nextarg = nextarg->nnext; + break; + case FFOR: + if (nextarg == 0) { + WARNING("or requires two arguments; returning 0"); + u = 0; + break; + } + y = execute(a[1]->nnext); + u = ((int)getfval(x)) | ((int)getfval(y)); + tempfree(y); + nextarg = nextarg->nnext; + break; + case FXOR: + if (nextarg == 0) { + WARNING("xor requires two arguments; returning 0"); + u = 0; + break; + } + y = execute(a[1]->nnext); + u = ((int)getfval(x)) ^ ((int)getfval(y)); + tempfree(y); + nextarg = nextarg->nnext; + break; + case FLSHIFT: + if (nextarg == 0) { + WARNING("lshift requires two arguments; returning 0"); + u = 0; + break; + } + y = execute(a[1]->nnext); + u = ((int)getfval(x)) << ((int)getfval(y)); + tempfree(y); + nextarg = nextarg->nnext; + break; + case FRSHIFT: + if (nextarg == 0) { + WARNING("rshift requires two arguments; returning 0"); + u = 0; + break; + } + y = execute(a[1]->nnext); + u = ((int)getfval(x)) >> ((int)getfval(y)); + tempfree(y); + nextarg = nextarg->nnext; + break; case FSYSTEM: fflush(stdout); /* in case something is buffered already */ estatus = status = system(getsval(x)); @@ -2163,6 +2223,41 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis else u = fflush(fp); break; + case FSYSTIME: + u = time((time_t *) 0); + break; + case FSTRFTIME: + /* strftime([format [,timestamp]]) */ + if (nextarg) { + y = execute(nextarg); + nextarg = nextarg->nnext; + tv = (time_t) getfval(y); + tempfree(y); + } else + tv = time((time_t *) 0); + tm = localtime(&tv); + if (tm == NULL) + FATAL("bad time %ld", (long)tv); + + if (isrec(x)) { + /* format argument not provided, use default */ + fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); + } else + fmt = tostring(getsval(x)); + + sz = 32; + buf = NULL; + do { + if ((buf = realloc(buf, (sz *= 2))) == NULL) + FATAL("out of memory in strftime"); + } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); + + y = gettemp(); + setsval(y, buf); + free(fmt); + free(buf); + + return y; default: /* can't happen */ FATAL("illegal function type %d", t); break; @@ -2542,6 +2637,147 @@ next_search: return x; } +Cell *gensub(Node **a, int nnn) /* global selective substitute */ + /* XXX incomplete - doesn't support backreferences \0 ... \9 */ +{ + Cell *x, *y, *res, *h; + char *rptr; + const char *sptr; + char *buf, *pb; + const char *t, *q; + fa *pfa; + int mflag, tempstat, num, whichm; + int bufsz = recsize; + + if ((buf = malloc(bufsz)) == NULL) + FATAL("out of memory in gensub"); + mflag = 0; /* if mflag == 0, can replace empty string */ + num = 0; + x = execute(a[4]); /* source string */ + t = getsval(x); + res = copycell(x); /* target string - initially copy of source */ + res->csub = CTEMP; /* result values are temporary */ + if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ + pfa = (fa *) a[1]; /* regular expression */ + else { + y = execute(a[1]); + pfa = makedfa(getsval(y), 1); + tempfree(y); + } + y = execute(a[2]); /* replacement string */ + h = execute(a[3]); /* which matches should be replaced */ + sptr = getsval(h); + if (sptr[0] == 'g' || sptr[0] == 'G') + whichm = -1; + else { + /* + * The specified number is index of replacement, starting + * from 1. GNU awk treats index lower than 0 same as + * 1, we do same for compatibility. + */ + whichm = (int) getfval(h) - 1; + if (whichm < 0) + whichm = 0; + } + tempfree(h); + + if (pmatch(pfa, t)) { + char *sl; + + tempstat = pfa->initstat; + pfa->initstat = 2; + pb = buf; + rptr = getsval(y); + /* + * XXX if there are any backreferences in subst string, + * complain now. + */ + for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { + if (strchr("0123456789", sl[1])) { + FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); + } + } + + do { + if (whichm >= 0 && whichm != num) { + num++; + adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); + + /* copy the part of string up to and including + * match to output buffer */ + while (t < patbeg + patlen) + *pb++ = *t++; + continue; + } + + if (patlen == 0 && *patbeg != 0) { /* matched empty string */ + if (mflag == 0) { /* can replace empty */ + num++; + sptr = rptr; + while (*sptr != 0) { + adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); + if (*sptr == '\\') { + backsub(&pb, &sptr); + } else if (*sptr == '&') { + sptr++; + adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); + for (q = patbeg; q < patbeg+patlen; ) + *pb++ = *q++; + } else + *pb++ = *sptr++; + } + } + if (*t == 0) /* at end */ + goto done; + adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); + *pb++ = *t++; + if (pb > buf + bufsz) /* BUG: not sure of this test */ + FATAL("gensub result0 %.30s too big; can't happen", buf); + mflag = 0; + } + else { /* matched nonempty string */ + num++; + sptr = t; + adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); + while (sptr < patbeg) + *pb++ = *sptr++; + sptr = rptr; + while (*sptr != 0) { + adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); + if (*sptr == '\\') { + backsub(&pb, &sptr); + } else if (*sptr == '&') { + sptr++; + adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); + for (q = patbeg; q < patbeg+patlen; ) + *pb++ = *q++; + } else + *pb++ = *sptr++; + } + t = patbeg + patlen; + if (patlen == 0 || *t == 0 || *(t-1) == 0) + goto done; + if (pb > buf + bufsz) + FATAL("gensub result1 %.30s too big; can't happen", buf); + mflag = 1; + } + } while (pmatch(pfa,t)); + sptr = t; + adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); + while ((*pb++ = *sptr++) != 0) + ; + done: if (pb > buf + bufsz) + FATAL("gensub result2 %.30s too big; can't happen", buf); + *pb = '\0'; + setsval(res, buf); + pfa->initstat = tempstat; + } + tempfree(x); + tempfree(y); + free(buf); + return(res); +} + void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ { /* sptr[0] == '\\' */ char *pb = *pb_ptr;