git: 666abb0888d2 - stable/13 - one-true-awk: import 20210221 (1e4bc42c53a1) which fixes a number of bugs

Warner Losh imp at FreeBSD.org
Sat Jul 10 17:08:33 UTC 2021


The branch stable/13 has been updated by imp:

URL: https://cgit.FreeBSD.org/src/commit/?id=666abb0888d277e82c6468851e015798e9a7629f

commit 666abb0888d277e82c6468851e015798e9a7629f
Author:     Warner Losh <imp at FreeBSD.org>
AuthorDate: 2021-07-07 23:30:35 +0000
Commit:     Warner Losh <imp at FreeBSD.org>
CommitDate: 2021-07-10 17:07:26 +0000

    one-true-awk: import 20210221 (1e4bc42c53a1) which fixes a number of bugs
    
    Import the latest bsd-features branch of the one-true-awk upstream:
    
    o Move to bison for $YACC
    o Set close-on-exec flag for file and pipe redirects that aren't std*
    o lots of little fixes to modernize code base
    o free sval member before setting it
    o fix a bug where a{0,3} could match aaaa
    o pull in systime and strftime from NetBSD awk
    o pull in fixes from {Net,Free,Open}BSD (normalized our code with them)
    o add BSD extensions and, or, xor, compl, lshift, rshift (mostly a nop)
    
    Also revert a few of the trivial FreeBSD changes that were done slightly
    differently in the upstreaming process. Also, our PR database may have
    been mined by upstream for these fixes, and Mikolaj Golub may deserve
    credit for some of the fixes in this update.
    
    Suggested by:           Mikolaj Golub <to.my.trociny at gmail.com>
    PR:                     143363, 143365, 143368, 143369, 143373, 143375, 214783
    Sponsored by:           Netflix
    
    (cherry picked from commit f39dd6a9784467f0db5886012b3f4b13899be6b8)
---
 contrib/one-true-awk/ChangeLog                     | 108 +++
 contrib/one-true-awk/FIXES                         | 261 +++++-
 contrib/one-true-awk/REGRESS                       |   4 +
 contrib/one-true-awk/awk.1                         | 159 +++-
 contrib/one-true-awk/awk.h                         |  67 +-
 contrib/one-true-awk/awkgram.y                     |  48 +-
 contrib/one-true-awk/b.c                           | 438 +++++++---
 .../one-true-awk/bugs-fixed/missing-precision.ok   |   2 +-
 contrib/one-true-awk/bugs-fixed/negative-nf.ok     |   2 +-
 contrib/one-true-awk/lex.c                         |  77 +-
 contrib/one-true-awk/lib.c                         | 327 +++++---
 contrib/one-true-awk/main.c                        | 163 ++--
 contrib/one-true-awk/makefile                      |  74 +-
 contrib/one-true-awk/maketab.c                     |  66 +-
 contrib/one-true-awk/parse.c                       |  29 +-
 contrib/one-true-awk/proctab.c                     | 202 ++---
 contrib/one-true-awk/proto.h                       |  41 +-
 contrib/one-true-awk/run.c                         | 918 ++++++++++++++-------
 contrib/one-true-awk/tran.c                        | 164 ++--
 usr.bin/awk/Makefile                               |  10 +-
 20 files changed, 2192 insertions(+), 968 deletions(-)

diff --git a/contrib/one-true-awk/ChangeLog b/contrib/one-true-awk/ChangeLog
index fd03b2bbca0b..dea4ed7e3187 100644
--- a/contrib/one-true-awk/ChangeLog
+++ b/contrib/one-true-awk/ChangeLog
@@ -1,3 +1,111 @@
+2020-07-30         Arnold D. Robbins     <arnold at skeeve.com>
+
+	By fiat, we use bison for $(YACC). Trying to accommodate
+	different versions didn't work.
+
+	* makefile: Significant cleanup. Replace all ytab* references
+	with awkgram.tab.* and simplify definition of YACC.
+	* .gitignore: Remove ytab* references.
+	* b.c, lex.c, maketab.c, parse.c, run.c: Replace include of ytab.h
+	with awkgram.tab.h.
+	* lib.c, main.c, tran.c: Remove include of ytab.h, wasn't needed.
+
+2020-01-20         Arnold D. Robbins     <arnold at skeeve.com>
+
+	* run.c (openfile): Set the close-on-exec flag for file
+	and pipe redirections that aren't stdin/stdout/stderr.
+
+2020-01-06         Arnold D. Robbins     <arnold at skeeve.com>
+
+	Minor fixes.
+	* b.c (replace_repeat): Turn init_q back into an int.
+	* lex.c (string): Use \a instead of \007.
+	* tran.c (catstr): Use snprintf instead of sprintf.
+
+2020-01-01         Arnold D. Robbins     <arnold at skeeve.com>
+
+	* tran.c (syminit, arginit, envinit): Free sval member before
+	setting it. Thanks to valgrind.
+	* b.c: Small formatting cleanups in several routines.
+
+2019-12-27         Arnold D. Robbins     <arnold at skeeve.com>
+
+	* b.c (replace_repeat): Fix a bug whereby a{0,3} could match
+	four a's.  Thanks to Anonymous AWK fan <awkfan77 at mailfence.com>
+	for the report. Also, minor code formatting cleanups.
+	* testdir/T.int-expr: New file.
+
+2019-12-11         Arnold D. Robbins     <arnold at skeeve.com>
+
+	* README: Renamed to ...
+	* README.md: ... this. Cleaned up some as well,
+	including moving to Markdown.
+
+2019-11-08         Arnold D. Robbins     <arnold at skeeve.com>
+
+	* test/T.chem: Use $oldawk instead of hardwiring 'awk'.
+	* test/T.lilly: Remove gawk warnings from output, improves
+	portability.
+
+2019-10-17         Arnold D. Robbins     <arnold at skeeve.com>
+
+	Pull in systime() and strftime() from the NetBSD awk.
+
+	* awk.1: Document the functions.
+	* run.c (bltin): Implement the functions.
+	* awk.h: Add defines for systime and strftime.
+	* lex.c: Add support for systime and strftime.
+
+2019-10-07         Arnold D. Robbins     <arnold at skeeve.com>
+
+	Integrate features from different *BSD versions of awk.
+	Gensub support from NetBSD. Bitwise functions from OpenBSD.
+
+	* awk.h: Add defines for and, or, xor, compl, lshift and rshift.
+	* awkgram.y: Add support for gensub.
+	* maketab.c: Ditto.
+	* lex.c: Add support for gensub and bitwise functions.
+	* parse.c (node5, op5): New functions.
+	* proto.h (node5, op5): New declarations.
+	* run.c (bltin): Implement the bitwise functions.
+	(gensub): New function.
+	* awk.1: Document additional functions.
+
+2019-10-07         Arnold D. Robbins     <arnold at skeeve.com>
+
+	* b.c (fnematch): Change type of pbuf from unsigned char to char.
+	* proto.h (fnematch): Ditto.
+
+2019-10-06         Arnold D. Robbins     <arnold at skeeve.com>
+
+	* lib.c (readrec): Allow RS a regular expression. Imported
+	the code from the NetBSD awk.
+	* b.c (fnematch): New function for implementing the feature.
+	* awk.1: Updated.
+	* main.c (version): Updated.
+
+2019-06-24         Arnold D. Robbins     <arnold at skeeve.com>
+
+	* makefile: Revise to take into account there is no more awktest.tar,
+	add targets 'check' and 'test', and also 'testclean' to clean up
+	after test run.  Have 'clean' and 'cleaner' depend upon 'testclean'.
+
+2019-06-23         Arnold D. Robbins     <arnold at skeeve.com>
+
+	* testdir: Extracted from awktest.tar and added to Git.
+	* awktest.tar: Removed.
+
+2019-06-06         Arnold D. Robbins     <arnold at skeeve.com>
+
+	* awk.1: Fix a typo, minor edits.
+
+2019-06-05         Arnold D. Robbins     <arnold at skeeve.com>
+
+	* b.c (relex): Count parentheses and treat unmatched right paren
+	as a literal character.
+	* awktest.tar (testdir/T.re): Added a test case.
+	* main.c (version): Updated.
+
 2019-05-29         Arnold D. Robbins     <arnold at skeeve.com>
 
 	* lib.c (isclvar): Remove check for additional '=' after
diff --git a/contrib/one-true-awk/FIXES b/contrib/one-true-awk/FIXES
index 183eaedee47d..516458eee0c1 100644
--- a/contrib/one-true-awk/FIXES
+++ b/contrib/one-true-awk/FIXES
@@ -25,6 +25,229 @@ THIS SOFTWARE.
 This file lists all bug fixes, changes, etc., made since the AWK book
 was sent to the printers in August, 1987.
 
+February 15, 2021:
+	Small fix so that awk will compile again with g++. Thanks to
+	Arnold Robbins.
+
+January 06, 2021:
+	Fix a decision bug with trailing stuff in lib.c:is_valid_number
+	after recent changes. Thanks to Ozan Yigit.
+
+December 18, 2020:
+	Fix problems converting inf and NaN values in lib.c:is_valid_number.
+	Enhance number to string conversion to do the right thing for
+	NaN and inf values.  Things are now pretty much the same as in
+	gawk.  (Found a gawk bug while we're at it.) Added a torture
+	test for these values.  Thanks to Arnold Robbins.  Allows closing
+	of PR #101.
+
+December 15, 2020:
+	Merge PR #99, which gets the right header for strcasecmp.
+	Thanks to GitHub user michaelforney.
+
+December 8, 2020:
+	Merge PR #98: Disallow hex data. Allow only +nan, -nan,
+	+inf, -inf (case independent) to give NaN and infinity values.
+	Improve things so that string to double conversion is only
+	done once, yielding something of a speedup.  This obviates
+	PR #95. Thanks to Arnold Robbins.
+
+December 3, 2020:
+	Fix to argument parsing to avoid printing spurious newlines.
+	Thanks to Todd Miller. Merges PR #97.
+
+October 13, 2020:
+	Add casts before all the calls to malloc/calloc/realloc in order
+	to get it to compile with g++. Thanks to Arnold Robbins.
+
+August 16, 2020:
+	Additional fixes for DJGPP. Thanks to Eli Zaretskii for
+	the testing.
+
+August 7, 2020:
+	Merge PR #93, which adds casts to (void*) for debug prints
+	using the %p format specifier. Thanks to GitHub user YongHaoWu
+	("Chris") for the fixes.
+
+August 4, 2020:
+	In run.c, use non-restartable multibyte routines to attain
+	portability to DJGPP. Should fix Issue 92. Thanks to Albert Wik
+	for the report and to Todd Miller for the suggested fix.
+
+July 30, 2020:
+	Merge PRs 88-91 which fix small bugs. Thanks to Todd Miller and
+	Tim van der Molen for the fixes.
+
+	In order to make life easier, we move exclusively to bison
+	as the parser generator.
+
+July 2, 2020:
+	Merge PRs 85 and 86 which fix regressions. Thanks to
+	Tim van der Molen for the fixes.
+
+June 25, 2020:
+	Merge PRs 82 and 84. The latter fixes issue #83. Thanks to
+	Todd Miller and awkfan77.
+
+June 12, 2020:
+	Clear errno before calling errcheck to avoid any spurious errors
+	left over from previous calls that may have set it. Thanks to
+	Todd Miller for the fix, from PR #80.
+
+	Fix Issue #78 by allowing \r to follow floating point numbers in
+	lib.c:is_number. Thanks to GitHub user ajcarr for the report
+	and to Arnold Robbins for the fix.
+
+June 5, 2020:
+	In fldbld(), make sure that inputFS is set before trying to
+	use it. Thanks to  Steffen Nurpmeso <steffen at sdaoden.eu>
+	for the report.
+
+May 5, 2020:
+	Fix checks for compilers that can handle noreturn. Thanks to
+	GitHub user enh-google for pointing it out. Closes Issue #79.
+
+April 16, 2020:
+	Handle old compilers that don't support C11 (for noreturn).
+	Thanks to Arnold Robbins.
+
+April 5, 2020:
+	Use <stdnoreturn.h> and noreturn instead of GCC attributes.
+	Thanks to GitHub user awkfan77. Closes PR #77.
+
+February 28, 2020:
+	More cleanups from Christos Zoulas: notably backslash continuation
+	inside strings removes the newline and a fix for RS = "^a".
+	Fix for address sanitizer-found problem. Thanks to GitHub user
+	enh-google.
+
+February 19, 2020:
+	More small cleanups from Christos Zoulas.
+
+February 18, 2020:
+	Additional cleanups from Christos Zoulas. It's no longer necessary
+	to use the -y flag to bison.
+
+February 6, 2020:
+	Additional small cleanups from Christos Zoulas. awk is now
+	a little more robust about reporting I/O errors upon exit.
+
+January 31, 2020:
+	Merge PR #70, which avoids use of variable length arrays. Thanks
+	to GitHub user michaelforney.  Fix issue #60 ({0} in interval
+	expressions doesn't work).  Also get all tests working again.
+	Thanks to Arnold Robbins.
+
+January 24, 2020:
+	A number of small cleanups from Christos Zoulas.  Add the close
+	on exec flag to files/pipes opened for redirection; courtesy of
+	Arnold Robbins.
+
+January 19, 2020:
+	If POSIXLY_CORRECT is set in the environment, then sub and gsub
+	use POSIX rules for multiple backslashes.  This fixes Issue #66,
+	while maintaining backwards compatibility.
+
+January 9, 2020:
+	Input/output errors on closing files are now fatal instead of
+	mere warnings. Thanks to Martijn Dekker <martijn at inlv.org>.
+
+January 5, 2020:
+	Fix a bug in the concatenation of two string constants into
+	one done in the grammar.  Fixes GitHub issue #61.  Thanks
+	to GitHub user awkfan77 for pointing out the direction for
+	the fix.  New test T.concat added to the test suite.
+	Fix a few memory leaks reported by valgrind, as well.
+
+December 27, 2019:
+	Fix a bug whereby a{0,3} could match four a's.  Thanks to
+	"Anonymous AWK fan" for the report.
+
+December 11, 2019:
+	Further printf-related fixes for 32 bit systems.
+	Thanks again to Christos Zoulas.
+
+December 8, 2019:
+	Fix the return value of sprintf("%d") on 32 bit systems.
+	Thanks to Jim Lowe for the report and to Christos Zoulas
+	for the fix.
+
+November 10, 2019:
+	Convert a number of Boolean integer variables into
+	actual bools. Convert compile_time variable into an
+	enum and simplify some of the related code.  Thanks
+	to Arnold Robbins.
+
+November 8, 2019:
+	Fix from Ori Bernstein to get UTF-8 characters instead of
+	bytes when FS = "".  This is currently the only bit of
+	the One True Awk that understands multibyte characters.
+	From Arnold Robbins, apply some cleanups in the test suite.
+
+October 25, 2019:
+	More fixes and cleanups from NetBSD, courtesy of Christos
+	Zoulas. Merges PRs 54 and 55.
+
+October 24, 2019:
+	Import second round of code cleanups from NetBSD. Much thanks
+	to Christos Zoulas (GitHub user zoulasc). Merges PR 53.
+	Add an optimization for string concatenation, also from
+	Christos.
+
+October 17, 2019:
+	Import code cleanups from NetBSD. Much thanks to Christos
+	Zoulas (GitHub user zoulasc). Merges PR 51.
+
+October 6, 2019:
+	Import code from NetBSD awk that implements RS as a regular
+	expression.
+
+September 10, 2019:
+	Fixes for various array / memory overruns found via gcc's
+	-fsanitize=unknown. Thanks to Alexander Richardson (GitHub
+	user arichardson). Merges PRs 47 and 48.
+
+July 28, 2019:
+	Import grammar optimization from NetBSD: Two string constants
+	concatenated together get turned into a single string.
+
+July 26, 2019:
+	Support POSIX-specified C-style escape sequences "\a" (alarm)
+	and "\v" (vertical tab) in command line arguments and regular
+	expressions, further to the support for them in strings added on
+	Apr 9, 1989. These now no longer match as literal "a" and "v"
+	characters (as they don't on other awk implementations).
+	Thanks to Martijn Dekker.
+
+July 17, 2019:
+	Pull in a number of code cleanups and minor fixes from
+	Warner Losh's bsd-ota branch.  The only user visible change
+	is the use of random(3) as the random number generator.
+	Thanks to Warner Losh for collecting all these fixes in
+	one easy place to get them from.
+
+July 16, 2019:
+	Fix field splitting to use FS value as of the time a record
+	was read or assigned to.  Thanks to GitHub user Cody Mello (melloc)
+	for the fix. (Merged from his branch, via PR #42.) Updated
+	testdir/T.split per said PR as well.
+
+June 24, 2019:
+	Extract awktest.tar into testdir directory. Add some very
+	simple mechanics to the makefile for running the tests and
+	for cleaning up. No changes to awk itself.
+
+June 17, 2019:
+	Disallow deleting SYMTAB and its elements, which creates
+	use-after-free bugs. Thanks to GitHub user Cody Mello (melloc)
+	for the fix. (Merged from PR #43.)
+
+June 5, 2019:
+	Allow unmatched right parenthesis in a regular expression to
+	be treated literally. Fixes Issue #40. Thanks to GitHub user
+	Warner Losh (bsdimp) for the report. Thanks to Arnold Robbins
+	for the fix.
+
 May 29,2019:
 	Fix check for command line arguments to no longer require that
 	first character after '=' not be another '='. Reverts change of
@@ -34,7 +257,7 @@ May 29,2019:
 Apr 7, 2019:
 	Update awktest.tar(p.50) to use modern options to sort. Needed
 	for Android development. Thanks to GitHub user mohd-akram (Mohamed
-	Akram).  From Comment #33.
+	Akram).  From Issue #33.
 
 Mar 12, 2019:
 	Added very simplistic support for cross-compiling in the
@@ -54,7 +277,7 @@ Mar 3, 2019:
 	#12: Avoid undefined behaviour when using ctype(3) functions in
 	     relex(). Thanks to GitHub user iamleot.
 	#31: Make getline handle numeric strings, and update FIXES. Thanks
-	     to GitHub user arnoldrobbins
+	     to GitHub user arnoldrobbins.
 	#32: maketab: support build systems with read-only source. Thanks
 	     to GitHub user enh.
 
@@ -159,10 +382,10 @@ Jun 12, 2011:
 	/pat/, \n /pat/ {...} is now legal, though bad style to use.
 
 	added checks to new -v code that permits -vnospace; thanks to
-	ruslan ermilov for spotting this and providing the patch. 
+	ruslan ermilov for spotting this and providing the patch.
 
 	removed fixed limit on number of open files; thanks to aleksey
-	cheusov and christos zoulos. 
+	cheusov and christos zoulos.
 
 	fixed day 1 bug that resurrected deleted elements of ARGV when
 	used as filenames (in lib.c).
@@ -180,10 +403,10 @@ May 1, 2011:
 	and arnold robbins, changed srand() to return the previous
 	seed (which is 1 on the first call of srand).  the seed is
 	an Awkfloat internally though converted to unsigned int to
-	pass to the library srand().  thanks, everyone. 
+	pass to the library srand().  thanks, everyone.
 
 	fixed a subtle (and i hope low-probability) overflow error
-	in fldbld, by adding space for one extra \0.  thanks to 
+	in fldbld, by adding space for one extra \0.  thanks to
 	robert bassett for spotting this one and providing a fix.
 
 	removed the files related to compilation on windows.  i no
@@ -220,7 +443,7 @@ Oct 8, 2008:
 
 Oct 23, 2007:
 	minor fix in lib.c: increase inputFS to 100, change malloc
-	for fields to n+1.  
+	for fields to n+1.
 
 	fixed memory fault caused by out of order test in setsval.
 
@@ -267,7 +490,7 @@ Jan 17, 2006:
 
 	core dump on linux with BEGIN {nextfile}, now fixed.
 
-	removed some #ifdef's in run.c and lex.c that appear to no 
+	removed some #ifdef's in run.c and lex.c that appear to no
 	longer be necessary.
 
 Apr 24, 2005:
@@ -281,8 +504,8 @@ Jan 14, 2005:
 	rethinking it.
 
 Dec 31, 2004:
-	prevent overflow of -f array in main, head off potential error in 
-	call of SYNTAX(), test malloc return in lib.c, all with thanks to 
+	prevent overflow of -f array in main, head off potential error in
+	call of SYNTAX(), test malloc return in lib.c, all with thanks to
 	todd miller.
 
 Dec 22, 2004:
@@ -310,8 +533,8 @@ Nov 22, 2003:
 	code known to man.
 
 	fixed a storage leak in call() that appears to have been there since
-	1983 or so -- a function without an explicit return that assigns a 
-	string to a parameter leaked a Cell.  thanks to moinak ghosh for 
+	1983 or so -- a function without an explicit return that assigns a
+	string to a parameter leaked a Cell.  thanks to moinak ghosh for
 	spotting this very subtle one.
 
 Jul 31, 2003:
@@ -333,7 +556,7 @@ Jul 28, 2003:
 	radix character in programs and command line arguments regardless of
 	the locale; otherwise, the locale should prevail for input and output
 	of numbers.  so it's intended to work that way.
-	
+
 	i have rescinded the attempt to use strcoll in expanding shorthands in
 	regular expressions (cclenter).  its properties are much too
 	surprising; for example [a-c] matches aAbBc in locale en_US but abBcC
@@ -397,7 +620,7 @@ Nov 29, 2002:
 Jun 28, 2002:
 	modified run/format() and tran/getsval() to do a slightly better
 	job on using OFMT for output from print and CONVFMT for other
-	number->string conversions, as promised by posix and done by 
+	number->string conversions, as promised by posix and done by
 	gawk and mawk.  there are still places where it doesn't work
 	right if CONVFMT is changed; by then the STR attribute of the
 	variable has been irrevocably set.  thanks to arnold robbins for
@@ -429,7 +652,7 @@ Feb 10, 2002:
 Jan 1, 2002:
 	fflush() or fflush("") flushes all files and pipes.
 
-	length(arrayname) returns number of elements; thanks to 
+	length(arrayname) returns number of elements; thanks to
 	arnold robbins for suggestion.
 
 	added a makefile.win to make it easier to build on windows.
@@ -479,7 +702,7 @@ July 5, 2000:
 
 May 25, 2000:
 	yet another attempt at making 8-bit input work, with another
-	band-aid in b.c (member()), and some (uschar) casts to head 
+	band-aid in b.c (member()), and some (uschar) casts to head
 	off potential errors in subscripts (like isdigit).  also
 	changed HAT to NCHARS-2.  thanks again to santiago vila.
 
@@ -526,7 +749,7 @@ Apr 21, 1999:
 	the test case.)
 
 Apr 16, 1999:
-	with code kindly provided by Bruce Lilly, awk now parses 
+	with code kindly provided by Bruce Lilly, awk now parses
 	/=/ and similar constructs more sensibly in more places.
 	Bruce also provided some helpful test cases.
 
@@ -583,7 +806,7 @@ Jan 13, 1999:
 
 Oct 19, 1998:
 	fixed a couple of bugs in getrec: could fail to update $0
-	after a getline var; because inputFS wasn't initialized, 
+	after a getline var; because inputFS wasn't initialized,
 	could split $0 on every character, a misleading diversion.
 
 	fixed caching bug in makedfa: LRU was actually removing
@@ -731,7 +954,7 @@ May 2, 1996:
 	input file. (thanks to arnold robbins for inspiration and code).
 
 	small fixes to regexpr code:  can now handle []], [[], and
-	variants;  [] is now a syntax error, rather than matching 
+	variants;  [] is now a syntax error, rather than matching
 	everything;  [z-a] is now empty, not z.  far from complete
 	or correct, however.  (thanks to jeffrey friedl for pointing out
 	some awful behaviors.)
diff --git a/contrib/one-true-awk/REGRESS b/contrib/one-true-awk/REGRESS
index 7d3ded69d536..eb3b5d7ac70b 100755
--- a/contrib/one-true-awk/REGRESS
+++ b/contrib/one-true-awk/REGRESS
@@ -33,3 +33,7 @@ then
 fi
 
 REGRESS
+
+cd ..
+cd bugs-fixed
+REGRESS
diff --git a/contrib/one-true-awk/awk.1 b/contrib/one-true-awk/awk.1
index b8e00cb60449..b3698eb07d1a 100644
--- a/contrib/one-true-awk/awk.1
+++ b/contrib/one-true-awk/awk.1
@@ -7,6 +7,10 @@
 .fi
 .ft 1
 ..
+.de TF
+.IP "" "\w'\fB\\$1\ \ \fP'u"
+.PD 0
+..
 .TH AWK 1
 .CT 1 files prog_other
 .SH NAME
@@ -48,7 +52,7 @@ matches the pattern.
 Each line is matched against the
 pattern portion of every pattern-action statement;
 the associated action is performed for each matched pattern.
-The file name 
+The file name
 .B \-
 means the standard input.
 Any
@@ -90,7 +94,7 @@ A pattern-action statement has the form:
 .IP
 .IB pattern " { " action " }
 .PP
-A missing 
+A missing
 .BI { " action " }
 means print the line;
 a missing pattern always matches.
@@ -209,7 +213,7 @@ or length of
 if no argument.
 .TP
 .B rand
-random number on [0,1)
+random number on [0,1).
 .TP
 .B srand
 sets seed for
@@ -217,7 +221,7 @@ sets seed for
 and returns the previous seed.
 .TP
 .B int
-truncates to an integer value
+truncates to an integer value.
 .TP
 \fBsubstr(\fIs\fB, \fIm\fR [\fB, \fIn\^\fR]\fB)\fR
 the
@@ -225,12 +229,11 @@ the
 substring of
 .I s
 that begins at position
-.I m 
+.I m
 counted from 1.
 If no
-.IR m ,
-use the rest of the string
-.I 
+.IR n ,
+use the rest of the string.
 .TP
 .BI index( s , " t" )
 the position in
@@ -294,6 +297,25 @@ and
 .B gsub
 return the number of replacements.
 .TP
+\fBgensub(\fIpat\fB, \fIrepl\fB, \fIhow\fR [\fB, \fItarget\fR]\fB)\fR
+replaces instances of
+.I pat
+in
+.I target
+with
+.IR repl .
+If
+.I how
+is \fB"g"\fR or \fB"G"\fR, do so globally. Otherwise,
+.I how
+is a number indicating which occurrence to replace.  If no
+.IR target ,
+use
+.BR $0 .
+Return the resulting string;
+.I target
+is not modified.
+.TP
 .BI sprintf( fmt , " expr" , " ...\fB)
 the string resulting from formatting
 .I expr ...
@@ -302,13 +324,35 @@ according to the
 format
 .IR fmt .
 .TP
+.B systime()
+returns the current date and time as a standard
+``seconds since the epoch'' value.
+.TP
+.BI strftime( fmt ", " timestamp\^ )
+formats
+.I timestamp
+(a value in seconds since the epoch)
+according to
+.IR fmt ,
+which is a format string as supported by
+.IR strftime (3).
+Both
+.I timestamp
+and
+.I fmt
+may be omitted; if no
+.IR timestamp ,
+the current time of day is used, and if no
+.IR fmt ,
+a default format of \fB"%a %b %e %H:%M:%S %Z %Y"\fR is used.
+.TP
 .BI system( cmd )
 executes
 .I cmd
 and returns its exit status. This will be \-1 upon error,
 .IR cmd 's
 exit status upon a normal exit,
-256 + 
+256 +
 .I sig
 upon death-by-signal, where
 .I sig
@@ -361,19 +405,26 @@ In all cases,
 returns 1 for a successful input,
 0 for end of file, and \-1 for an error.
 .PP
+The functions
+.BR compl ,
+.BR and ,
+.BR or ,
+.BR xor ,
+.BR lshift ,
+and
+.B rshift
+peform the corresponding bitwise operations on their
+operands, which are first truncated to integer.
+.PP
 Patterns are arbitrary Boolean combinations
 (with
 .BR "! || &&" )
 of regular expressions and
 relational expressions.
 Regular expressions are as in
-.IR egrep (1) 
-except numeric quantifiers are not supported beyond the basic
-.B +
-and
-.B ?
-for quantities \&\f(CW"0 or 1"\fP and \&\f(CW"1 or more"\fP
-respectively.
+.IR egrep ;
+see
+.IR grep (1).
 Isolated regular expressions
 in a pattern apply to the entire line.
 Regular expressions may also occur in
@@ -483,6 +534,11 @@ the length of a string matched by
 .TP
 .B RS
 input record separator (default newline).
+If empty, blank lines separate records.
+If more than one character long,
+.B RS
+is treated as a regular expression, and records are
+separated by text matching the expression.
 .TP
 .B RSTART
 the start position of a string matched by
@@ -502,6 +558,16 @@ functions may be called recursively.
 Parameters are local to the function; all other variables are global.
 Thus local variables may be created by providing excess parameters in
 the function definition.
+.SH ENVIRONMENT VARIABLES
+If
+.B POSIXLY_CORRECT
+is set in the environment, then
+.I awk
+follows the POSIX rules for
+.B sub
+and
+.B gsub
+with respect to consecutive backslashes and ampersands.
 .SH EXAMPLES
 .TP
 .EX
@@ -546,8 +612,8 @@ BEGIN	{	# Simulate echo(1)
 .fi
 .EE
 .SH SEE ALSO
-.IR grep (1), 
-.IR lex (1), 
+.IR grep (1),
+.IR lex (1),
 .IR sed (1)
 .br
 A. V. Aho, B. W. Kernighan, P. J. Weinberger,
@@ -558,8 +624,61 @@ There are no explicit conversions between numbers and strings.
 To force an expression to be treated as a number add 0 to it;
 to force it to be treated as a string concatenate
 \&\f(CW""\fP to it.
-.br
+.PP
 The scope rules for variables in functions are a botch;
 the syntax is worse.
-.br
+.PP
 Only eight-bit characters sets are handled correctly.
+.SH UNUSUAL FLOATING-POINT VALUES
+.I Awk
+was designed before IEEE 754 arithmetic defined Not-A-Number (NaN)
+and Infinity values, which are supported by all modern floating-point
+hardware.
+.PP
+Because
+.I awk
+uses
+.IR strtod (3)
+and
+.IR atof (3)
+to convert string values to double-precision floating-point values,
+modern C libraries also convert strings starting with
+.B inf
+and
+.B nan
+into infinity and NaN values respectively.  This led to strange results,
+with something like this:
+.PP
+.EX
+.nf
+echo nancy | awk '{ print $1 + 0 }'
+.fi
+.EE
+.PP
+printing
+.B nan
+instead of zero.
+.PP
+.I Awk
+now follows GNU AWK, and prefilters string values before attempting
+to convert them to numbers, as follows:
+.TP
+.I "Hexadecimal values"
+Hexadecimal values (allowed since C99) convert to zero, as they did
+prior to C99.
+.TP
+.I "NaN values"
+The two strings
+.B +nan
+and
+.B \-nan
+(case independent) convert to NaN. No others do.
+(NaNs can have signs.)
+.TP
+.I "Infinity values"
+The two strings
+.B +inf
+and
+.B \-inf
+(case independent) convert to positive and negative infinity, respectively.
+No others do.
diff --git a/contrib/one-true-awk/awk.h b/contrib/one-true-awk/awk.h
index 31d070aecddc..230eac41548c 100644
--- a/contrib/one-true-awk/awk.h
+++ b/contrib/one-true-awk/awk.h
@@ -23,6 +23,13 @@ THIS SOFTWARE.
 ****************************************************************/
 
 #include <assert.h>
+#include <stdint.h>
+#include <stdbool.h>
+#if __STDC_VERSION__ <= 199901L
+#define noreturn
+#else
+#include <stdnoreturn.h>
+#endif
 
 typedef double	Awkfloat;
 
@@ -30,24 +37,34 @@ typedef double	Awkfloat;
 
 typedef	unsigned char uschar;
 
-#define	xfree(a)	{ if ((a) != NULL) { free((void *) (a)); (a) = NULL; } }
+#define	xfree(a)	{ if ((a) != NULL) { free((void *)(intptr_t)(a)); (a) = NULL; } }
+/*
+ * We sometimes cheat writing read-only pointers to NUL-terminate them
+ * and then put back the original value
+ */
+#define setptr(ptr, a)	(*(char *)(intptr_t)(ptr)) = (a)
 
-#define	NN(p)	((p) ? (p) : "(null)")	/* guaranteed non-null for dprintf 
+#define	NN(p)	((p) ? (p) : "(null)")	/* guaranteed non-null for DPRINTF
 */
 #define	DEBUG
 #ifdef	DEBUG
-			/* uses have to be doubly parenthesized */
-#	define	dprintf(x)	if (dbg) printf x
+#	define	DPRINTF(...)	if (dbg) printf(__VA_ARGS__)
 #else
-#	define	dprintf(x)
+#	define	DPRINTF(...)
 #endif
 
-extern int	compile_time;	/* 1 if compiling, 0 if running */
-extern int	safe;		/* 0 => unsafe, 1 => safe */
+extern enum compile_states {
+	RUNNING,
+	COMPILING,
+	ERROR_PRINTING
+} compile_time;
+
+extern bool	safe;		/* false => unsafe, true => safe */
 
 #define	RECSIZE	(8 * 1024)	/* sets limit on records, fields, etc., etc. */
 extern int	recsize;	/* size of current record, orig RECSIZE */
 
+extern char	EMPTY[];	/* this avoid -Wwritable-strings issues */
 extern char	**FS;
 extern char	**RS;
 extern char	**ORS;
@@ -64,13 +81,11 @@ extern Awkfloat *RLENGTH;
 extern char	*record;	/* points to $0 */
 extern int	lineno;		/* line number in awk program */
 extern int	errorflag;	/* 1 if error has occurred */
-extern int	donefld;	/* 1 if record broken into fields */
-extern int	donerec;	/* 1 if record is valid (no fld has changed */
-extern char	inputFS[];	/* FS at time of input, for field splitting */
-
+extern bool	donefld;	/* true if record broken into fields */
+extern bool	donerec;	/* true if record is valid (no fld has changed */
 extern int	dbg;
 
-extern	char	*patbeg;	/* beginning of pattern matched */
+extern const char *patbeg;	/* beginning of pattern matched */
 extern	int	patlen;		/* length of pattern matched.  set in b.c */
 
 /* Cell:  all information about a variable or constant */
@@ -105,6 +120,7 @@ extern Cell	*rsloc;		/* RS */
 extern Cell	*rstartloc;	/* RSTART */
 extern Cell	*rlengthloc;	/* RLENGTH */
 extern Cell	*subseploc;	/* SUBSEP */
+extern Cell	*symtabloc;	/* SYMTAB */
 
 /* Cell.tval values: */
 #define	NUM	01	/* number value is valid */
@@ -134,12 +150,14 @@ extern Cell	*subseploc;	/* SUBSEP */
 #define	FTOUPPER 12
 #define	FTOLOWER 13
 #define	FFLUSH	14
-#define	FAND	15
-#define	FFOR	16
-#define	FXOR	17
-#define	FCOMPL	18
-#define	FLSHIFT	19
-#define	FRSHIFT	20
+#define FAND	15
+#define FFOR	16
+#define FXOR	17
+#define FCOMPL	18
+#define FLSHIFT	19
+#define FRSHIFT	20
+#define FSYSTIME	21
+#define FSTRFTIME	22
 
 /* Node:  parse tree is made of nodes, with Cell's at bottom */
 
@@ -167,7 +185,7 @@ extern Node	*nullnode;
 #define CCOPY	6
 #define CCON	5
 #define CTEMP	4
-#define CNAME	3 
+#define CNAME	3
 #define CVAR	2
 #define CFLD	1
 #define	CUNK	0
@@ -217,6 +235,7 @@ extern	int	pairstack[], paircnt;
 
 #define NCHARS	(256+3)		/* 256 handles 8-bit chars; 128 does 7-bit */
 				/* watch out in match(), etc. */
+#define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
 #define NSTATES	32
 #define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
 				/* NCHARS is 2**n */
@@ -232,16 +251,16 @@ typedef struct rrow {
 } rrow;
 
 typedef struct fa {
-	uschar	gototab[NSTATES][HAT + 1];
-	uschar	out[NSTATES];
+	unsigned int	**gototab;
+	uschar	*out;
 	uschar	*restr;
-	int	*posns[NSTATES];
-	int	anchor;
+	int	**posns;
+	int	state_count;
+	bool	anchor;
 	int	use;
 	int	initstat;
 	int	curstat;
 	int	accept;
-	int	reset;
 	struct	rrow re[1];	/* variable: actual size set by calling malloc */
 } fa;
 
diff --git a/contrib/one-true-awk/awkgram.y b/contrib/one-true-awk/awkgram.y
index e4abeeddcb6a..f37073d1f9ac 100644
--- a/contrib/one-true-awk/awkgram.y
+++ b/contrib/one-true-awk/awkgram.y
@@ -32,8 +32,8 @@ int yywrap(void) { return(1); }
 
 Node	*beginloc = 0;
 Node	*endloc = 0;
-int	infunc	= 0;	/* = 1 if in arglist or body of func */
*** 4848 LINES SKIPPED ***


More information about the dev-commits-src-all mailing list