git: 5c053aa3c5e9 - main - split: switch to getline() for line/pattern matching
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 25 Oct 2022 15:06:28 UTC
The branch main has been updated by kevans: URL: https://cgit.FreeBSD.org/src/commit/?id=5c053aa3c5e907bdd1ac466ce9b58611781c2c20 commit 5c053aa3c5e907bdd1ac466ce9b58611781c2c20 Author: Kyle Evans <kevans@FreeBSD.org> AuthorDate: 2022-08-23 02:05:58 +0000 Commit: Kyle Evans <kevans@FreeBSD.org> CommitDate: 2022-10-25 15:05:23 +0000 split: switch to getline() for line/pattern matching Get rid of split's home-grown logic for growing the buffer; arbitrarily breaking at LONG_MAX bytes instead of 65536 bytes gives us much more wiggle room. Additionally, we'll actually fail out entirely if we can't fit a line, which makes noticing this class of problem much easier. Reviewed by: bapt, emaste, pauamma Sponsored by: Klara, Inc. Differential Revision: https://reviews.freebsd.org/D36323 --- usr.bin/split/split.1 | 8 +++++--- usr.bin/split/split.c | 25 ++++++++++++------------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/usr.bin/split/split.1 b/usr.bin/split/split.1 index 8f287a4163dd..684cad57d4fc 100644 --- a/usr.bin/split/split.1 +++ b/usr.bin/split/split.1 @@ -28,7 +28,7 @@ .\" @(#)split.1 8.3 (Berkeley) 4/16/94 .\" $FreeBSD$ .\" -.Dd May 9, 2013 +.Dd October 25, 2022 .Dt SPLIT 1 .Os .Sh NAME @@ -213,5 +213,7 @@ A .Nm command appeared in .At v3 . -.Sh BUGS -The maximum line length for matching patterns is 65536. +.Pp +Before +.Fx 14 , +pattern and line matching only operated on lines shorter than 65,536 bytes. diff --git a/usr.bin/split/split.c b/usr.bin/split/split.c index 9028b29d1c69..008b614f4946 100644 --- a/usr.bin/split/split.c +++ b/usr.bin/split/split.c @@ -70,7 +70,6 @@ static off_t chunks = 0; /* Chunks count to split into. */ static long numlines; /* Line count to split on. */ static int file_open; /* If a file open. */ static int ifd = -1, ofd = -1; /* Input/output file descriptors. */ -static char bfr[MAXBSIZE]; /* I/O buffer. */ static char fname[MAXPATHLEN]; /* File name prefix. 
*/ static regex_t rgx; static int pflag; @@ -203,6 +202,7 @@ main(int argc, char **argv) static void split1(void) { + static char bfr[MAXBSIZE]; off_t bcnt; char *C; ssize_t dist, len; @@ -211,7 +211,7 @@ split1(void) nfiles = 0; for (bcnt = 0;;) - switch ((len = read(ifd, bfr, MAXBSIZE))) { + switch ((len = read(ifd, bfr, sizeof(bfr)))) { case 0: exit(0); case -1: @@ -264,46 +264,45 @@ split1(void) static void split2(void) { + char *buf; + size_t bufsize; + ssize_t len; long lcnt = 0; FILE *infp; + buf = NULL; + bufsize = 0; + /* Stick a stream on top of input file descriptor */ if ((infp = fdopen(ifd, "r")) == NULL) err(EX_NOINPUT, "fdopen"); /* Process input one line at a time */ - while (fgets(bfr, sizeof(bfr), infp) != NULL) { - const int len = strlen(bfr); - - /* If line is too long to deal with, just write it out */ - if (bfr[len - 1] != '\n') - goto writeit; - + while ((len = getline(&buf, &bufsize, infp)) > 0) { /* Check if we need to start a new file */ if (pflag) { regmatch_t pmatch; pmatch.rm_so = 0; pmatch.rm_eo = len - 1; - if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0) + if (regexec(&rgx, buf, 0, &pmatch, REG_STARTEND) == 0) newfile(); } else if (lcnt++ == numlines) { newfile(); lcnt = 1; } -writeit: /* Open output file if needed */ if (!file_open) newfile(); /* Write out line */ - if (write(ofd, bfr, len) != len) + if (write(ofd, buf, len) != len) err(EX_IOERR, "write"); } /* EOF or error? */ - if (ferror(infp)) + if ((len == -1 && errno != 0) || ferror(infp)) err(EX_IOERR, "read"); else exit(0);