git: f6a842313ca2 - stable/13 - split: switch to getline() for line/pattern matching
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 11 Nov 2022 18:11:34 UTC
The branch stable/13 has been updated by kevans:
URL: https://cgit.FreeBSD.org/src/commit/?id=f6a842313ca28d300beb36c0b765c10e0970b2ca
commit f6a842313ca28d300beb36c0b765c10e0970b2ca
Author: Kyle Evans <kevans@FreeBSD.org>
AuthorDate: 2022-08-23 02:05:58 +0000
Commit: Kyle Evans <kevans@FreeBSD.org>
CommitDate: 2022-11-11 18:08:46 +0000
split: switch to getline() for line/pattern matching
Get rid of split's home-grown logic for growing the buffer; arbitrarily
breaking at LONG_MAX bytes instead of 65536 bytes gives us much more
wiggle room. Additionally, we'll actually fail out entirely if we can't
fit a line, which makes noticing this class of problem much easier.
Reviewed by: bapt, emaste, pauamma
Sponsored by: Klara, Inc.
(cherry picked from commit 5c053aa3c5e907bdd1ac466ce9b58611781c2c20)
---
usr.bin/split/split.1 | 8 +++++---
usr.bin/split/split.c | 25 ++++++++++++-------------
2 files changed, 17 insertions(+), 16 deletions(-)
diff --git a/usr.bin/split/split.1 b/usr.bin/split/split.1
index 8f287a4163dd..684cad57d4fc 100644
--- a/usr.bin/split/split.1
+++ b/usr.bin/split/split.1
@@ -28,7 +28,7 @@
.\" @(#)split.1 8.3 (Berkeley) 4/16/94
.\" $FreeBSD$
.\"
-.Dd May 9, 2013
+.Dd October 25, 2022
.Dt SPLIT 1
.Os
.Sh NAME
@@ -213,5 +213,7 @@ A
.Nm
command appeared in
.At v3 .
-.Sh BUGS
-The maximum line length for matching patterns is 65536.
+.Pp
+Before
+.Fx 14 ,
+pattern and line matching only operated on lines shorter than 65,536 bytes.
diff --git a/usr.bin/split/split.c b/usr.bin/split/split.c
index 9028b29d1c69..008b614f4946 100644
--- a/usr.bin/split/split.c
+++ b/usr.bin/split/split.c
@@ -70,7 +70,6 @@ static off_t chunks = 0; /* Chunks count to split into. */
static long numlines; /* Line count to split on. */
static int file_open; /* If a file open. */
static int ifd = -1, ofd = -1; /* Input/output file descriptors. */
-static char bfr[MAXBSIZE]; /* I/O buffer. */
static char fname[MAXPATHLEN]; /* File name prefix. */
static regex_t rgx;
static int pflag;
@@ -203,6 +202,7 @@ main(int argc, char **argv)
static void
split1(void)
{
+ static char bfr[MAXBSIZE];
off_t bcnt;
char *C;
ssize_t dist, len;
@@ -211,7 +211,7 @@ split1(void)
nfiles = 0;
for (bcnt = 0;;)
- switch ((len = read(ifd, bfr, MAXBSIZE))) {
+ switch ((len = read(ifd, bfr, sizeof(bfr)))) {
case 0:
exit(0);
case -1:
@@ -264,46 +264,45 @@ split1(void)
static void
split2(void)
{
+ char *buf;
+ size_t bufsize;
+ ssize_t len;
long lcnt = 0;
FILE *infp;
+ buf = NULL;
+ bufsize = 0;
+
/* Stick a stream on top of input file descriptor */
if ((infp = fdopen(ifd, "r")) == NULL)
err(EX_NOINPUT, "fdopen");
/* Process input one line at a time */
- while (fgets(bfr, sizeof(bfr), infp) != NULL) {
- const int len = strlen(bfr);
-
- /* If line is too long to deal with, just write it out */
- if (bfr[len - 1] != '\n')
- goto writeit;
-
+ while ((len = getline(&buf, &bufsize, infp)) > 0) {
/* Check if we need to start a new file */
if (pflag) {
regmatch_t pmatch;
pmatch.rm_so = 0;
pmatch.rm_eo = len - 1;
- if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
+ if (regexec(&rgx, buf, 0, &pmatch, REG_STARTEND) == 0)
newfile();
} else if (lcnt++ == numlines) {
newfile();
lcnt = 1;
}
-writeit:
/* Open output file if needed */
if (!file_open)
newfile();
/* Write out line */
- if (write(ofd, bfr, len) != len)
+ if (write(ofd, buf, len) != len)
err(EX_IOERR, "write");
}
/* EOF or error? */
- if (ferror(infp))
+ if ((len == -1 && errno != 0) || ferror(infp))
err(EX_IOERR, "read");
else
exit(0);