misc/113175: add "-n count" option to split(1)
Jan Schaumann
jschauma at netmeister.org
Thu May 31 02:00:11 UTC 2007
>Number: 113175
>Category: misc
>Synopsis: add "-n count" option to split(1)
>Confidential: no
>Severity: non-critical
>Priority: low
>Responsible: freebsd-bugs
>State: open
>Quarter:
>Keywords:
>Date-Required:
>Class: sw-bug
>Submitter-Id: current-users
>Arrival-Date: Thu May 31 02:00:09 GMT 2007
>Closed-Date:
>Last-Modified:
>Originator: Jan Schaumann
>Release: None
>Organization:
>Environment:
>Description:
I added a new option to split(1) in NetBSD that allows splitting a file into N chunks, rather than having to do the math oneself and then specify the '-b' option.
See rev 1.22 of NetBSD's split.c (rev 1.15 of split.1).
Attached is a completely untested patch that might work for your version of split.
Hmm, second try. Attaching the diff didn't work. Something about wrong encoding type. See diff in "Fix".
>How-To-Repeat:
>Fix:
--- split.c.orig 2007-05-30 18:35:37.000000000 -0700
+++ split.c 2007-05-30 18:45:56.000000000 -0700
@@ -64,6 +64,7 @@
#define DEFLINE 1000 /* Default num lines per file. */
off_t bytecnt; /* Byte count to split on. */
+off_t chunks = 0; /* Chunks count to split into. */
long numlines; /* Line count to split on. */
int file_open; /* If a file open. */
int ifd = -1, ofd = -1; /* Input/output file descriptors. */
@@ -76,6 +77,7 @@
void newfile(void);
void split1(void);
void split2(void);
+void split3(void);
static void usage(void);
int
@@ -88,7 +90,7 @@
setlocale(LC_ALL, "");
- while ((ch = getopt(argc, argv, "0123456789a:b:l:p:")) != -1)
+ while ((ch = getopt(argc, argv, "0123456789a:b:l:n:p:")) != -1)
switch (ch) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
@@ -138,6 +140,13 @@
errx(EX_USAGE,
"%s: illegal line count", optarg);
break;
+ case 'n': /* Chunks. */
+ if (!isdigit((unsigned char)optarg[0]) ||
+ (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
+ *ep != '\0')
+ errx(EX_USAGE, "%s: illegal number of chunks.", optarg);
+ break;
+
case 'p': /* pattern matching. */
if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
errx(EX_USAGE, "%s: illegal regexp", optarg);
@@ -164,12 +173,15 @@
if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname))
errx(EX_USAGE, "suffix is too long");
- if (pflag && (numlines != 0 || bytecnt != 0))
+ if (pflag && (numlines != 0 || bytecnt != 0 || chunks != 0))
usage();
if (numlines == 0)
numlines = DEFLINE;
- else if (bytecnt != 0)
+ else if (bytecnt != 0 || chunks != 0)
+ usage();
+
+ if (bytecnt && chunks)
usage();
if (ifd == -1) /* Stdin by default. */
@@ -178,6 +190,9 @@
if (bytecnt) {
split1();
exit (0);
+ } else if (chunks) {
+ split3();
+ exit (0);
}
split2();
if (pflag)
@@ -195,6 +210,9 @@
off_t bcnt;
char *C;
ssize_t dist, len;
+ int nfiles;
+
+ nfiles = 0;
for (bcnt = 0;;)
switch ((len = read(ifd, bfr, MAXBSIZE))) {
@@ -204,8 +222,11 @@
err(EX_IOERR, "read");
/* NOTREACHED */
default:
- if (!file_open)
+ if (!file_open) {
+ if (!chunks || (nfiles < chunks)) {
newfile();
+ nfiles++;
+ }
if (bcnt + len >= bytecnt) {
dist = bytecnt - bcnt;
if (write(ofd, bfr, dist) != dist)
@@ -213,13 +234,19 @@
len -= dist;
for (C = bfr + dist; len >= bytecnt;
len -= bytecnt, C += bytecnt) {
+ if (!chunks || (nfiles < chunks)) {
newfile();
+ nfiles++;
+ }
if (write(ofd,
C, bytecnt) != bytecnt)
err(EX_IOERR, "write");
}
if (len != 0) {
+ if (!chunks || (nfiles < chunks)) {
newfile();
+ nfiles++;
+ }
if (write(ofd, C, len) != len)
err(EX_IOERR, "write");
} else
@@ -285,6 +312,31 @@
exit(0);
}
+ /*
+ * split3 --
+ * Split the input into specified number of chunks
+ */
+static void
+split3()
+{
+ struct stat sb;
+
+ if (fstat(ifd, &sb) == -1) {
+ err(1, "stat");
+ /* NOTREACHED */
+ }
+
+ if (chunks > sb.st_size) {
+ errx(1, "can't split into more than %d files",
+ (int)sb.st_size);
+ /* NOTREACHED */
+ }
+
+ bytecnt = sb.st_size/chunks;
+ split1();
+}
+
+
/*
* newfile --
* Open a new output file.
@@ -338,6 +390,7 @@
(void)fprintf(stderr,
"usage: split [-l line_count] [-a suffix_length] [file [prefix]]\n"
" split -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n"
+" split -n chunk_count [-a suffix_length] [file [prefix]]\n"
" split -p pattern [-a suffix_length] [file [prefix]]\n");
exit(EX_USAGE);
}
--- split.1.orig 2007-05-30 18:36:08.000000000 -0700
+++ split.1 2007-05-30 18:47:14.000000000 -0700
@@ -32,7 +32,7 @@
.\" @(#)split.1 8.3 (Berkeley) 4/16/94
.\" $FreeBSD: /repoman/r/ncvs/src/usr.bin/split/split.1,v 1.21 2006/09/29 15:20:47 ru Exp $
.\"
-.Dd August 10, 2006
+.Dd May 30, 2007
.Dt SPLIT 1
.Os
.Sh NAME
@@ -53,6 +53,10 @@
.Op Fl a Ar suffix_length
.Op Ar file Op Ar prefix
.Nm
+.Fl n Ar chunk_count
+.Op Fl a Ar suffix_length
+.Op Ar file Op Ar prefix
+.Nm
.Fl p Ar pattern
.Op Fl a Ar suffix_length
.Op Ar file Op Ar prefix
@@ -112,6 +116,10 @@
Create smaller files
.Ar line_count
lines in length.
+.It Fl n Ar chunk_count
+Split file into
+.Ar chunk_count
+smaller files.
.It Fl p Ar pattern
The file is split whenever an input line matches
.Ar pattern ,
>Release-Note:
>Audit-Trail:
>Unformatted:
More information about the freebsd-bugs
mailing list