misc/113175: add "-n count" option to split(1)

Thu May 31 02:00:11 UTC 2007

>Number:         113175
>Category:       misc
>Synopsis:       add "-n count" option to split(1)
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Thu May 31 02:00:09 GMT 2007
>Closed-Date:
>Last-Modified:
>Originator:     Jan Schaumann
>Release:        None
>Organization:
>Environment:
>Description:
I added a new option to split(1) in NetBSD that splits a file into N chunks, so that one no longer has to do the math oneself and then specify the '-b' option.

See rev 1.22 of NetBSD's split.c (rev 1.15 of split.1).

Attached is a completely untested patch that might work for your version of split.

Hmm, second try.  Attaching the diff didn't work; the submission form complained about a wrong encoding type.  See the diff in "Fix" instead.
>How-To-Repeat:

>Fix:

--- split.c.orig	2007-05-30 18:35:37.000000000 -0700
+++ split.c	2007-05-30 18:45:56.000000000 -0700
@@ -64,6 +64,7 @@
 #define DEFLINE	1000			/* Default num lines per file. */
 
 off_t	 bytecnt;			/* Byte count to split on. */
+off_t	 chunks = 0;			/* Chunks count to split into. */
 long	 numlines;			/* Line count to split on. */
 int	 file_open;			/* If a file open. */
 int	 ifd = -1, ofd = -1;		/* Input/output file descriptors. */
@@ -76,6 +77,7 @@
 void newfile(void);
 void split1(void);
 void split2(void);
+void split3(void);
 static void usage(void);
 
 int
@@ -88,7 +90,7 @@
 
 	setlocale(LC_ALL, "");
 
-	while ((ch = getopt(argc, argv, "0123456789a:b:l:p:")) != -1)
+	while ((ch = getopt(argc, argv, "0123456789a:b:l:n:p:")) != -1)
 		switch (ch) {
 		case '0': case '1': case '2': case '3': case '4':
 		case '5': case '6': case '7': case '8': case '9':
@@ -138,6 +140,13 @@
 				errx(EX_USAGE,
 				    "%s: illegal line count", optarg);
 			break;
+		case 'n':		/* Chunks. */
+			if (!isdigit((unsigned char)optarg[0]) ||
+				(chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
+				*ep != '\0')
+				errx(EX_USAGE, "%s: illegal number of chunks.", optarg);
+			break;
+
 		case 'p':		/* pattern matching. */
 			if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
 				errx(EX_USAGE, "%s: illegal regexp", optarg);
@@ -164,12 +173,15 @@
 
 	if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname))
 		errx(EX_USAGE, "suffix is too long");
-	if (pflag && (numlines != 0 || bytecnt != 0))
+	if (pflag && (numlines != 0 || bytecnt != 0 || chunks != 0))
 		usage();
 
 	if (numlines == 0)
 		numlines = DEFLINE;
-	else if (bytecnt != 0)
+	else if (bytecnt != 0 || chunks != 0)
+		usage();
+
+	if (bytecnt && chunks)
 		usage();
 
 	if (ifd == -1)				/* Stdin by default. */
@@ -178,6 +190,9 @@
 	if (bytecnt) {
 		split1();
 		exit (0);
+	} else if (chunks) {
+		split3();
+		exit (0);
 	}
 	split2();
 	if (pflag)
@@ -195,6 +210,9 @@
 	off_t bcnt;
 	char *C;
 	ssize_t dist, len;
+	int nfiles;
+
+	nfiles = 0;
 
 	for (bcnt = 0;;)
 		switch ((len = read(ifd, bfr, MAXBSIZE))) {
@@ -204,8 +222,11 @@
 			err(EX_IOERR, "read");
 			/* NOTREACHED */
 		default:
-			if (!file_open)
+			if (!file_open) {
+				if (!chunks || (nfiles < chunks)) {
 				newfile();
+					nfiles++;
+				}
 			if (bcnt + len >= bytecnt) {
 				dist = bytecnt - bcnt;
 				if (write(ofd, bfr, dist) != dist)
@@ -213,13 +234,19 @@
 				len -= dist;
 				for (C = bfr + dist; len >= bytecnt;
 				    len -= bytecnt, C += bytecnt) {
+					if (!chunks || (nfiles < chunks)) {
 					newfile();
+						nfiles++;
+					}
 					if (write(ofd,
 					    C, bytecnt) != bytecnt)
 						err(EX_IOERR, "write");
 				}
 				if (len != 0) {
+					if (!chunks || (nfiles < chunks)) {
 					newfile();
+						nfiles++;
+					}
 					if (write(ofd, C, len) != len)
 						err(EX_IOERR, "write");
 				} else
@@ -285,6 +312,31 @@
 		exit(0);
 }
 
+ /*
+ * split3 --
+ *	Split the input into specified number of chunks
+ */
+static void
+split3()
+{
+	struct stat sb;
+
+	if (fstat(ifd, &sb) == -1) {
+		err(1, "stat");
+		/* NOTREACHED */
+	}
+
+	if (chunks > sb.st_size) {
+		errx(1, "can't split into more than %d files",
+				(int)sb.st_size);
+		/* NOTREACHED */
+	}
+
+	bytecnt = sb.st_size/chunks;
+	split1();
+}
+
+
 /*
  * newfile --
  *	Open a new output file.
@@ -338,6 +390,7 @@
 	(void)fprintf(stderr,
 "usage: split [-l line_count] [-a suffix_length] [file [prefix]]\n"
 "       split -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n"
+"       split -n chunk_count [-a suffix_length] [file [prefix]]\n"
 "       split -p pattern [-a suffix_length] [file [prefix]]\n");
 	exit(EX_USAGE);
 }
--- split.1.orig	2007-05-30 18:36:08.000000000 -0700
+++ split.1	2007-05-30 18:47:14.000000000 -0700
@@ -32,7 +32,7 @@
 .\"	@(#)split.1	8.3 (Berkeley) 4/16/94
 .\" $FreeBSD: /repoman/r/ncvs/src/usr.bin/split/split.1,v 1.21 2006/09/29 15:20:47 ru Exp $
 .\"
-.Dd August 10, 2006
+.Dd May 30, 2007
 .Dt SPLIT 1
 .Os
 .Sh NAME
@@ -53,6 +53,10 @@
 .Op Fl a Ar suffix_length
 .Op Ar file Op Ar prefix
 .Nm
+.Fl n Ar chunk_count
+.Op Fl a Ar suffix_length
+.Op Ar file Op Ar prefix
+.Nm
 .Fl p Ar pattern
 .Op Fl a Ar suffix_length
 .Op Ar file Op Ar prefix
@@ -112,6 +116,10 @@
 Create smaller files
 .Ar line_count
 lines in length.
+.It Fl n Ar chunk_count
+Split file into
+.Ar chunk_count
+smaller files.
 .It Fl p Ar pattern
 The file is split whenever an input line matches
 .Ar pattern ,


>Release-Note:
>Audit-Trail:
>Unformatted: