improvement to split(1)

perryh at pluto.rain.com perryh at pluto.rain.com
Sun Aug 5 22:49:37 PDT 2007


When split(1) produces output files of a specified size (in bytes)
and the size of the input is known, it is possible to compute the
minimum required suffix_length rather than requiring it to be
specified or accepting the default (2).  The attached diffs add a
-B switch, which requests that automatic computation and otherwise
behaves the same as -b.
-------------- next part --------------
*** split.1.orig	Sun Jan 16 23:44:29 2005
--- split.1	Sat Aug  4 22:46:19 2007
***************
*** 30,38 ****
  .\" SUCH DAMAGE.
  .\"
  .\"	@(#)split.1	8.3 (Berkeley) 4/16/94
! .\" $FreeBSD: src/usr.bin/split/split.1,v 1.15 2005/01/17 07:44:29 ru Exp $
  .\"
! .Dd July 12, 2004
  .Dt SPLIT 1
  .Os
  .Sh NAME
--- 30,38 ----
  .\" SUCH DAMAGE.
  .\"
  .\"	@(#)split.1	8.3 (Berkeley) 4/16/94
! .\" $FreeBSD: src/usr.bin/split/split.1,v 1.15+ 2005/01/17 07:44:29 ru Exp $
  .\"
! .Dd August 4, 2007
  .Dt SPLIT 1
  .Os
  .Sh NAME
***************
*** 41,47 ****
  .Sh SYNOPSIS
  .Nm
  .Op Fl a Ar suffix_length
! .Op Fl b Ar byte_count[k|m]
  .Op Fl l Ar line_count
  .Op Fl p Ar pattern
  .Op Ar file Op Ar name
--- 41,47 ----
  .Sh SYNOPSIS
  .Nm
  .Op Fl a Ar suffix_length
! .Op Fl {b|B} Ar byte_count[k|m]
  .Op Fl l Ar line_count
  .Op Fl p Ar pattern
  .Op Ar file Op Ar name
***************
*** 79,84 ****
--- 79,92 ----
  is appended to the number, the file is split into
  .Ar byte_count
  megabyte pieces.
+ .It Fl B
+ Like
+ .Fl b ,
+ but also compute the
+ .Ar suffix_length
+ based on the
+ .Ar byte_count
+ and the file size.
  .It Fl l
  Create smaller files
  .Ar n
-------------- next part --------------
*** split.c.orig	Sun Jul 11 07:44:23 2004
--- split.c	Sat Aug  4 23:01:08 2007
***************
*** 32,38 ****
   */
  
  #include <sys/cdefs.h>
! __FBSDID("$FreeBSD: src/usr.bin/split/split.c,v 1.15 2004/07/11 14:44:23 tjr Exp $");
  
  #ifndef lint
  static const char copyright[] =
--- 32,38 ----
   */
  
  #include <sys/cdefs.h>
! __FBSDID("$FreeBSD: src/usr.bin/split/split.c,v 1.15+ 2004/07/11 14:44:23 tjr Exp $");
  
  #ifndef lint
  static const char copyright[] =
***************
*** 61,66 ****
--- 61,69 ----
  #include <regex.h>
  #include <sysexits.h>
  
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ 
  #define DEFLINE	1000			/* Default num lines per file. */
  
  off_t	 bytecnt;			/* Byte count to split on. */
***************
*** 70,77 ****
  char	 bfr[MAXBSIZE];			/* I/O buffer. */
  char	 fname[MAXPATHLEN];		/* File name prefix. */
  regex_t	 rgx;
! int	 pflag;
! long	 sufflen = 2;			/* File name suffix length. */
  
  void newfile(void);
  void split1(void);
--- 73,80 ----
  char	 bfr[MAXBSIZE];			/* I/O buffer. */
  char	 fname[MAXPATHLEN];		/* File name prefix. */
  regex_t	 rgx;
! int	 pflag, Bflag;
! long	 sufflen = -2;	    /* File name suffix length, -2 => unspecified. */
  
  void newfile(void);
  void split1(void);
***************
*** 85,94 ****
  	long scale;
  	int ch;
  	char *ep, *p;
  
  	setlocale(LC_ALL, "");
  
! 	while ((ch = getopt(argc, argv, "-0123456789a:b:l:p:")) != -1)
  		switch (ch) {
  		case '0': case '1': case '2': case '3': case '4':
  		case '5': case '6': case '7': case '8': case '9':
--- 88,98 ----
  	long scale;
  	int ch;
  	char *ep, *p;
+ 	struct stat istat;
  
  	setlocale(LC_ALL, "");
  
! 	while ((ch = getopt(argc, argv, "-0123456789a:b:l:p:B:")) != -1)
  		switch (ch) {
  		case '0': case '1': case '2': case '3': case '4':
  		case '5': case '6': case '7': case '8': case '9':
***************
*** 114,123 ****
--- 118,134 ----
  			ifd = 0;
  			break;
  		case 'a':		/* Suffix length */
+ 			if (Bflag)
+ 				errx(EX_USAGE, "-a is incompatible with -B");
  			if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep)
  				errx(EX_USAGE,
  				    "%s: illegal suffix length", optarg);
  			break;
+ 		case 'B':		/* Byte count & compute sufflen. */
+ 			if (sufflen != -2)
+ 				errx(EX_USAGE, "-B is incompatible with -a");
+ 			Bflag = 1;
+ 			/* fall through */
  		case 'b':		/* Byte count. */
  			errno = 0;
  			if ((bytecnti = strtoimax(optarg, &ep, 10)) <= 0 ||
***************
*** 153,164 ****
--- 164,185 ----
  	argv += optind;
  	argc -= optind;
  
+ 	if (sufflen == -2)
+ 		sufflen = 2;
+ 
  	if (*argv != NULL)
  		if (ifd == -1) {		/* Input file. */
  			if (strcmp(*argv, "-") == 0)
  				ifd = STDIN_FILENO;
  			else if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
  				err(EX_NOINPUT, "%s", *argv);
+ 			else if (Bflag && fstat(ifd, &istat) == 0 &&
+ 				 istat.st_size > 0) {
+ 				off_t nfiles =
+ 				(istat.st_size + bytecnt - 1) / bytecnt;
+ 				for (sufflen = 1; nfiles > 26; nfiles /= 26)
+ 					++sufflen;
+ 			}
  			++argv;
  		}
  	if (*argv != NULL)			/* File name prefix. */
***************
*** 349,355 ****
  usage(void)
  {
  	(void)fprintf(stderr,
! "usage: split [-a sufflen] [-b byte_count] [-l line_count] [-p pattern]\n");
  	(void)fprintf(stderr,
  "             [file [prefix]]\n");
  	exit(EX_USAGE);
--- 370,376 ----
  usage(void)
  {
  	(void)fprintf(stderr,
! "usage: split [-a sufflen] [-{b|B} byte_count] [-l line_count] [-p pattern]\n");
  	(void)fprintf(stderr,
  "             [file [prefix]]\n");
  	exit(EX_USAGE);


More information about the freebsd-hackers mailing list