git: 75c3ca1ebf81 - main - split: add some tests

From: Kyle Evans <kevans_at_FreeBSD.org>
Date: Tue, 25 Oct 2022 15:06:29 UTC
The branch main has been updated by kevans:

URL: https://cgit.FreeBSD.org/src/commit/?id=75c3ca1ebf81224919f2b7a9edf2921ce76ea290

commit 75c3ca1ebf81224919f2b7a9edf2921ce76ea290
Author:     Kyle Evans <kevans@FreeBSD.org>
AuthorDate: 2022-08-23 02:08:03 +0000
Commit:     Kyle Evans <kevans@FreeBSD.org>
CommitDate: 2022-10-25 15:05:24 +0000

    split: add some tests
    
    This should cover all of the basic functionality, as well as the recent
    enhancement to use a dynamic buffer size rather than limiting patterns
    and lines to MAXBSIZE.
    
    Reviewed by:    bapt
    Sponsored by:   Klara, Inc.
    Differential Revision:  https://reviews.freebsd.org/D36324
---
 etc/mtree/BSD.tests.dist          |   2 +
 usr.bin/split/Makefile            |   5 +
 usr.bin/split/tests/Makefile      |   7 ++
 usr.bin/split/tests/split_test.sh | 213 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 227 insertions(+)

diff --git a/etc/mtree/BSD.tests.dist b/etc/mtree/BSD.tests.dist
index c83f720e1de3..1a00c13f78ba 100644
--- a/etc/mtree/BSD.tests.dist
+++ b/etc/mtree/BSD.tests.dist
@@ -1088,6 +1088,8 @@
         ..
         sort
         ..
+        split
+        ..
         stat
         ..
         tail
diff --git a/usr.bin/split/Makefile b/usr.bin/split/Makefile
index 6891f8bbb4d7..c366d574e090 100644
--- a/usr.bin/split/Makefile
+++ b/usr.bin/split/Makefile
@@ -1,8 +1,13 @@
 #	@(#)Makefile	8.1 (Berkeley) 6/6/93
 # $FreeBSD$
 
+.include <src.opts.mk>
+
 PROG=	split
 
 LIBADD=	util
 
+HAS_TESTS=		yes
+SUBDIR.${MK_TESTS}+=	tests
+
 .include <bsd.prog.mk>
diff --git a/usr.bin/split/tests/Makefile b/usr.bin/split/tests/Makefile
new file mode 100644
index 000000000000..29ef04f8fa3b
--- /dev/null
+++ b/usr.bin/split/tests/Makefile
@@ -0,0 +1,7 @@
+# $FreeBSD$
+
+PACKAGE=	tests
+
+ATF_TESTS_SH+=	split_test
+
+.include <bsd.test.mk>
diff --git a/usr.bin/split/tests/split_test.sh b/usr.bin/split/tests/split_test.sh
new file mode 100755
index 000000000000..58f62c3d018f
--- /dev/null
+++ b/usr.bin/split/tests/split_test.sh
@@ -0,0 +1,213 @@
+#
+# SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+#
+# Copyright (c) 2022 Klara Systems
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD$
+
+# Mirrors MAXBSIZE from sys/param.h; a value already set in the environment wins.
+: ${MAXBSIZE:=65536}
+
+atf_test_case bytes	# split -b: cut the input into fixed-size byte pieces
+bytes_body()
+{
+	printf "aaaa" > foo-aa		# expected pieces: 4 bytes each, with
+	printf "bb\nc" > foo-ab		# newlines landing mid-piece
+	printf "ccc\n" > foo-ac
+
+	cat foo-* > foo			# input is the expected pieces glued together
+	atf_check split -b 4 foo split-
+	atf_check -o file:foo-aa cat split-aa	# cat's stdout must equal foo-aa
+	atf_check -o file:foo-ab cat split-ab
+	atf_check -o file:foo-ac cat split-ac
+
+	# MAXBSIZE is split(1)'s default buffer size, so we'll split at just a
+	# little bit past the buffer size to make sure that it still properly
+	# splits even when it needs to read again to hit the byte limit.
+	bsize=$((MAXBSIZE + 12))
+	rm foo-* foo			# drop the first round's fixtures
+	jot -ns "" -b "a" ${bsize} > foo-aa	# bsize x "a", no newline
+	jot -ns "" -b "b" ${bsize} > foo-ab	# bsize x "b", no newline
+	jot -ns "" -b "c" 12 > foo-ac	# short trailing piece
+
+	cat foo-* > foo
+	atf_check split -b ${bsize} foo split-
+	atf_check -o file:foo-aa cat split-aa
+	atf_check -o file:foo-ab cat split-ab
+	atf_check -o file:foo-ac cat split-ac
+}
+
+atf_test_case chunks	# split -n: cut the input into a fixed number of pieces
+chunks_body()
+{
+	jot -ns "" -b "a" 4096 > foo	# input: 4096 a + 4096 b + 4104 c
+	jot -ns "" -b "b" 4096 >> foo	# = 12296 bytes, no newlines
+	jot -ns "" -b "c" 4104 >> foo
+
+	chunks=3			# 12296 / 3 = 4098, remainder 2
+	jot -ns "" -b "a" 4096 > foo-aa	# expected piece 1: 4098 bytes
+	jot -ns "" -b "b" 2 >> foo-aa
+	jot -ns "" -b "b" 4094 > foo-ab	# expected piece 2: 4098 bytes
+	jot -ns "" -b "c" 4 >> foo-ab
+	jot -ns "" -b "c" 4100 > foo-ac	# expected piece 3: 4098 + the 2 left over
+
+	atf_check split -n ${chunks} foo split-
+	atf_check -o file:foo-aa cat split-aa
+	atf_check -o file:foo-ab cat split-ab
+	atf_check -o file:foo-ac cat split-ac
+}
+
+atf_test_case sensible_lines	# split -l on short, ordinary lines
+sensible_lines_body()
+{
+	echo "The quick brown fox" > foo-aa	# one line per expected piece
+	echo "jumps over" > foo-ab
+	echo "the lazy dog" > foo-ac
+
+	cat foo-* > foo			# three-line input
+	atf_check split -l 1 foo split-
+	atf_check -o file:foo-aa cat split-aa
+	atf_check -o file:foo-ab cat split-ab
+	atf_check -o file:foo-ac cat split-ac
+
+	# Try again, make sure that `-` uses stdin as documented.
+	atf_check rm split-*		# clear the previous run's output
+	atf_check -x 'split -l 1 - split- < foo'	# -x: run via the shell for `<`
+	atf_check -o file:foo-aa cat split-aa
+	atf_check -o file:foo-ab cat split-ab
+	atf_check -o file:foo-ac cat split-ac
+
+	# Finally, try with -l == 2; we should see a 2/1 split instead of the
+	# previous 1/1/1.
+	cat foo-aa foo-ab > foo-aa-ng	# expected piece 1: lines 1 and 2
+	cat foo-ac > foo-ab-ng		# expected piece 2: line 3
+
+	atf_check rm split-*
+	atf_check split -l 2 foo split-
+
+	atf_check -o file:foo-aa-ng cat split-aa
+	atf_check -o file:foo-ab-ng cat split-ab
+}
+
+atf_test_case long_lines	# split -l on lines at or beyond MAXBSIZE bytes
+long_lines_body()
+{
+
+	# Test file lines will be (each terminated by a newline):
+	# a x MAXBSIZE
+	# b x MAXBSIZE + c x MAXBSIZE
+	# d x 1024
+	#
+	# The historical split(1) implementation wouldn't grow its internal
+	# buffer, so we'd end up with 2/3 split- files being wrong with -l 1.
+	# Notably, split-aa would include most of the first two lines, split-ab
+	# a tiny fraction of the second line, and split-ac the third line.
+	#
+	# Recent split(1) instead grows the buffer until we can either fit the
+	# line or we run out of memory.
+	jot -s "" -b "a" ${MAXBSIZE} > foo-aa	# line 1
+	jot -ns "" -b "b" ${MAXBSIZE} > foo-ab	# line 2: -n leaves off the newline...
+	jot -s "" -b "c" ${MAXBSIZE} >> foo-ab	# ...so the c's continue the same line
+	jot -s "" -b "d" 1024 > foo-ac		# line 3
+
+	cat foo-* > foo
+	atf_check split -l 1 foo split-
+
+	atf_check -o file:foo-aa cat split-aa
+	atf_check -o file:foo-ab cat split-ab
+	atf_check -o file:foo-ac cat split-ac
+}
+
+atf_test_case numeric_suffix	# split -d: numeric rather than alphabetic suffixes
+numeric_suffix_body()
+{
+	echo "The quick brown fox" > foo-00	# one line per expected piece
+	echo "jumps over" > foo-01
+	echo "the lazy dog" > foo-02
+
+	cat foo-* > foo
+	atf_check split -d -l 1 foo split-	# expect split-00, split-01, split-02
+
+	atf_check -o file:foo-00 cat split-00
+	atf_check -o file:foo-01 cat split-01
+	atf_check -o file:foo-02 cat split-02
+}
+
+atf_test_case larger_suffix_length	# split -a: suffixes longer than the default
+larger_suffix_length_body()
+{
+	:> foo				# start from an empty input file
+
+	# Generate foo-000 through foo-009, then foo-010 and foo-011
+	for i in $(seq -w 0 11); do	# -w zero-pads: 00, 01, ..., 11
+		len=$((${i##0} + 1))	# strip a leading 0 first; presumably so 08/09 aren't read as octal
+		file="foo-0${i}"
+		jot -s "" -b "a" ${len} > ${file}	# one line of len "a"s, so every line differs
+		cat ${file} >> foo
+	done
+
+	atf_check split -a 3 -d -l 1 foo split-	# 3-digit numeric suffixes, one line each
+	for i in $(seq -w 0 11); do
+		srcfile="foo-0${i}"
+		splitfile="split-0${i}"
+		atf_check -o file:"${srcfile}" cat "${splitfile}"
+	done
+}
+
+atf_test_case pattern	# split -p: start a new piece at lines matching a regex
+pattern_body()
+{
+
+	# Some fake yaml gives us a good realistic use-case for -p, as we can
+	# split on top-level stanzas.  Each heredoc below is one expected piece.
+	cat <<EOF > foo-aa
+cat:
+  aa: true
+  ab: true
+  ac: true
+EOF
+	cat <<EOF > foo-ab
+dog:
+  ba: true
+  bb: true
+  bc: true
+EOF
+
+	cat foo-* > foo
+
+	atf_check split -p "^[^[:space:]]+:" foo split-	# matches unindented "key:" lines only
+	atf_check -o file:foo-aa cat split-aa
+	atf_check -o file:foo-ab cat split-ab
+}
+
+atf_init_test_cases()	# register every test case declared in this file
+{
+	atf_add_test_case bytes
+	atf_add_test_case chunks
+	atf_add_test_case sensible_lines
+	atf_add_test_case long_lines
+	atf_add_test_case numeric_suffix
+	atf_add_test_case larger_suffix_length
+	atf_add_test_case pattern
+}