git: 21a02f42b641 - main - converters/simdutf: Unicode validation and transcoding with SIMD

From: Robert Clausecker <fuz_at_FreeBSD.org>
Date: Mon, 23 Oct 2023 01:53:32 UTC
The branch main has been updated by fuz:

URL: https://cgit.FreeBSD.org/ports/commit/?id=21a02f42b64150353438318eae45aefb66d51b4d

commit 21a02f42b64150353438318eae45aefb66d51b4d
Author:     Robert Clausecker <fuz@FreeBSD.org>
AuthorDate: 2023-10-20 17:07:54 +0000
Commit:     Robert Clausecker <fuz@FreeBSD.org>
CommitDate: 2023-10-23 01:51:27 +0000

    converters/simdutf: Unicode validation and transcoding with SIMD
    
    This library provides fast Unicode functions such as
    
     - ASCII, UTF-8, UTF-16LE/BE and UTF-32 validation, with and without
       error identification,
     - transcoding between each of Latin1, UTF-8, UTF-16LE/BE, and UTF-32,
       with and without validation, with and without error identification,
     - From a UTF-8 string, compute the size of the Latin1/UTF-16/UTF-32
       equivalent string,
     - From a UTF-16LE/BE string, compute the size of the
       Latin1/UTF-8/UTF-32 equivalent string,
     - From a UTF-32 string, compute the size of the UTF-8 or UTF-16LE
       equivalent string,
     - UTF-8 and UTF-16LE/BE character counting,
     - UTF-16 endianness change (UTF-16LE/BE to UTF-16BE/LE).
    
    The functions are accelerated using SIMD instructions (e.g., ARM NEON,
    SSE, AVX, AVX-512, etc.). When your strings contain hundreds of
    characters, we can often transcode them at speeds exceeding a billion
    characters per second. You should expect high speeds not only with
    English strings (ASCII) but also Chinese, Japanese, Arabic, and so
    forth. We handle the full character range (including, for example,
    emojis).
    
    The library compiles down to a small library of a few hundred kilobytes.
    Our functions are exception-free and non-allocating. We have extensive
    tests and extensive benchmarks.
    
    WWW: https://simdutf.github.io/simdutf/
---
 converters/Makefile          |  1 +
 converters/simdutf/Makefile  | 38 ++++++++++++++++++++++++++++++++++++++
 converters/simdutf/distinfo  |  3 +++
 converters/simdutf/pkg-descr | 26 ++++++++++++++++++++++++++
 converters/simdutf/pkg-plist | 18 ++++++++++++++++++
 5 files changed, 86 insertions(+)

diff --git a/converters/Makefile b/converters/Makefile
index 3782f6bcdac8..7b40eac0f7ae 100644
--- a/converters/Makefile
+++ b/converters/Makefile
@@ -168,6 +168,7 @@
     SUBDIR += rubygem-url_safe_base64
     SUBDIR += rubygem-xdr
     SUBDIR += showkey
+    SUBDIR += simdutf
     SUBDIR += ta2as
     SUBDIR += tnef
     SUBDIR += trans
diff --git a/converters/simdutf/Makefile b/converters/simdutf/Makefile
new file mode 100644
index 000000000000..947a871a07f6
--- /dev/null
+++ b/converters/simdutf/Makefile
@@ -0,0 +1,38 @@
+PORTNAME=	simdutf
+DISTVERSIONPREFIX=	v
+DISTVERSION=	4.0.0
+CATEGORIES=	converters textproc
+
+MAINTAINER=	fuz@FreeBSD.org
+COMMENT=	Unicode validation and transcoding with SIMD
+WWW=		https://simdutf.github.io/simdutf/
+
+LICENSE=	APACHE20 MIT
+LICENSE_COMB=	dual
+LICENSE_FILE_APACHE20=	${WRKSRC}/LICENSE-APACHE
+LICENSE_FILE_MIT=	${WRKSRC}/LICENSE-MIT
+
+USES=		cmake:testing
+USE_GITHUB=	yes
+USE_LDCONFIG=	yes
+
+CMAKE_ON=	BUILD_SHARED_LIBS
+
+OPTIONS_DEFINE=	BENCHMARKS ICONV TEST TOOLS
+OPTIONS_DEFAULT=	ICONV TOOLS
+OPTIONS_SUB=		yes
+BENCHMARKS_DESC=	Build benchmarks
+TOOLS_DESC=		Build tools
+
+BENCHMARKS_IMPLIES=	TEST
+BENCHMARKS_LIB_DEPENDS=	libicuuc.so:devel/icu
+BENCHMARKS_CMAKE_BOOL=	SIMDUTF_BENCHMARKS
+ICONV_USES=	iconv
+ICONV_CMAKE_BOOL=	SIMDUTF_ICONV
+TEST_CMAKE_BOOL=	SIMDUTF_TESTS
+TOOLS_CMAKE_BOOL=	SIMDUTF_TOOLS
+
+pre-test-TEST-off:
+	@echo Enable option TEST to run the full test suite
+
+.include <bsd.port.mk>
diff --git a/converters/simdutf/distinfo b/converters/simdutf/distinfo
new file mode 100644
index 000000000000..ef9cc66d6552
--- /dev/null
+++ b/converters/simdutf/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1697849191
+SHA256 (simdutf-simdutf-v4.0.0_GH0.tar.gz) = 1a84ea8a24396ea410d1c88d3126f95956a8799d8eaea0e03dc721e7c65ff9b3
+SIZE (simdutf-simdutf-v4.0.0_GH0.tar.gz) = 1864807
diff --git a/converters/simdutf/pkg-descr b/converters/simdutf/pkg-descr
new file mode 100644
index 000000000000..d8319ef380ee
--- /dev/null
+++ b/converters/simdutf/pkg-descr
@@ -0,0 +1,26 @@
+This library provides fast Unicode functions such as
+
+ - ASCII, UTF-8, UTF-16LE/BE and UTF-32 validation, with and without
+   error identification,
+ - transcoding between each of Latin1, UTF-8, UTF-16LE/BE, and UTF-32,
+   with and without validation, with and without error identification,
+ - From a UTF-8 string, compute the size of the Latin1/UTF-16/UTF-32
+   equivalent string,
+ - From a UTF-16LE/BE string, compute the size of the
+   Latin1/UTF-8/UTF-32 equivalent string,
+ - From a UTF-32 string, compute the size of the UTF-8 or UTF-16LE
+   equivalent string,
+ - UTF-8 and UTF-16LE/BE character counting,
+ - UTF-16 endianness change (UTF-16LE/BE to UTF-16BE/LE).
+
+The functions are accelerated using SIMD instructions (e.g., ARM NEON,
+SSE, AVX, AVX-512, etc.). When your strings contain hundreds of
+characters, we can often transcode them at speeds exceeding a billion
+characters per second. You should expect high speeds not only with
+English strings (ASCII) but also Chinese, Japanese, Arabic, and so
+forth. We handle the full character range (including, for example,
+emojis).
+
+The library compiles down to a small library of a few hundred kilobytes.
+Our functions are exception-free and non-allocating. We have extensive
+tests and extensive benchmarks.
diff --git a/converters/simdutf/pkg-plist b/converters/simdutf/pkg-plist
new file mode 100644
index 000000000000..4c801e507c91
--- /dev/null
+++ b/converters/simdutf/pkg-plist
@@ -0,0 +1,18 @@
+%%TOOLS%%bin/sutf
+include/simdutf.h
+include/simdutf/avx512.h
+include/simdutf/common_defs.h
+include/simdutf/compiler_check.h
+include/simdutf/encoding_types.h
+include/simdutf/error.h
+include/simdutf/implementation.h
+include/simdutf/internal/isadetection.h
+include/simdutf/portability.h
+include/simdutf/simdutf_version.h
+lib/cmake/simdutf/simdutf-config-version.cmake
+lib/cmake/simdutf/simdutf-config.cmake
+lib/cmake/simdutf/simdutfTargets-%%CMAKE_BUILD_TYPE%%.cmake
+lib/cmake/simdutf/simdutfTargets.cmake
+lib/libsimdutf.so.5.0.0
+lib/libsimdutf.so.5
+lib/libsimdutf.so