git: f8eedac61802 - main - textproc/py-segments: New port: Unicode Standard tokenization routines

From: Yuri Victorovich <yuri_at_FreeBSD.org>
Date: Mon, 25 Aug 2025 06:18:24 UTC
The branch main has been updated by yuri:

URL: https://cgit.FreeBSD.org/ports/commit/?id=f8eedac618025136dd2393ab7edebe38f94ef7da

commit f8eedac618025136dd2393ab7edebe38f94ef7da
Author:     Yuri Victorovich <yuri@FreeBSD.org>
AuthorDate: 2025-08-25 06:14:18 +0000
Commit:     Yuri Victorovich <yuri@FreeBSD.org>
CommitDate: 2025-08-25 06:18:18 +0000

    textproc/py-segments: New port: Unicode Standard tokenization routines
---
 textproc/Makefile              |  1 +
 textproc/py-segments/Makefile  | 28 ++++++++++++++++++++++++++++
 textproc/py-segments/distinfo  |  3 +++
 textproc/py-segments/pkg-descr |  3 +++
 4 files changed, 35 insertions(+)

diff --git a/textproc/Makefile b/textproc/Makefile
index 1d744ad9e0d0..e677728df7ef 100644
--- a/textproc/Makefile
+++ b/textproc/Makefile
@@ -1601,6 +1601,7 @@
     SUBDIR += py-rst2html5
     SUBDIR += py-sacremoses
     SUBDIR += py-scour
+    SUBDIR += py-segments
     SUBDIR += py-segno
     SUBDIR += py-sense2vec
     SUBDIR += py-sentencepiece
diff --git a/textproc/py-segments/Makefile b/textproc/py-segments/Makefile
new file mode 100644
index 000000000000..09aa07b358e8
--- /dev/null
+++ b/textproc/py-segments/Makefile
@@ -0,0 +1,28 @@
+PORTNAME=	segments
+DISTVERSION=	2.3.0
+CATEGORIES=	textproc python
+MASTER_SITES=	PYPI
+PKGNAMEPREFIX=	${PYTHON_PKGNAMEPREFIX}
+
+MAINTAINER=	yuri@FreeBSD.org
+COMMENT=	Unicode Standard tokenization routines
+WWW=		https://github.com/cldf/segments
+
+LICENSE=	APACHE20
+LICENSE_FILE=	${WRKSRC}/LICENSE
+
+BUILD_DEPENDS=	${PY_SETUPTOOLS} \
+		${PYTHON_PKGNAMEPREFIX}wheel>0:devel/py-wheel@${PY_FLAVOR}
+RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}csvw>=1.5.6:misc/py-csvw@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}regex>0:textproc/py-regex@${PY_FLAVOR}
+TEST_DEPENDS=	${PYTHON_PKGNAMEPREFIX}pytest-cov>0:devel/py-pytest-cov@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}pytest-mock>0:devel/py-pytest-mock@${PY_FLAVOR}
+
+USES=		python
+USE_PYTHON=	pep517 autoplist pytest # known issue: some test cases fail because they cannot find their fixtures
+
+TEST_ENV=	${MAKE_ENV} PYTHONPATH=${STAGEDIR}${PYTHONPREFIX_SITELIBDIR}
+
+NO_ARCH=	yes
+
+.include <bsd.port.mk>
diff --git a/textproc/py-segments/distinfo b/textproc/py-segments/distinfo
new file mode 100644
index 000000000000..47a8b6b71c9e
--- /dev/null
+++ b/textproc/py-segments/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1756084039
+SHA256 (segments-2.3.0.tar.gz) = 381143f66f59eaf45398f5bb57f899d6501be011048ec5f92754c9b24b181615
+SIZE (segments-2.3.0.tar.gz) = 18193
diff --git a/textproc/py-segments/pkg-descr b/textproc/py-segments/pkg-descr
new file mode 100644
index 000000000000..1dc8d1ad243d
--- /dev/null
+++ b/textproc/py-segments/pkg-descr
@@ -0,0 +1,3 @@
+The segments package provides Unicode Standard tokenization routines and
+orthography segmentation, implementing the linear algorithm described in
+the orthography profile specification from The Unicode Cookbook.