git: f8eedac61802 - main - textproc/py-segments: New port: Unicode Standard tokenization routines
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 25 Aug 2025 06:18:24 UTC
The branch main has been updated by yuri:
URL: https://cgit.FreeBSD.org/ports/commit/?id=f8eedac618025136dd2393ab7edebe38f94ef7da
commit f8eedac618025136dd2393ab7edebe38f94ef7da
Author: Yuri Victorovich <yuri@FreeBSD.org>
AuthorDate: 2025-08-25 06:14:18 +0000
Commit: Yuri Victorovich <yuri@FreeBSD.org>
CommitDate: 2025-08-25 06:18:18 +0000
textproc/py-segments: New port: Unicode Standard tokenization routines
---
textproc/Makefile | 1 +
textproc/py-segments/Makefile | 28 ++++++++++++++++++++++++++++
textproc/py-segments/distinfo | 3 +++
textproc/py-segments/pkg-descr | 3 +++
4 files changed, 35 insertions(+)
diff --git a/textproc/Makefile b/textproc/Makefile
index 1d744ad9e0d0..e677728df7ef 100644
--- a/textproc/Makefile
+++ b/textproc/Makefile
@@ -1601,6 +1601,7 @@
SUBDIR += py-rst2html5
SUBDIR += py-sacremoses
SUBDIR += py-scour
+ SUBDIR += py-segments
SUBDIR += py-segno
SUBDIR += py-sense2vec
SUBDIR += py-sentencepiece
diff --git a/textproc/py-segments/Makefile b/textproc/py-segments/Makefile
new file mode 100644
index 000000000000..09aa07b358e8
--- /dev/null
+++ b/textproc/py-segments/Makefile
@@ -0,0 +1,28 @@
+PORTNAME= segments
+DISTVERSION= 2.3.0
+CATEGORIES= textproc python
+MASTER_SITES= PYPI
+PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX}
+
+MAINTAINER= yuri@FreeBSD.org
+COMMENT= Unicode Standard tokenization routines
+WWW= https://github.com/cldf/segments
+
+LICENSE= APACHE20
+LICENSE_FILE= ${WRKSRC}/LICENSE
+
+BUILD_DEPENDS= ${PY_SETUPTOOLS} \
+ ${PYTHON_PKGNAMEPREFIX}wheel>0:devel/py-wheel@${PY_FLAVOR}
+RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}csvw>=1.5.6:misc/py-csvw@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}regex>0:textproc/py-regex@${PY_FLAVOR}
+TEST_DEPENDS= ${PYTHON_PKGNAMEPREFIX}pytest-cov>0:devel/py-pytest-cov@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}pytest-mock>0:devel/py-pytest-mock@${PY_FLAVOR}
+
+USES= python
+USE_PYTHON= pep517 autoplist pytest # test cases fail to find some fixtures
+
+TEST_ENV= ${MAKE_ENV} PYTHONPATH=${STAGEDIR}${PYTHONPREFIX_SITELIBDIR}
+
+NO_ARCH= yes
+
+.include <bsd.port.mk>
diff --git a/textproc/py-segments/distinfo b/textproc/py-segments/distinfo
new file mode 100644
index 000000000000..47a8b6b71c9e
--- /dev/null
+++ b/textproc/py-segments/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1756084039
+SHA256 (segments-2.3.0.tar.gz) = 381143f66f59eaf45398f5bb57f899d6501be011048ec5f92754c9b24b181615
+SIZE (segments-2.3.0.tar.gz) = 18193
diff --git a/textproc/py-segments/pkg-descr b/textproc/py-segments/pkg-descr
new file mode 100644
index 000000000000..1dc8d1ad243d
--- /dev/null
+++ b/textproc/py-segments/pkg-descr
@@ -0,0 +1,3 @@
+The segments package provides Unicode Standard tokenization routines and
+orthography segmentation, implementing the linear algorithm described in
+the orthography profile specification from The Unicode Cookbook.