git: 0532aa392a24 - main - www/py-htmldate: Add py-htmldate 1.7.0

From: Po-Chuan Hsieh <sunpoet_at_FreeBSD.org>
Date: Wed, 21 Feb 2024 15:17:31 UTC
The branch main has been updated by sunpoet:

URL: https://cgit.FreeBSD.org/ports/commit/?id=0532aa392a247337f8afcc9c58025347d9fff64c

commit 0532aa392a247337f8afcc9c58025347d9fff64c
Author:     Po-Chuan Hsieh <sunpoet@FreeBSD.org>
AuthorDate: 2024-02-21 14:13:47 +0000
Commit:     Po-Chuan Hsieh <sunpoet@FreeBSD.org>
CommitDate: 2024-02-21 15:06:10 +0000

    www/py-htmldate: Add py-htmldate 1.7.0
    
    htmldate finds original and updated publication dates of any web page. From the
    command-line or within Python, all the steps needed from web page download to
    HTML parsing, scraping, and text analysis are included.
---
 www/Makefile                         |  1 +
 www/py-htmldate/Makefile             | 26 ++++++++++++++++++++++++++
 www/py-htmldate/distinfo             |  3 +++
 www/py-htmldate/files/patch-setup.py | 11 +++++++++++
 www/py-htmldate/pkg-descr            |  3 +++
 5 files changed, 44 insertions(+)

diff --git a/www/Makefile b/www/Makefile
index 44c67c184f35..8d99027b44c3 100644
--- a/www/Makefile
+++ b/www/Makefile
@@ -1721,6 +1721,7 @@
     SUBDIR += py-html3
     SUBDIR += py-html5-parser
     SUBDIR += py-html5lib
+    SUBDIR += py-htmldate
     SUBDIR += py-httmock
     SUBDIR += py-http-parser
     SUBDIR += py-httpbin
diff --git a/www/py-htmldate/Makefile b/www/py-htmldate/Makefile
new file mode 100644
index 000000000000..2e44731e6ea0
--- /dev/null
+++ b/www/py-htmldate/Makefile
@@ -0,0 +1,26 @@
+PORTNAME=	htmldate
+PORTVERSION=	1.7.0
+CATEGORIES=	www python
+MASTER_SITES=	PYPI
+PKGNAMEPREFIX=	${PYTHON_PKGNAMEPREFIX}
+
+MAINTAINER=	sunpoet@FreeBSD.org
+COMMENT=	Fast and robust extraction of publication dates from URLs and web pages
+WWW=		https://htmldate.readthedocs.io/en/latest/ \
+		https://github.com/adbar/htmldate
+
+LICENSE=	GPLv3+
+LICENSE_FILE=	${WRKSRC}/LICENSE
+
+RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}charset-normalizer>=3.3.2:textproc/py-charset-normalizer@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}dateparser>=1.1.2:devel/py-dateparser@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}lxml>=4.9.3<6:devel/py-lxml@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}dateutil>=2.8.2:devel/py-dateutil@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}urllib3>=1.26,1<3,1:net/py-urllib3@${PY_FLAVOR}
+
+USES=		python
+USE_PYTHON=	autoplist concurrent distutils
+
+NO_ARCH=	yes
+
+.include <bsd.port.mk>
diff --git a/www/py-htmldate/distinfo b/www/py-htmldate/distinfo
new file mode 100644
index 000000000000..010b81ad6da7
--- /dev/null
+++ b/www/py-htmldate/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1708448862
+SHA256 (htmldate-1.7.0.tar.gz) = 02a800dd224cbf74bf483b042f64e14f57ba0e40c6b4404b284e98bc6c30b68d
+SIZE (htmldate-1.7.0.tar.gz) = 53992
diff --git a/www/py-htmldate/files/patch-setup.py b/www/py-htmldate/files/patch-setup.py
new file mode 100644
index 000000000000..1a2e3831b98c
--- /dev/null
+++ b/www/py-htmldate/files/patch-setup.py
@@ -0,0 +1,11 @@
+--- setup.py.orig	2024-01-17 16:57:16 UTC
++++ setup.py
+@@ -120,7 +120,7 @@ setup(
+         "dateparser >= 1.1.2",  # 1.1.3+ slower
+         # see tests on Github Actions
+         "lxml == 4.9.2; platform_system == 'Darwin' and python_version <= '3.8'",
+-        "lxml >= 4.9.4, < 6; platform_system != 'Darwin' or python_version > '3.8'",
++        "lxml >= 4.9.3, < 6; platform_system != 'Darwin' or python_version > '3.8'",
+         "python-dateutil >= 2.8.2",
+         "urllib3 >= 1.26, < 2; python_version < '3.7'",
+         "urllib3 >= 1.26, < 3; python_version >= '3.7'",
diff --git a/www/py-htmldate/pkg-descr b/www/py-htmldate/pkg-descr
new file mode 100644
index 000000000000..0d27dc7f96f5
--- /dev/null
+++ b/www/py-htmldate/pkg-descr
@@ -0,0 +1,3 @@
+htmldate finds original and updated publication dates of any web page. From the
+command-line or within Python, all the steps needed from web page download to
+HTML parsing, scraping, and text analysis are included.