git: 5ef5e58a9051 - main - textproc/py-pdf2docx: New port: Library and application to convert pdf to docx

From: Yuri Victorovich <yuri_at_FreeBSD.org>
Date: Mon, 11 Sep 2023 03:40:22 UTC
The branch main has been updated by yuri:

URL: https://cgit.FreeBSD.org/ports/commit/?id=5ef5e58a9051aad258aa948376badacc97ef130c

commit 5ef5e58a9051aad258aa948376badacc97ef130c
Author:     Yuri Victorovich <yuri@FreeBSD.org>
AuthorDate: 2023-09-11 03:39:34 +0000
Commit:     Yuri Victorovich <yuri@FreeBSD.org>
CommitDate: 2023-09-11 03:40:15 +0000

    textproc/py-pdf2docx: New port: Library and application to convert pdf to docx
---
 textproc/Makefile              |  1 +
 textproc/py-pdf2docx/Makefile  | 29 +++++++++++++++++++++++++++++
 textproc/py-pdf2docx/distinfo  |  3 +++
 textproc/py-pdf2docx/pkg-descr |  6 ++++++
 4 files changed, 39 insertions(+)

diff --git a/textproc/Makefile b/textproc/Makefile
index 4394d47e183c..08f9879c5656 100644
--- a/textproc/Makefile
+++ b/textproc/Makefile
@@ -1467,6 +1467,7 @@
     SUBDIR += py-parsimonious
     SUBDIR += py-parso
     SUBDIR += py-patiencediff
+    SUBDIR += py-pdf2docx
     SUBDIR += py-pdfminer.six
     SUBDIR += py-pdfminer3k
     SUBDIR += py-pdfrw
diff --git a/textproc/py-pdf2docx/Makefile b/textproc/py-pdf2docx/Makefile
new file mode 100644
index 000000000000..e7244113b971
--- /dev/null
+++ b/textproc/py-pdf2docx/Makefile
@@ -0,0 +1,29 @@
+PORTNAME=	pdf2docx
+DISTVERSION=	0.5.6
+CATEGORIES=	textproc python
+MASTER_SITES=	PYPI
+PKGNAMEPREFIX=	${PYTHON_PKGNAMEPREFIX}
+
+MAINTAINER=	yuri@FreeBSD.org
+COMMENT=	Library and application to convert pdf to docx
+WWW=		https://github.com/dothinking/pdf2docx
+
+LICENSE=	GPLv3
+LICENSE_FILE=	${WRKSRC}/LICENSE
+
+PY_DEPENDS=	${PYTHON_PKGNAMEPREFIX}fire>=0.3.0:devel/py-fire@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}PyMuPDF>=1.19.0:print/py-PyMuPDF@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}fonttools>=4.24.0:print/py-fonttools@${PY_FLAVOR} \
+		${PYNUMPY} \
+		${PYTHON_PKGNAMEPREFIX}python-docx>=0.8.10:textproc/py-python-docx@${PY_FLAVOR} \
+		opencv>0:graphics/opencv
+BUILD_DEPENDS=	${PY_DEPENDS} \
+		${PYTHON_PKGNAMEPREFIX}pip>0:devel/py-pip@${PY_FLAVOR}
+RUN_DEPENDS=	${PY_DEPENDS}
+
+USES=		python
+USE_PYTHON=	distutils concurrent autoplist
+
+NO_ARCH=	yes
+
+.include <bsd.port.mk>
diff --git a/textproc/py-pdf2docx/distinfo b/textproc/py-pdf2docx/distinfo
new file mode 100644
index 000000000000..91cc3c3f364c
--- /dev/null
+++ b/textproc/py-pdf2docx/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1694388825
+SHA256 (pdf2docx-0.5.6.tar.gz) = 364c8b78a6c1cfd96a1d2c659b06ba90fc116535c998e398641da80a8b5262c2
+SIZE (pdf2docx-0.5.6.tar.gz) = 3085029
diff --git a/textproc/py-pdf2docx/pkg-descr b/textproc/py-pdf2docx/pkg-descr
new file mode 100644
index 000000000000..b0d2d9e319a8
--- /dev/null
+++ b/textproc/py-pdf2docx/pkg-descr
@@ -0,0 +1,6 @@
+pdf2docx is a Python program and library to convert pdf to docx.
+
+pdf2docx allows you to:
+* Extract data from PDF with PyMuPDF, e.g. text, images and drawings
+* Parse layout with rules, e.g. sections, paragraphs, images and tables
+* Generate docx with python-docx