svn commit: r521891 - in head/deskutils/py-paperless: . files
Michael Gmelin
grembo at FreeBSD.org
Fri Jan 3 00:19:59 UTC 2020
Author: grembo
Date: Fri Jan 3 00:19:58 2020
New Revision: 521891
URL: https://svnweb.freebsd.org/changeset/ports/521891
Log:
Port back filename transformation feature from pull request
https://github.com/the-paperless-project/paperless/pull/542
Adapt man page to new default python version. Remove stale comment.
Added:
head/deskutils/py-paperless/files/patch-docs-guesswork.rst (contents, props changed)
head/deskutils/py-paperless/files/patch-src-documents-models.py (contents, props changed)
Modified:
head/deskutils/py-paperless/Makefile
head/deskutils/py-paperless/files/paperless.7.in
head/deskutils/py-paperless/files/patch-paperless.conf.example
head/deskutils/py-paperless/files/patch-src-paperless-settings.py
Modified: head/deskutils/py-paperless/Makefile
==============================================================================
--- head/deskutils/py-paperless/Makefile Thu Jan 2 23:58:27 2020 (r521890)
+++ head/deskutils/py-paperless/Makefile Fri Jan 3 00:19:58 2020 (r521891)
@@ -2,7 +2,7 @@
PORTNAME= paperless
PORTVERSION= 2.7.0
-PORTREVISION= 2
+PORTREVISION= 3
CATEGORIES= deskutils python
PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX}
@@ -108,7 +108,6 @@ do-install:
${STAGEDIR}/var/db/paperless/sqlite
${INSTALL_MAN} ${WRKDIR}/paperless.7 ${STAGEDIR}${MANPREFIX}/man/man7
-# Klammern aussenrum?
post-install-DOCS-on:
@${MKDIR} ${STAGEDIR}${DOCSDIR}/presentation
@cd ${WRKSRC}/docs/_build/html && \
Modified: head/deskutils/py-paperless/files/paperless.7.in
==============================================================================
--- head/deskutils/py-paperless/files/paperless.7.in Thu Jan 2 23:58:27 2020 (r521890)
+++ head/deskutils/py-paperless/files/paperless.7.in Fri Jan 3 00:19:58 2020 (r521891)
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd March 30, 2019
+.Dd January 3, 2020
.Dt PAPERLESS 7
.Os
.Sh NAME
@@ -130,7 +130,7 @@ web server, e.g., nginx + uwsgi.
.Pp
Install and configure uwsgi:
.Pp
-.Dl "pkg install uwsgi-py36"
+.Dl "pkg install uwsgi"
.Dl "mkdir -p %%PREFIX%%/etc/uwsgi"
.Dl "cp %%EXAMPLESDIR%%/uwsgi.ini \\"
.Dl " %%PREFIX%%/etc/uwsgi/paperless.ini"
Added: head/deskutils/py-paperless/files/patch-docs-guesswork.rst
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/deskutils/py-paperless/files/patch-docs-guesswork.rst Fri Jan 3 00:19:58 2020 (r521891)
@@ -0,0 +1,37 @@
+--- docs/guesswork.rst.orig 2019-01-27 13:48:05 UTC
++++ docs/guesswork.rst
+@@ -54,6 +54,34 @@ filename as described above.
+
+ .. _dateparser: https://github.com/scrapinghub/dateparser/blob/v0.7.0/docs/usage.rst#settings
+
++Transforming filenames for parsing
++----------------------------------
++Some devices can't produce filenames that can be parsed by the default
++parser. By configuring the option ``PAPERLESS_FILENAME_PARSE_TRANSFORMS`` in
++``paperless.conf`` one can add transformations that are applied to the filename
++before it's parsed.
++
++The option contains a list of dictionaries of regular expressions (key:
++``pattern``) and replacements (key: ``repl``) in JSON format, which are
++applied in order by passing them to ``re.subn``. Transformation stops
++after the first match, so at most one transformation is applied. The general
++syntax is
++
++.. code:: python
++
++ [{"pattern":"pattern1", "repl":"repl1"}, {"pattern":"pattern2", "repl":"repl2"}, ..., {"pattern":"patternN", "repl":"replN"}]
++
++The example below is for a Brother ADS-2400N, a scanner that allows
++assigning different names to different hardware buttons (useful for handling
++multiple entities in one instance), but insists on adding ``_<count>``
++to the filename.
++
++.. code:: python
++
++ # Brother profile configuration, support "Name_Date_Count" (the default
++ # setting) and "Name_Count" (use "Name" as tag and "Count" as title).
++ PAPERLESS_FILENAME_PARSE_TRANSFORMS=[{"pattern":"^([a-z]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.", "repl":"\\2\\3Z - \\4 - \\1."}, {"pattern":"^([a-z]+)_([0-9]+)\\.", "repl":" - \\2 - \\1."}]
++
+ .. _guesswork-content:
+
+ Reading the Document Contents
Modified: head/deskutils/py-paperless/files/patch-paperless.conf.example
==============================================================================
--- head/deskutils/py-paperless/files/patch-paperless.conf.example Thu Jan 2 23:58:27 2020 (r521890)
+++ head/deskutils/py-paperless/files/patch-paperless.conf.example Fri Jan 3 00:19:58 2020 (r521891)
@@ -36,3 +36,27 @@
# To host paperless under a subpath url like example.com/paperless you set
# this value to /paperless. No trailing slash!
+@@ -135,6 +135,23 @@ PAPERLESS_EMAIL_SECRET=""
+ # as normal.
+ #PAPERLESS_FILENAME_DATE_ORDER="YMD"
+
++# Sometimes devices won't create filenames which can be parsed properly
++# by the filename parser (see
++# https://paperless.readthedocs.io/en/latest/guesswork.html).
++#
++# This setting allows specifying a list of transformations
++# in regular expression syntax, which are passed in order to re.subn.
++# Transformation stops after the first match, so at most one transformation
++# is applied.
++#
++# Syntax is a JSON array of dictionaries containing "pattern" and "repl"
++# as keys.
++#
++# The example below transforms filenames created by a Brother ADS-2400N
++# document scanner in its standard configuration `Name_Date_Count', so that
++# count is used as title, name as tag and date can be parsed by paperless.
++#PAPERLESS_FILENAME_PARSE_TRANSFORMS=[{"pattern":"^([a-z]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.", "repl":"\\2\\3Z - \\4 - \\1."}]
++
+ #
+ # The following values use sensible defaults for modern systems, but if you're
+ # running Paperless on a low-resource device (like a Raspberry Pi), modifying
Added: head/deskutils/py-paperless/files/patch-src-documents-models.py
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/deskutils/py-paperless/files/patch-src-documents-models.py Fri Jan 3 00:19:58 2020 (r521891)
@@ -0,0 +1,18 @@
+--- src/documents/models.py.orig 2019-01-27 13:48:05 UTC
++++ src/documents/models.py
+@@ -483,8 +483,14 @@ class FileInfo:
+ "<title>.<suffix>"
+ """
+
++ filename = os.path.basename(path)
++ for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS:
++ (filename, count) = pattern.subn(repl, filename)
++ if count:
++ break
++
+ for regex in cls.REGEXES.values():
+- m = regex.match(os.path.basename(path))
++ m = regex.match(filename)
+ if m:
+ properties = m.groupdict()
+ cls._mangle_property(properties, "created")
Modified: head/deskutils/py-paperless/files/patch-src-paperless-settings.py
==============================================================================
--- head/deskutils/py-paperless/files/patch-src-paperless-settings.py Thu Jan 2 23:58:27 2020 (r521890)
+++ head/deskutils/py-paperless/files/patch-src-paperless-settings.py Fri Jan 3 00:19:58 2020 (r521891)
@@ -1,6 +1,16 @@
--- src/paperless/settings.py.orig 2019-01-27 13:48:05 UTC
+++ src/paperless/settings.py
-@@ -104,7 +104,7 @@ MIDDLEWARE = [
+@@ -10,7 +10,9 @@ For the full list of settings and their
+ https://docs.djangoproject.com/en/1.10/ref/settings/
+ """
+
++import json
+ import os
++import re
+
+ from dotenv import load_dotenv
+
+@@ -102,7 +104,7 @@ MIDDLEWARE = [
]
# We allow CORS from localhost:8080
@@ -9,3 +19,19 @@
# If auth is disabled, we just use our "bypass" authentication middleware
if bool(os.getenv("PAPERLESS_DISABLE_LOGIN", "false").lower() in ("yes", "y", "1", "t", "true")):
+@@ -314,6 +316,15 @@ FY_END = os.getenv("PAPERLESS_FINANCIAL_
+ DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
+ FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
+
++# Transformations applied before filename parsing
++FILENAME_PARSE_TRANSFORMS = []
++_filename_parse_transforms = os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS")
++if _filename_parse_transforms:
++ FILENAME_PARSE_TRANSFORMS = [(
++ re.compile(t["pattern"]), t["repl"])
++ for t in json.loads(_filename_parse_transforms)
++ ]
++
+ # Specify for how many years a correspondent is considered recent. Recent
+ # correspondents will be shown in a separate "Recent correspondents" filter as
+ # well. Set to 0 to disable this filter.
More information about the svn-ports-all
mailing list