svn commit: r486909 - in head/textproc: . ucto ucto/files uctodata
Yuri Victorovich
yuri at FreeBSD.org
Sat Dec 8 04:26:48 UTC 2018
Author: yuri
Date: Sat Dec 8 04:26:45 2018
New Revision: 486909
URL: https://svnweb.freebsd.org/changeset/ports/486909
Log:
New ports: textproc/ucto, textproc/uctodata: Advanced rule-based (regular-expression) and unicode-aware tokenizer and its data port
Added:
head/textproc/ucto/
head/textproc/ucto/Makefile (contents, props changed)
head/textproc/ucto/distinfo (contents, props changed)
head/textproc/ucto/files/
head/textproc/ucto/files/patch-config_Makefile.am (contents, props changed)
head/textproc/ucto/pkg-descr (contents, props changed)
head/textproc/ucto/pkg-plist (contents, props changed)
head/textproc/uctodata/
head/textproc/uctodata/Makefile (contents, props changed)
head/textproc/uctodata/distinfo (contents, props changed)
head/textproc/uctodata/pkg-descr (contents, props changed)
head/textproc/uctodata/pkg-plist (contents, props changed)
Modified:
head/textproc/Makefile
Modified: head/textproc/Makefile
==============================================================================
--- head/textproc/Makefile Sat Dec 8 00:48:00 2018 (r486908)
+++ head/textproc/Makefile Sat Dec 8 04:26:45 2018 (r486909)
@@ -1789,6 +1789,8 @@
     SUBDIR += txt2man
     SUBDIR += txt2tags
     SUBDIR += uchardet
+    SUBDIR += ucto
+    SUBDIR += uctodata
     SUBDIR += uim
     SUBDIR += uim-el
     SUBDIR += uim-gtk
Added: head/textproc/ucto/Makefile
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/textproc/ucto/Makefile Sat Dec 8 04:26:45 2018 (r486909)
@@ -0,0 +1,35 @@
+# $FreeBSD$
+
+PORTNAME=	ucto
+DISTVERSIONPREFIX=	v
+DISTVERSION=	0.14
+CATEGORIES=	textproc
+
+MAINTAINER=	yuri@FreeBSD.org
+COMMENT=	Advanced rule-based (regular-expression) and unicode-aware tokenizer
+
+LICENSE=	APACHE20
+LICENSE_FILE=	${WRKSRC}/COPYING
+
+# uctodata is listed for both the build phase and the run phase.
+BUILD_DEPENDS=	autoconf-archive>0:devel/autoconf-archive \
+		uctodata>0:textproc/uctodata
+LIB_DEPENDS=	libexttextcat-2.0.so:textproc/libexttextcat \
+		libfolia.so:textproc/libfolia \
+		libicuio.so:devel/icu \
+		libomp.so:devel/openmp \
+		libticcutils.so:devel/ticcutils
+RUN_DEPENDS=	uctodata>0:textproc/uctodata
+
+# USES=gnome is what makes the USE_GNOME=libxml2 setting below take effect.
+USES=		autoreconf gmake gnome libedit libtool pkgconfig readline
+GNU_CONFIGURE=	yes
+CONFIGURE_ARGS=	--disable-static
+USE_GITHUB=	yes
+GH_ACCOUNT=	LanguageMachines
+USE_GNOME=	libxml2
+USE_LDCONFIG=	yes
+
+INSTALL_TARGET=	install-strip
+
+.include <bsd.port.mk>
Added: head/textproc/ucto/distinfo
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/textproc/ucto/distinfo Sat Dec 8 04:26:45 2018 (r486909)
@@ -0,0 +1,3 @@
+TIMESTAMP = 1544204678
+SHA256 (LanguageMachines-ucto-v0.14_GH0.tar.gz) = ba40c28b0baba4eef98f88abc7c894a4b6fbaf153eaacd2ea3c9c177b0e85ea5
+SIZE (LanguageMachines-ucto-v0.14_GH0.tar.gz) = 350837
Added: head/textproc/ucto/files/patch-config_Makefile.am
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/textproc/ucto/files/patch-config_Makefile.am Sat Dec 8 04:26:45 2018 (r486909)
@@ -0,0 +1,16 @@
+Honor DESTDIR in install-data-hook: remove and re-create the textcat.cfg
+symlink inside the staging directory, using a relative link target.
+
+--- config/Makefile.am.orig	2018-12-08 03:11:07 UTC
++++ config/Makefile.am
+@@ -7,7 +7,7 @@ EXTRA_DIST = $(config_DATA)
+ install-data-hook:
+-	rm -f $(configdir)/textcat.cfg
++	rm -f $(DESTDIR)$(configdir)/textcat.cfg
+ if OLD_LM
+-	$(LN_S) $(configdir)/textcat_alt.cfg $(configdir)/textcat.cfg
++	cd $(DESTDIR)$(configdir) && $(LN_S) textcat_alt.cfg textcat.cfg
+ else
+-	$(LN_S) $(configdir)/textcat_normal.cfg $(configdir)/textcat.cfg
++	cd $(DESTDIR)$(configdir) && $(LN_S) textcat_normal.cfg textcat.cfg
+ endif
Added: head/textproc/ucto/pkg-descr
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/textproc/ucto/pkg-descr Sat Dec 8 04:26:45 2018 (r486909)
@@ -0,0 +1,10 @@
+Ucto tokenizes text files: it separates words from punctuation, and splits
+sentences. It also offers several other basic preprocessing steps, such as
+case changing, that you can use to make your text suitable for further
+processing such as indexing, part-of-speech tagging, or machine translation.
+
+Ucto comes with tokenisation rules for several languages and can be easily
+extended to suit other languages. It has been incorporated for tokenizing Dutch
+text in Frog, our Dutch morpho-syntactic processor.
+
+WWW: https://languagemachines.github.io/ucto/
Added: head/textproc/ucto/pkg-plist
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/textproc/ucto/pkg-plist Sat Dec 8 04:26:45 2018 (r486909)
@@ -0,0 +1,12 @@
+bin/ucto
+include/ucto/my_textcat.h
+include/ucto/setting.h
+include/ucto/tokenize.h
+lib/libucto.so
+lib/libucto.so.3
+lib/libucto.so.3.0.0
+libdata/pkgconfig/ucto.pc
+man/man1/ucto.1.gz
+%%DATADIR%%/textcat.cfg
+%%DATADIR%%/textcat_alt.cfg
+%%DATADIR%%/textcat_normal.cfg
Added: head/textproc/uctodata/Makefile
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/textproc/uctodata/Makefile Sat Dec 8 04:26:45 2018 (r486909)
@@ -0,0 +1,24 @@
+# $FreeBSD$
+
+PORTNAME=	uctodata
+DISTVERSIONPREFIX=	v
+DISTVERSION=	0.8
+CATEGORIES=	textproc
+
+MAINTAINER=	yuri@FreeBSD.org
+COMMENT=	Datafiles for the tokenizer 'ucto'
+
+LICENSE=	APACHE20
+LICENSE_FILE=	${WRKSRC}/COPYING
+
+USES=		autoreconf gmake
+GNU_CONFIGURE=	yes
+USE_GITHUB=	yes
+GH_ACCOUNT=	LanguageMachines
+
+NO_ARCH=	yes
+
+# Point DATADIR at share/ucto instead of the default derived from PORTNAME.
+DATADIR=	${PREFIX}/share/ucto
+
+.include <bsd.port.mk>
Added: head/textproc/uctodata/distinfo
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/textproc/uctodata/distinfo Sat Dec 8 04:26:45 2018 (r486909)
@@ -0,0 +1,3 @@
+TIMESTAMP = 1544225721
+SHA256 (LanguageMachines-uctodata-v0.8_GH0.tar.gz) = a8e5e69696facbd2c2251406560762cf7f4817620179e4a8ee8d241cf0371a5e
+SIZE (LanguageMachines-uctodata-v0.8_GH0.tar.gz) = 37527
Added: head/textproc/uctodata/pkg-descr
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/textproc/uctodata/pkg-descr Sat Dec 8 04:26:45 2018 (r486909)
@@ -0,0 +1,4 @@
+Datafiles for ucto, the rule-based tokenization package that is used to
+parse texts in different languages.
+
+WWW: https://languagemachines.github.io/ucto/
Added: head/textproc/uctodata/pkg-plist
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/textproc/uctodata/pkg-plist Sat Dec 8 04:26:45 2018 (r486909)
@@ -0,0 +1,33 @@
+libdata/pkgconfig/uctodata.pc
+%%DATADIR%%/e-mail.rule
+%%DATADIR%%/exotic-eos.eos
+%%DATADIR%%/exotic-quotes.quote
+%%DATADIR%%/fra.abr
+%%DATADIR%%/fry.abr
+%%DATADIR%%/ligatures.filter
+%%DATADIR%%/nld_afk.abr
+%%DATADIR%%/por.abr
+%%DATADIR%%/rus.abr
+%%DATADIR%%/smiley.rule
+%%DATADIR%%/spa.abr
+%%DATADIR%%/standard-eos.eos
+%%DATADIR%%/standard-quotes.quote
+%%DATADIR%%/swe.abr
+%%DATADIR%%/tokconfig-deu
+%%DATADIR%%/tokconfig-eng
+%%DATADIR%%/tokconfig-fra
+%%DATADIR%%/tokconfig-fry
+%%DATADIR%%/tokconfig-generic
+%%DATADIR%%/tokconfig-ita
+%%DATADIR%%/tokconfig-nld
+%%DATADIR%%/tokconfig-nld-historical
+%%DATADIR%%/tokconfig-nld-sonarchat
+%%DATADIR%%/tokconfig-nld-twitter
+%%DATADIR%%/tokconfig-nld-withplaceholder
+%%DATADIR%%/tokconfig-por
+%%DATADIR%%/tokconfig-rus
+%%DATADIR%%/tokconfig-spa
+%%DATADIR%%/tokconfig-swe
+%%DATADIR%%/tokconfig-tur
+%%DATADIR%%/tur.abr
+%%DATADIR%%/url.rule
More information about the svn-ports-head
mailing list