git: c5c128b186be - main - math/py-annoy: New port: Approximate Nearest Neighbors in C++

From: Yuri Victorovich <yuri_at_FreeBSD.org>
Date: Thu, 12 Jan 2023 05:25:28 UTC
The branch main has been updated by yuri:

URL: https://cgit.FreeBSD.org/ports/commit/?id=c5c128b186be0150e2d4885d9dde759fc93b25c9

commit c5c128b186be0150e2d4885d9dde759fc93b25c9
Author:     Yuri Victorovich <yuri@FreeBSD.org>
AuthorDate: 2023-01-12 05:24:48 +0000
Commit:     Yuri Victorovich <yuri@FreeBSD.org>
CommitDate: 2023-01-12 05:25:23 +0000

    math/py-annoy: New port: Approximate Nearest Neighbors in C++
---
 math/Makefile                                |  1 +
 math/py-annoy/Makefile                       | 37 ++++++++++++++++++++++++++++
 math/py-annoy/distinfo                       |  3 +++
 math/py-annoy/files/patch-setup.py           | 10 ++++++++
 math/py-annoy/files/patch-src_annoymodule.cc | 11 +++++++++
 math/py-annoy/files/test-load.py             |  7 ++++++
 math/py-annoy/files/test-save.py             | 12 +++++++++
 math/py-annoy/pkg-descr                      |  4 +++
 8 files changed, 85 insertions(+)

diff --git a/math/Makefile b/math/Makefile
index 6392ae0c3cdd..b1feac913044 100644
--- a/math/Makefile
+++ b/math/Makefile
@@ -867,6 +867,7 @@
     SUBDIR += py-amply
     SUBDIR += py-animatplot
     SUBDIR += py-animatplot-ng
+    SUBDIR += py-annoy
     SUBDIR += py-apgl
     SUBDIR += py-arviz
     SUBDIR += py-arybo
diff --git a/math/py-annoy/Makefile b/math/py-annoy/Makefile
new file mode 100644
index 000000000000..f60de58fde88
--- /dev/null
+++ b/math/py-annoy/Makefile
@@ -0,0 +1,37 @@
+PORTNAME=	annoy
+DISTVERSIONPREFIX=	v
+DISTVERSION=	1.17.1
+CATEGORIES=	math
+PKGNAMEPREFIX=	${PYTHON_PKGNAMEPREFIX}
+
+MAINTAINER=	yuri@FreeBSD.org
+COMMENT=	Approximate Nearest Neighbors in C++
+WWW=		https://github.com/spotify/annoy
+
+LICENSE=	APACHE20
+LICENSE_FILE=	${WRKSRC}/LICENSE
+
+TEST_DEPENDS=	${PYTHON_PKGNAMEPREFIX}h5py>0:science/py-h5py@${PY_FLAVOR} \
+		${PYNUMPY}
+
+USES=		python
+USE_PYTHON=	distutils autoplist pytest # tests fail because nose is broken; do-test below runs the save/load scripts from files/ instead
+
+USE_GITHUB=	yes
+GH_ACCOUNT=	spotify
+
+TEST_ENV=	${MAKE_ENV} PYTHONPATH=${STAGEDIR}${PYTHONPREFIX_SITELIBDIR}
+TEST_WRKSRC=	${WRKSRC}/test
+
+post-install:
+	@${STRIP_CMD} ${STAGEDIR}${PYTHON_SITELIBDIR}/annoy/annoylib${PYTHON_EXT_SUFFIX}.so
+
+do-test:
+	cd ${TEST_WRKSRC} && \
+		${ECHO} "saving data" && \
+		${SETENV} ${TEST_ENV} ${PYTHON_CMD} ${FILESDIR}/test-save.py && \
+		${ECHO} "loading data" && \
+		${SETENV} ${TEST_ENV} ${PYTHON_CMD} ${FILESDIR}/test-load.py && \
+		${ECHO} "tests succeeded"
+
+.include <bsd.port.mk>
diff --git a/math/py-annoy/distinfo b/math/py-annoy/distinfo
new file mode 100644
index 000000000000..7820871881b9
--- /dev/null
+++ b/math/py-annoy/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1673498821
+SHA256 (spotify-annoy-v1.17.1_GH0.tar.gz) = 4f7a2f2d86d45b432de68dba06667b23d0ce2b03595d64bd5c05f42dc32e7f4b
+SIZE (spotify-annoy-v1.17.1_GH0.tar.gz) = 674087
diff --git a/math/py-annoy/files/patch-setup.py b/math/py-annoy/files/patch-setup.py
new file mode 100644
index 000000000000..f1bf9c98ae64
--- /dev/null
+++ b/math/py-annoy/files/patch-setup.py
@@ -0,0 +1,10 @@
+--- setup.py.orig	2023-01-12 05:20:26 UTC
++++ setup.py
+@@ -104,6 +104,6 @@ setup(name='annoy',
+           'Programming Language :: Python :: 3.9',
+       ],
+       keywords='nns, approximate nearest neighbor search',
+-      setup_requires=['nose>=1.0'],
++      setup_requires=[],
+       tests_require=['numpy', 'h5py']
+       )
diff --git a/math/py-annoy/files/patch-src_annoymodule.cc b/math/py-annoy/files/patch-src_annoymodule.cc
new file mode 100644
index 000000000000..6c0be8088fde
--- /dev/null
+++ b/math/py-annoy/files/patch-src_annoymodule.cc
@@ -0,0 +1,11 @@
+--- src/annoymodule.cc.orig	2023-01-12 04:57:07 UTC
++++ src/annoymodule.cc
+@@ -179,7 +179,7 @@ py_an_init(py_annoy *self, PyObject *args, PyObject *k
+   int f;
+   static char const * kwlist[] = {"f", "metric", NULL};
+   if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|s", (char**)kwlist, &f, &metric))
+-    return (int) NULL;
++    return 0;
+   return 0;
+ }
+ 
diff --git a/math/py-annoy/files/test-load.py b/math/py-annoy/files/test-load.py
new file mode 100644
index 000000000000..3eecbf6bb041
--- /dev/null
+++ b/math/py-annoy/files/test-load.py
@@ -0,0 +1,7 @@
+from annoy import AnnoyIndex
+
+f = 40  # Length of item vector that will be indexed
+
+u = AnnoyIndex(f, 'angular')
+u.load('test.ann') # super fast, will just mmap the file
+print(u.get_nns_by_item(0, 1000)) # will find the 1000 nearest neighbors
diff --git a/math/py-annoy/files/test-save.py b/math/py-annoy/files/test-save.py
new file mode 100644
index 000000000000..21a8f50df0c0
--- /dev/null
+++ b/math/py-annoy/files/test-save.py
@@ -0,0 +1,12 @@
+from annoy import AnnoyIndex
+import random
+
+f = 40  # Length of item vector that will be indexed
+
+t = AnnoyIndex(f, 'angular')
+for i in range(1000):
+    v = [random.gauss(0, 1) for z in range(f)]
+    t.add_item(i, v)
+
+t.build(10) # 10 trees
+t.save('test.ann')
diff --git a/math/py-annoy/pkg-descr b/math/py-annoy/pkg-descr
new file mode 100644
index 000000000000..8d40cf570d67
--- /dev/null
+++ b/math/py-annoy/pkg-descr
@@ -0,0 +1,4 @@
+Annoy (Approximate Nearest Neighbors Oh Yeah) is a C++ library with Python
+bindings to search for points in space that are close to a given query point.
+It also creates large read-only file-based data structures that are mmapped
+into memory so that many processes may share the same data.