git: 04d04b8dafba - main - biology/seqwish: New port: Assembly By Short Sequences: parallel, paired-end sequence assembler

From: Yuri Victorovich <yuri_at_FreeBSD.org>
Date: Fri, 21 Jul 2023 21:39:01 UTC
The branch main has been updated by yuri:

URL: https://cgit.FreeBSD.org/ports/commit/?id=04d04b8dafba8cd1a9875fe8f027c1e310d8ed5f

commit 04d04b8dafba8cd1a9875fe8f027c1e310d8ed5f
Author:     Yuri Victorovich <yuri@FreeBSD.org>
AuthorDate: 2023-07-21 21:38:22 +0000
Commit:     Yuri Victorovich <yuri@FreeBSD.org>
CommitDate: 2023-07-21 21:38:59 +0000

    biology/seqwish: New port: Assembly By Short Sequences: parallel, paired-end sequence assembler
---
 biology/Makefile                             |  1 +
 biology/seqwish/Makefile                     | 25 ++++++++++++++++++++++
 biology/seqwish/distinfo                     |  3 +++
 biology/seqwish/files/patch-CMakeLists.txt   | 14 +++++++++++++
 biology/seqwish/files/patch-src_main.cpp     | 24 +++++++++++++++++++++
 biology/seqwish/files/patch-src_tempfile.cpp | 31 ++++++++++++++++++++++++++++
 biology/seqwish/pkg-descr                    | 10 +++++++++
 7 files changed, 108 insertions(+)

diff --git a/biology/Makefile b/biology/Makefile
index e41e9edcb828..4f426e0a00ca 100644
--- a/biology/Makefile
+++ b/biology/Makefile
@@ -214,6 +214,7 @@
     SUBDIR += seqio
     SUBDIR += seqkit
     SUBDIR += seqtk
+    SUBDIR += seqwish
     SUBDIR += sigviewer
     SUBDIR += sim4
     SUBDIR += slclust
diff --git a/biology/seqwish/Makefile b/biology/seqwish/Makefile
new file mode 100644
index 000000000000..a34c6f9e1370
--- /dev/null
+++ b/biology/seqwish/Makefile
@@ -0,0 +1,25 @@
+PORTNAME=	seqwish
+DISTVERSIONPREFIX=	v
+DISTVERSION=	0.7.9
+CATEGORIES=	biology
+MASTER_SITES=	https://github.com/ekg/seqwish/releases/download/v${DISTVERSION}/
+
+MAINTAINER=	yuri@FreeBSD.org
+COMMENT=	Alignment to variation graph inducer
+WWW=		https://github.com/ekg/seqwish
+
+LICENSE=	MIT
+LICENSE_FILE=	${WRKSRC}/LICENSE
+
+USES=		cmake:noninja
+
+PLIST_FILES=	bin/${PORTNAME}
+
+OPTIONS_DEFINE=		OPENMP
+OPTIONS_DEFAULT=	OPENMP
+
+OPENMP_CMAKE_BOOL=	USE_OPENMP
+OPENMP_CXXFLAGS=	-fopenmp
+OPENMP_BROKEN_OFF=	https://github.com/ekg/seqwish/issues/114
+
+.include <bsd.port.mk>
diff --git a/biology/seqwish/distinfo b/biology/seqwish/distinfo
new file mode 100644
index 000000000000..5753ca0fb6de
--- /dev/null
+++ b/biology/seqwish/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1689972439
+SHA256 (seqwish-v0.7.9.tar.gz) = 1ca7ebf02a35ad147c8a2b81f087aaba2deb878fec7acb4deb9f14e932e89d36
+SIZE (seqwish-v0.7.9.tar.gz) = 12116251
diff --git a/biology/seqwish/files/patch-CMakeLists.txt b/biology/seqwish/files/patch-CMakeLists.txt
new file mode 100644
index 000000000000..3c6ca1d2757f
--- /dev/null
+++ b/biology/seqwish/files/patch-CMakeLists.txt
@@ -0,0 +1,14 @@
+--- CMakeLists.txt.orig	2023-04-19 15:25:32 UTC
++++ CMakeLists.txt
+@@ -256,9 +256,9 @@ target_link_libraries(seqwish
+   "${sdsl-lite_LIB}/libsdsl.a"
+   "${sdsl-lite-divsufsort_LIB}/libdivsufsort.a"
+   "${sdsl-lite-divsufsort_LIB}/libdivsufsort64.a"
+-  "-latomic"
++  #"-latomic"
+   Threads::Threads
+-  jemalloc
++  #jemalloc
+   z)
+ if (BUILD_STATIC)
+   #set(CMAKE_EXE_LINKER_FLAGS "-static")
diff --git a/biology/seqwish/files/patch-src_main.cpp b/biology/seqwish/files/patch-src_main.cpp
new file mode 100644
index 000000000000..73ff8da2acdb
--- /dev/null
+++ b/biology/seqwish/files/patch-src_main.cpp
@@ -0,0 +1,24 @@
+--- src/main.cpp.orig	2023-07-21 20:57:07 UTC
++++ src/main.cpp
+@@ -24,6 +24,8 @@
+ #include "version.hpp"
+ #include "tempfile.hpp"
+ 
++#include <sys/param.h>
++
+ using namespace seqwish;
+ 
+ int main(int argc, char** argv) {
+@@ -116,9 +118,10 @@ int main(int argc, char** argv) {
+     if (tmp_base) {
+         temp_file::set_dir(args::get(tmp_base));
+     } else {
+-        char* cwd = get_current_dir_name();
++        char cwd[MAXPATHLEN];
++	getwd(cwd);
+         temp_file::set_dir(std::string(cwd));
+-        free(cwd);
++        //free(cwd);
+     }
+ 
+     temp_file::set_keep_temp(args::get(keep_temp_files));
diff --git a/biology/seqwish/files/patch-src_tempfile.cpp b/biology/seqwish/files/patch-src_tempfile.cpp
new file mode 100644
index 000000000000..7f6c4699400d
--- /dev/null
+++ b/biology/seqwish/files/patch-src_tempfile.cpp
@@ -0,0 +1,31 @@
+--- src/tempfile.cpp.orig	2023-07-21 20:48:46 UTC
++++ src/tempfile.cpp
+@@ -3,6 +3,8 @@
+ #include <unistd.h>
+ #include "tempfile.hpp"
+ 
++#include <sys/param.h>
++
+ namespace temp_file {
+ 
+     // We use this to make the API thread-safe
+@@ -96,9 +98,10 @@ namespace temp_file {
+ 
+         // Get the default temp dir from environment variables.
+         if (temp_dir.empty()) {
+-            char* cwd = get_current_dir_name();
++            char cwd[MAXPATHLEN];
++            getwd(cwd);
+             temp_dir = std::string(cwd);
+-            free(cwd);
++            //free(cwd);
+             /*const char *system_temp_dir = nullptr;
+             for (const char *var_name : {"TMPDIR", "TMP", "TEMP", "TEMPDIR", "USERPROFILE"}) {
+                 if (system_temp_dir == nullptr) {
+@@ -114,4 +117,4 @@ namespace temp_file {
+     void set_keep_temp(bool setting) {
+         keep_temp = setting;
+     }
+-}
+\ No newline at end of file
++}
diff --git a/biology/seqwish/pkg-descr b/biology/seqwish/pkg-descr
new file mode 100644
index 000000000000..2267dba52b9f
--- /dev/null
+++ b/biology/seqwish/pkg-descr
@@ -0,0 +1,10 @@
+seqwish implements a lossless conversion from pairwise alignments between
+sequences to a variation graph encoding the sequences and their alignments.
+As input we typically take all-versus-all alignments, but the exact structure
+of the alignment set may be defined in an application-specific way. This
+algorithm uses a series of disk-backed sorts and passes over the alignment and
+sequence inputs to allow the graph to be constructed from very large inputs
+that are commonly encountered when working with large numbers of noisy input
+sequences. Memory usage during construction and traversal is limited by the use
+of sorted disk-backed arrays and succinct rank/select dictionaries to record a
+queryable version of the graph.