about summary refs log tree commit diff
diff options
context:
space:
mode:
author Efraim Flashner 2022-03-21 17:55:12 +0200
committer Efraim Flashner 2022-03-21 17:55:45 +0200
commit ac474ea342f344e83ec2ee3381d21bed1e96134b (patch)
tree b131b5c4d0d13b9fd7adb21105032a0416744ab2
parent e82292fcfcab5dc09da0f8af3c7def5e16a2bbd2 (diff)
download guix-bioinformatics-ac474ea342f344e83ec2ee3381d21bed1e96134b.tar.gz
gn: wfmash: update to 0.7.0-26.81b8292.
-rw-r--r-- VERSION 2
-rw-r--r-- gn/packages/riscv.scm 226
2 files changed, 136 insertions, 92 deletions
diff --git a/VERSION b/VERSION
index 8be7524..868c742 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-3e34b01b70fe4b919757c6f6d02a27c870d440c9
+61b4b6aea29b24f4a1c6af7d8f4e1c416ab41c86
diff --git a/gn/packages/riscv.scm b/gn/packages/riscv.scm
index 2e6da2c..90de616 100644
--- a/gn/packages/riscv.scm
+++ b/gn/packages/riscv.scm
@@ -10,95 +10,53 @@
 ;;;
 
 (define-module (gn packages riscv)
-  #:use-module (gnu packages autotools)
-  #:use-module ((gnu packages bioinformatics) #:prefix guix:)
-  #:use-module (gnu packages compression)
-  #:use-module (gnu packages jemalloc)
-  #:use-module ((gnu packages maths) #:prefix guix:)
-  #:use-module (guix build-system cmake)
-  #:use-module (guix build-system gnu)
+  #:use-module (guix utils)
+  #:use-module (guix packages)
   #:use-module (guix git-download)
+  #:use-module (guix build-system cmake)
   #:use-module ((guix licenses) #:prefix license:)
-  #:use-module (guix packages)
-  #:use-module (guix utils))
+  #:use-module (gnu packages)
+  #:use-module (gnu packages bioinformatics)
+  #:use-module (gnu packages cpp)
+  #:use-module (gnu packages compression)
+  #:use-module (gnu packages jemalloc)
+  #:use-module (gnu packages maths))
 
-(define-public htslib
-  (package
-    (inherit guix:htslib)
-    (name "htslib")
-    (inputs
-     `(("bzip2" ,bzip2)
-       ("xz" ,xz)
-       ,@(package-inputs guix:htslib)))))
 
-(define-public gsl
-  (package
-    (inherit guix:gsl)
-    (name "gsl")
-    (arguments
-     (substitute-keyword-arguments (package-arguments guix:gsl)
-       ((#:configure-flags _) `(list))
-       ((#:phases phases '%standard-phases)
-        `(modify-phases ,phases
-           (add-after 'unpack 'force-bootstrap
-             (lambda _
-               (delete-file "configure")))))))
-    (native-inputs
-     `(("autoconf" ,autoconf)
-       ("automake" ,automake)
-       ("libtool" ,libtool)))))
-
-(define-public atomic-queue
-  (package
-    (name "atomic-queue")
-    (version "1.0")
-    (source (origin
-              (method git-fetch)
-              (uri (git-reference
-                    (url "https://github.com/max0x7ba/atomic_queue")
-                    (commit (string-append "v" version))))
-              (file-name (git-file-name name version))
-              (sha256
-               (base32
-                "0ssff73wlvrsk2nma99dmvm0ijyzfr54jk37kxgpb694r7ajc90l"))))
-    (build-system gnu-build-system)
-    (arguments
-     `(#:tests? #f
-       #:phases
-       (modify-phases %standard-phases
-         (delete 'configure)
-         (delete 'build)
-         (replace 'install
-           (lambda* (#:key outputs #:allow-other-keys)
-             (copy-recursively "include/atomic_queue"
-                               (string-append (assoc-ref outputs "out")
-                                              "/include/atomic_queue")))))))
-    (home-page "https://github.com/max0x7ba/atomic_queue")
-    (synopsis "C++ lockless queue")
-    (description "@code{atomic-queue} provides C++14
-multiple-producer-multiple-consumer lockless queues based on a circular buffer
-with std::atomic.  The maximum queue size must be set at compile time. And,
-there are no OS-blocking push/pop functions, thus making it suitable for
-ultra-low latency applications.")
-    (license license:expat)))
+;; Improvements to riscv support have been merged since the last release.
+(define-public atomic-queue-git
+  (let ((commit "7d75e9ed0359650224b29cdf6728c5fe0a19fffb")     ; 2022-03-11
+        (revision "1"))
+    (package
+      (inherit atomic-queue)
+      (name "atomic-queue")
+      (version (git-version "1.0" revision commit))
+      (source (origin
+                (method git-fetch)
+                (uri (git-reference
+                      (url "https://github.com/max0x7ba/atomic_queue")
+                      (commit commit)))
+                (file-name (git-file-name name version))
+                (sha256
+                 (base32
+                  "1dh8x0ikfwk0by5avwfv9gvr9ay6jy13yr66rvgw9wwyxmklz848")))))))
 
 (define-public wfmash
   (let ((version "0.7.0")
-        (commit "50a68f0d8c372e720d73e7fc9d90a0d0a4e54ef8")
-        (package-revision "25"))
+        (commit "81b8292479648058c6986da808afba0eadcce8d0")
+        (package-revision "26"))
     (package
       (name "wfmash")
-      (version (string-append version "+" (string-take commit 7) "-" package-revision))
+      (version (git-version version package-revision commit))
       (source (origin
                 (method git-fetch)
                 (uri (git-reference
                       (url "https://github.com/ekg/wfmash.git")
-                      (commit commit)
-                      (recursive? #f)))
+                      (commit commit)))
                 (file-name (git-file-name name version))
                 (sha256
                  (base32
-                  "1xf742sxn7xvrcmn67zk9rv8zxl9nb9f72hbw8khdlz1qj3n00vp"))
+                  "0nfmbnmlk2ji5f651dkv0jl1h3d1lp2npldwhdiyylp96z3yz8zb"))
                 (modules '((guix build utils)))
                 (snippet '(begin
                             (delete-file-recursively "src/common/atomic_queue")
@@ -107,34 +65,118 @@ ultra-low latency applications.")
                                "<atomic_queue/atomic_queue.h>"))))))
       (build-system cmake-build-system)
       (arguments
-       `(#:configure-flags '("-DBUILD_SHARED_LIBS=OFF")
-         #:phases
-         (modify-phases
-             %standard-phases
+       `(#:phases
+         (modify-phases %standard-phases
            (add-after 'unpack 'remove-x86-specific-compile-flags
              (lambda _
-               (substitute* (list "CMakeLists.txt"
-                                  "src/common/wflign/CMakeLists.txt"
-                                  "src/common/wflign/deps/WFAv2/CMakeLists.txt"
-                                  "src/common/wflign/deps/wflambdav2/CMakeLists.txt")
-                 (("-mcx16") ""))
-               (substitute* (list "CMakeLists.txt"
-                                  "src/common/wflign/CMakeLists.txt"
-                                  "src/common/wflign/deps/WFAv2/CMakeLists.txt"
-                                  "src/common/wflign/deps/wflambdav2/CMakeLists.txt")
+               (substitute* (find-files "." "CMakeLists\\.txt")
+                 (("-mcx16") "")
                  (("-march=native") ""))
                (substitute* "src/common/dset64.hpp"
-                 (("#error \"wfmash can only be built on an x86_64 machine \\(64-bit Intel/AMD\\)\"")
-                  ""))))
+                 (("__x86_64__" all) (string-append all " && " all)))))
            ;; This stashes our build version in the executable
            (add-after 'unpack 'set-version
              (lambda _
                (mkdir "include")
                (with-output-to-file "include/wfmash_git_version.hpp"
                  (lambda ()
-                   (format #t "#define WFMASH_GIT_VERSION \"~a\"~%" version)))
-               #t))
-           (delete 'check))
+                   (format #t "#define WFMASH_GIT_VERSION \"~a\"~%" version)))))
+           (replace 'check
+             ;; Adapted from .github/workflows/test_on_push.yml
+             (lambda* (#:key tests? #:allow-other-keys)
+               (when tests?
+                 (and
+                   ;; This test takes 60 minutes on riscv64-linux.
+                   ,@(if (not (target-riscv64?))
+                       `((begin
+                           ;; Test with a subset of the LPA dataset (PAF output)
+                           (setenv "ASAN_OPTIONS" "detect_leaks=1:symbolize=1")
+                           (setenv "LSAN_OPTIONS" "verbosity=0:log_threads=1")
+                           (with-output-to-file "LPA.subset.paf"
+                             (lambda _
+                               (invoke "bin/wfmash"
+                                       "../source/data/LPA.subset.fa.gz"
+                                       "../source/data/LPA.subset.fa.gz"
+                                       "-X" "-n" "10" "-T" "wflign_info.")))
+                           (invoke "head" "LPA.subset.paf")))
+                       '())
+                   ;; This test takes about 5 hours on riscv64-linux.
+                   ,@(if (not (target-riscv64?))
+                       `((begin
+                           ;; Test with a subset of the LPA dataset (SAM output)
+                           (setenv "ASAN_OPTIONS" "detect_leaks=1:symbolize=1")
+                           (setenv "LSAN_OPTIONS" "verbosity=0:log_threads=1")
+                           (with-output-to-file "LPA.subset.sam"
+                             (lambda _
+                               (invoke "bin/wfmash"
+                                       "../source/data/LPA.subset.fa.gz"
+                                       "../source/data/LPA.subset.fa.gz"
+                                       "-X" "-N" "-a" "-T" "wflign_info.")))
+                           (with-output-to-file "LPA.subset.sam-view"
+                             (lambda _
+                               (invoke "samtools" "view" "LPA.subset.sam" "-bS")))
+                           (with-output-to-file "LPA.subset.bam"
+                             (lambda _
+                               (invoke "samtools" "sort" "LPA.subset.sam-view")))
+                           (invoke "samtools" "index" "LPA.subset.bam")
+                           ;; samtools view LPA.subset.bam | head | cut -f 1-9
+                           ;(invoke "samtools" "view" "LPA.subset.bam")
+                           ;; There should be an easier way to do this with pipes.
+                           (with-output-to-file "LPA.subset.bam-incr1"
+                             (lambda _
+                               (invoke "samtools" "view" "LPA.subset.bam")))
+                           (with-output-to-file "LPA.subset.bam-incr2"
+                             (lambda _
+                               (invoke "head" "LPA.subset.bam-incr1")))
+                           (invoke "cut" "-f" "1-9" "LPA.subset.bam-incr2")))
+                       '())
+                   ;; This test takes 60 minutes on riscv64-linux.
+                   ,@(if (not (target-riscv64?))
+                       `((begin
+                           ;; Test with a subset of the LPA dataset,
+                           ;; setting a lower identity threshold (PAF output)
+                           (setenv "ASAN_OPTIONS" "detect_leaks=1:symbolize=1")
+                           (setenv "LSAN_OPTIONS" "verbosity=0:log_threads=1")
+                           (with-output-to-file "LPA.subset.p90.paf"
+                             (lambda _
+                               (invoke "bin/wfmash"
+                                       "../source/data/LPA.subset.fa.gz"
+                                       "../source/data/LPA.subset.fa.gz"
+                                       "-X" "-p" "90" "-n" "10" "-T" "wflign_info.")))
+                           (invoke "head" "LPA.subset.p90.paf")))
+                       '())
+                   (begin
+                     ;; Test aligning short reads (500 bps) to a reference (SAM output)
+                     (setenv "ASAN_OPTIONS" "detect_leaks=1:symbolize=1")
+                     (setenv "LSAN_OPTIONS" "verbosity=0:log_threads=1")
+                     (with-output-to-file "reads.500bps.sam"
+                       (lambda _
+                         (invoke "bin/wfmash"
+                                 "../source/data/reference.fa.gz"
+                                 "../source/data/reads.500bps.fa.gz"
+                                 "-s" "0.5k" "-N" "-a")))
+                     (with-output-to-file "reads.500bps.sam-view"
+                       (lambda _
+                         (invoke "samtools" "view" "reads.500bps.sam" "-bS")))
+                     (with-output-to-file "reads.500bps.bam"
+                       (lambda _
+                         (invoke "samtools" "sort" "reads.500bps.sam-view")))
+                     (invoke "samtools" "index" "reads.500bps.bam")
+                     (with-output-to-file "reads.500bps.bam-view"
+                       (lambda _
+                         (invoke "samtools" "view" "reads.500bps.bam")))
+                     (invoke "head" "reads.500bps.bam-view"))
+                   (begin
+                     ;; Test with few very short reads (255bps) (PAF output)
+                     (setenv "ASAN_OPTIONS" "detect_leaks=1:symbolize=1")
+                     (setenv "LSAN_OPTIONS" "verbosity=0:log_threads=1")
+                     (with-output-to-file "reads.255bps.paf"
+                       (lambda _
+                         (invoke "bin/wfmash"
+                                 "../source/data/reads.255bps.fa.gz"
+                                 "../source/data/reads.255bps.fa.gz"
+                                 "-X")))
+                     (invoke "head" "reads.255bps.paf")))))))
          #:make-flags (list (string-append "CC=" ,(cc-for-target))
                             (string-append "CXX=" ,(cxx-for-target)))))
       (inputs (list atomic-queue
@@ -142,6 +184,8 @@ ultra-low latency applications.")
                     htslib
                     jemalloc
                     zlib))
+      (native-inputs
+       (list samtools))
       (synopsis "base-accurate DNA sequence alignments using WFA and mashmap2")
       (description "wfmash is a fork of MashMap that implements
 base-level alignment using the wavefront alignment algorithm WFA. It