about summary refs log tree commit diff
path: root/gn/packages/bioinformatics.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gn/packages/bioinformatics.scm')
-rw-r--r-- gn/packages/bioinformatics.scm 319
1 file changed, 196 insertions, 123 deletions
diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm
index 3f89abe..e6ca5db 100644
--- a/gn/packages/bioinformatics.scm
+++ b/gn/packages/bioinformatics.scm
@@ -41,8 +41,7 @@
   #:use-module (gnu packages compression)
   #:use-module (gnu packages cpp)
   #:use-module (gnu packages cran)
-  #:use-module (gnu packages crates-io)
-  #:use-module (gnu packages crates-graphics)
+  #:use-module (past-crates packages crates-io)
   #:use-module (gnu packages curl)
   #:use-module (gnu packages databases)
   #:use-module (gnu packages datastructures)
@@ -55,6 +54,7 @@
   #:use-module (gnu packages graph)
   #:use-module (gnu packages gtk)
   #:use-module (gnu packages guile)
+  #:use-module (gnu packages haskell-xyz)
   #:use-module (gnu packages image)
   #:use-module (gnu packages imagemagick)
   #:use-module (gnu packages java)
@@ -74,11 +74,13 @@
   #:use-module (gnu packages python-build)
   #:use-module (gnu packages python-science)
   #:use-module (gnu packages python-web)
-  #:use-module ((gnu packages python-xyz) #:hide (python2-six))
+  #:use-module (gnu packages python-xyz)
+  #:use-module (gnu packages python-check)
   #:use-module (gnu packages rdf)
   #:use-module (gnu packages readline)
   #:use-module (gnu packages rsync)
   #:use-module (gnu packages ruby)
+  #:use-module (gnu packages ruby-xyz)
   #:use-module (gnu packages rust)
   #:use-module (gnu packages serialization)
   #:use-module (gnu packages shells)
@@ -2163,6 +2165,7 @@ suitable for long reads, but works also well with short reads.")
     (build-system cmake-build-system)
     (arguments
      (list
+       #:tests? #f  ;; FIXME: tests are broken
        #:phases
        #~(modify-phases %standard-phases
            (add-after 'unpack 'use-gnuinstalldirs-macros
@@ -2337,14 +2340,14 @@ in-memory footprint at the cost of packing and unpacking.")
 (define-public vg
   (package
     (name "vg")
-    (version "1.61.0")
+    (version "1.65.0")
     (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/vgteam/vg/releases/download/v"
                             version "/vg-v" version ".tar.gz"))
         (sha256
-         (base32 "1yb8ffqq65ma5di42qlj483x42viq5j4xyyg3vpww60gm15n1yxx"))
+         (base32 "022wiz0rs1x90nk9zaxcf8szhma8kzygnbfhciq9166kjysd0pc6"))
         (snippet
          #~(begin
              (use-modules (guix build utils))
@@ -2504,8 +2507,11 @@ in-memory footprint at the cost of packing and unpacking.")
                 (substitute* "Makefile"
                   ((".*test-docs.*") "")))))
           (add-after 'build 'build-manpages
-            (lambda* (#:key inputs #:allow-other-keys)
-              (invoke "make" "man")))
+            (lambda* (#:key make-flags #:allow-other-keys)
+              ;; vg is not in PATH.  Use the relative path ./bin/vg instead.
+              (substitute* "doc/vgmanmd.py"
+                (("'vg'") "'./bin/vg'"))
+              (apply invoke "make" "man" make-flags)))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((out (assoc-ref outputs "out")))
@@ -2525,9 +2531,11 @@ in-memory footprint at the cost of packing and unpacking.")
              bash-tap
              bc
              cmake-minimal
+             ghc-pandoc
              jq
              perl
              pkg-config
+             python
              samtools
              util-linux
              which
@@ -2580,121 +2588,159 @@ multiple sequence alignment.")
         license:zlib    ; deps/sonLib/externalTools/cutest
         license:boost1.0)))) ; catch.hpp
 
+(define-public wfmash-0.14  ; variant of `wfmash' pinned at 0.14.0; listed in pggb's inputs
+  (package
+    (inherit wfmash)  ; fields not overridden here (name, build system, ...) come from wfmash
+    (version "0.14.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append "https://github.com/waveygang/wfmash/releases/download/v"
+                           version "/wfmash-v" version ".tar.gz"))  ; GitHub release tarball
+       (sha256
+        (base32
+         "1mk3jschn3hdr45glds65g3hxk7v6nc3plkvxmfbd5pr4kyzlf13"))
+       (snippet  ; modifies the unpacked source tree before it is used as the package source
+        #~(begin
+            (use-modules (guix build utils))
+            ;; Unbundle atomic-queue: delete the bundled copy and include <atomic_queue/atomic_queue.h> instead.
+            (delete-file-recursively "src/common/atomic_queue")
+            (substitute* "src/align/include/computeAlignments.hpp"
+              (("\"common/atomic_queue/atomic_queue.h\"")
+               "<atomic_queue/atomic_queue.h>"))
+            ;; Drop -march=native from every CMakeLists.txt so the build is not tuned to the build machine's CPU.
+            (substitute* (find-files "." "CMakeLists\\.txt")
+              (("-march=native ") ""))))))
+    (arguments
+     (substitute-keyword-arguments (package-arguments wfmash)  ; start from wfmash's arguments
+       ((#:tests? tests? #f) #f)  ; tests are forced off, whatever value is inherited
+       ((#:phases phases #~%standard-phases)
+        #~(modify-phases #$phases
+            (replace 'build-check-prerequisites  ; assumes wfmash defines a phase of this name -- TODO confirm
+              (lambda _
+                (let ((wfa2-lib #$(string-append "../wfmash-v"  ; relative path; presumably resolved from the build directory -- confirm
+                                                 version
+                                                 "/src/common/wflign/deps/WFA2-lib")))
+                  (substitute* (string-append wfa2-lib "/Makefile")
+                    ;; Remove architecture-specific flags.
+                    (("-march=x86-64-v3") ""))
+                  (substitute* (string-append wfa2-lib "/tests/wfa.utest.sh")
+                    ;; Use plain `time': the regexp matches a literal backslash followed by "time -v".
+                    (("\\\\time -v") "time"))
+                  ;; Build wfa2-lib in its own directory with the target's C compiler.
+                  (invoke "make" "-C" wfa2-lib
+                          #$(string-append "CC=" (cc-for-target))))))))))
+    (inputs
+     (modify-inputs (package-inputs wfmash)  ; wfmash's inputs, adjusted for this version
+       (prepend jemalloc)  ; added on top of the inherited inputs
+       (delete "libdeflate")))))  ; removed from the inherited inputs
+
 (define-public pggb
-  (let ((commit "9ebff27320382e470ed38a85b4448402e1e7c353")
-        (revision "1"))
-    (package
-      (name "pggb")
-      (version (git-version "0.5.1" revision commit))
-      (source (origin
-                (method git-fetch)
-                (uri (git-reference
-                       (url "https://github.com/pangenome/pggb")
-                       (commit commit)))
-                (file-name (git-file-name name version))
-                (sha256
-                 (base32 "0rgpj52q3ai7f1saqbilgx5gz4f403x3427wq649qwv84ivmi1sf"))))
-      (build-system copy-build-system)
-      (arguments
-       (list
-         #:install-plan
-         #~'(("pggb" "bin/")
-             ("partition-before-pggb" "bin/")
-             ("scripts/" "bin/")
-             ("scripts" "bin/scripts"))
-         #:phases
-         #~(modify-phases %standard-phases
-             (add-after 'unpack 'force-python3
-               (lambda _
-                 (substitute* (find-files "scripts" "\\.py$")
-                   (("/usr/bin/python") "/usr/bin/python3"))))
-             (add-before 'install 'patch-and-wrap-scripts
-               (lambda* (#:key inputs #:allow-other-keys)
-                 (substitute* "scripts/vcf_preprocess.sh"
-                   (("bcftools ")
-                    (string-append (search-input-file inputs "/bin/bcftools") " ")))
-                 (wrap-script "scripts/net2communities.py"
-                   `("GUIX_PYTHONPATH" ":" prefix
-                     (,(getenv "GUIX_PYTHONPATH"))))))
-             (add-after 'install 'wrap-scripts
-               (lambda* (#:key inputs outputs #:allow-other-keys)
-                 (let ((out (assoc-ref outputs "out")))
-                   (for-each
-                     (lambda (file)
-                       (wrap-script file
-                         `("R_LIBS_SITE" ":" prefix
-                           (,(getenv "R_LIBS_SITE")))
-                         `("PATH" ":" prefix
-                           ,(map (lambda (input) (string-append input "/bin"))
-                                 '#$(map (lambda (label)
-                                           (or (this-package-input (string-append label "-hwcaps"))
-                                               (this-package-input label)))
-                                         (list "bc"
-                                               "bcftools"
-                                               "bedtools"
-                                               "gfaffix"
-                                               "htslib"
-                                               "fastix"
-                                               "multiqc"
-                                               "mummer"
-                                               "odgi"
-                                               "pafplot"
-                                               "parallel"
-                                               "pigz"
-                                               "python"
-                                               "r-data-table"
-                                               "r-minimal"
-                                               "rtg-tools"
-                                               "samtools"
-                                               "seqwish"
-                                               "smoothxg"
-                                               "time"
-                                               "vcfbub"
-                                               "vcflib"
-                                               "vg"
-                                               "wfmash"))))))
-                          (list (string-append out "/bin/pggb")
-                                (string-append out "/bin/partition-before-pggb")
-                                (string-append out "/bin/gfa2evaluation.sh")
-                                (string-append out "/bin/scripts/gfa2evaluation.sh"))))))
-             (add-after 'install 'substitute-file-paths
-               (lambda* (#:key outputs #:allow-other-keys)
-                 (let ((out (assoc-ref outputs "out")))
-                   (substitute* (string-append out "/bin/gfa2evaluation.sh")
-                     (("/usr/local/bin/vcf_preprocess.sh")
-                      (string-append out "/bin/vcf_preprocess.sh"))
-                     (("/usr/local/bin/nucmer2vcf.R")
-                      (string-append out "/bin/nucmer2vcf.R")))))))))
-      (inputs
-       (list bc
-             bcftools
-             bedtools
-             gfaffix
-             guile-3.0      ; for wrap-script
-             htslib         ; tabix
-             fastix
-             multiqc
-             mummer
-             odgi
-             pafplot
-             parallel
-             pigz
-             python
-             python-igraph
-             r-data-table
-             r-minimal
-             rtg-tools
-             samtools
-             seqwish
-             smoothxg
-             time
-             vcfbub
-             vcflib
-             vg
-             wfmash))
-      (home-page "https://doi.org/10.1101/2023.04.05.535718")
-      (synopsis "PanGenome Graph Builder")
-      (description "@command{pggb} builds
+  (package
+    (name "pggb")
+    (version "0.7.2")
+    (source (origin
+              (method git-fetch)
+              (uri (git-reference
+                    (url "https://github.com/pangenome/pggb")
+                    (commit (string-append "v" version))))
+              (file-name (git-file-name name version))
+              (sha256
+               (base32
+                "1k3ffl5h5jqnz9k9y5cjqr8z6hh0gyjfj4szggz11lncp5wkskgv"))))
+    (build-system copy-build-system)
+    (arguments
+     (list
+      #:install-plan
+      #~'(("pggb" "bin/")
+          ("partition-before-pggb" "bin/")
+          ("scripts/" "bin/")
+          ("scripts" "bin/scripts"))
+      #:phases
+      #~(modify-phases %standard-phases
+          (add-after 'unpack 'force-python3
+            (lambda _
+              (substitute* (find-files "scripts" "\\.py$")
+                (("/usr/bin/python") "/usr/bin/python3"))))
+          (add-before 'install 'patch-and-wrap-scripts
+            (lambda* (#:key inputs #:allow-other-keys)
+              (substitute* "scripts/vcf_preprocess.sh"
+                (("bcftools ")
+                 (string-append (search-input-file inputs "/bin/bcftools") " ")))
+              (wrap-script "scripts/net2communities.py"
+                           `("GUIX_PYTHONPATH" ":" prefix
+                             (,(getenv "GUIX_PYTHONPATH"))))))
+          (add-after 'install 'wrap-scripts
+            (lambda* (#:key inputs outputs #:allow-other-keys)
+              (let ((out (assoc-ref outputs "out")))
+                (for-each
+                 (lambda (file)
+                   (wrap-script file
+                                `("R_LIBS_SITE" ":" prefix
+                                  (,(getenv "R_LIBS_SITE")))
+                                `("PATH" ":" prefix
+                                  ,(map (lambda (input) (string-append input "/bin"))
+                                        '#$(map (lambda (label)
+                                                  (or (this-package-input (string-append label "-hwcaps"))
+                                                      (this-package-input label)))
+                                                (list "bc"
+                                                      "bcftools"
+                                                      "bedtools"
+                                                      "gfaffix"
+                                                      "htslib"
+                                                      "multiqc"
+                                                      "odgi"
+                                                      "parallel"
+                                                      "pigz"
+                                                      "python"
+                                                      "r-data-table"
+                                                      "r-minimal"
+                                                      "rtg-tools"
+                                                      "samtools"
+                                                      "seqwish"
+                                                      "smoothxg"
+                                                      "time"
+                                                      "vcfbub"
+                                                      "vg"
+                                                      "wfmash"))))))
+                 (list (string-append out "/bin/pggb")
+                       (string-append out "/bin/partition-before-pggb")
+                       (string-append out "/bin/gfa2evaluation.sh")
+                       (string-append out "/bin/scripts/gfa2evaluation.sh"))))))
+          (add-after 'install 'substitute-file-paths
+            (lambda* (#:key outputs #:allow-other-keys)
+              (let ((out (assoc-ref outputs "out")))
+                (substitute* (string-append out "/bin/gfa2evaluation.sh")
+                  (("/usr/local/bin/vcf_preprocess.sh")
+                   (string-append out "/bin/vcf_preprocess.sh"))
+                  (("/usr/local/bin/nucmer2vcf.R")
+                   (string-append out "/bin/nucmer2vcf.R")))))))))
+    (inputs
+     (list bc
+           bcftools
+           bedtools
+           gfaffix
+           guile-3.0                    ; for wrap-script
+           htslib                       ; tabix
+           multiqc
+           odgi
+           parallel
+           pigz
+           python
+           python-igraph
+           r-data-table
+           r-minimal
+           rtg-tools
+           samtools
+           seqwish
+           smoothxg
+           time
+           vcfbub
+           vg
+           wfmash-0.14))
+    (home-page "https://doi.org/10.1101/2023.04.05.535718")
+    (synopsis "PanGenome Graph Builder")
+    (description "@command{pggb} builds
 @url{https://doi.org/10.1146%2Fannurev-genom-120219-080406, pangenome}
 @url{https://doi.org/10.1038/nbt.4227, variation graphs} from a set of input
 sequences.
@@ -2719,7 +2765,7 @@ such as the @url{https://github.com/vgteam/vg, vg} and
 @command{pggb} has been tested at scale in the @acronym{Human Pangenome
 Reference Consortium, HPRC} as a method to build a graph from the
 @url{https://doi.org/10.1101/2022.07.09.499321, draft human pangenome}.")
-      (license license:expat))))
+    (license license:expat)))
 
 (define use-glibc-hwcaps
   (package-input-rewriting/spec
@@ -4445,7 +4491,7 @@ automatically vectorize for different architectures without adapting the code.")
 
 (define-public r-rrbgen
   (package
-    (name "r-stitch")
+    (name "r-rrbgen")
     (version "0.0.6")
     (source
      (origin
@@ -4554,6 +4600,33 @@ interface to the basic htslib.  It can be easily included in a C++
 program for scripting high-performance genomic analyses.")
     (license license:asl2.0)))
 
+(define-public r-genio  ; Guix package for the CRAN package "genio"
+  (package
+    (name "r-genio")
+    (version "1.1.2")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (cran-uri "genio" version))  ; download location derived from the CRAN name and version
+       (sha256
+        (base32 "0izx8yv8mvnfxdqnqpnp2ldw1hzs6ggxi7jgmjlgxkgmm4vngbgl"))))
+    (properties `((upstream-name . "genio")))  ; upstream name, which lacks the "r-" prefix used by `name'
+    (build-system r-build-system)
+    (propagated-inputs (list r-dplyr r-rcpp r-readr r-tibble))  ; presumably genio's declared R dependencies -- verify against its DESCRIPTION
+    (native-inputs (list r-knitr r-testthat))  ; presumably for vignettes and the test suite -- confirm
+    (home-page "https://github.com/OchoaLab/genio")
+    (synopsis "Genetics Input/Output Functions")
+    (description "@code{r-genio} implements readers and writers for file formats
+associated with genetics data.  Reading and writing Plink BED/BIM/FAM
+and GCTA binary GRM formats is fully supported, including a
+lightning-fast BED reader and writer implementations.  Other functions
+are readr wrappers that are more constrained, user-friendly, and
+efficient for these particular applications; handles Plink and
+Eigenstrat tables (FAM, BIM, IND, and SNP files).  There are also make
+functions for FAM and BIM tables with default values to go with
+simulated genotype data.")
+    (license license:gpl3)))  ; NOTE(review): gpl3 is "version 3 only"; confirm upstream is not "GPL (>= 3)"
+
 (define-public r-stitch
   (package
     (name "r-stitch")