diff options
Diffstat (limited to 'gn/packages/bioinformatics.scm')
| -rw-r--r-- | gn/packages/bioinformatics.scm | 392 |
1 files changed, 269 insertions, 123 deletions
diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 3f89abe..fb2ee18 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -41,8 +41,7 @@ #:use-module (gnu packages compression) #:use-module (gnu packages cpp) #:use-module (gnu packages cran) - #:use-module (gnu packages crates-io) - #:use-module (gnu packages crates-graphics) + #:use-module (past-crates packages crates-io) #:use-module (gnu packages curl) #:use-module (gnu packages databases) #:use-module (gnu packages datastructures) @@ -55,6 +54,7 @@ #:use-module (gnu packages graph) #:use-module (gnu packages gtk) #:use-module (gnu packages guile) + #:use-module (gnu packages haskell-xyz) #:use-module (gnu packages image) #:use-module (gnu packages imagemagick) #:use-module (gnu packages java) @@ -72,13 +72,16 @@ #:use-module (gnu packages protobuf) #:use-module (gnu packages python) #:use-module (gnu packages python-build) + #:use-module (gnu packages python-compression) #:use-module (gnu packages python-science) #:use-module (gnu packages python-web) - #:use-module ((gnu packages python-xyz) #:hide (python2-six)) + #:use-module (gnu packages python-xyz) + #:use-module (gnu packages python-check) #:use-module (gnu packages rdf) #:use-module (gnu packages readline) #:use-module (gnu packages rsync) #:use-module (gnu packages ruby) + #:use-module (gnu packages ruby-xyz) #:use-module (gnu packages rust) #:use-module (gnu packages serialization) #:use-module (gnu packages shells) @@ -2163,6 +2166,7 @@ suitable for long reads, but works also well with short reads.") (build-system cmake-build-system) (arguments (list + #:tests? #f ;; FIXME: tests are broken #:phases #~(modify-phases %standard-phases (add-after 'unpack 'use-gnuinstalldirs-macros @@ -2337,14 +2341,14 @@ in-memory footprint at the cost of packing and unpacking.") (define-public vg (package (name "vg") - (version "1.61.0") + (version "1.65.0") (source (origin (method url-fetch) (uri (string-append "https://github.com/vgteam/vg/releases/download/v" version "/vg-v" version ".tar.gz")) (sha256 - (base32 "1yb8ffqq65ma5di42qlj483x42viq5j4xyyg3vpww60gm15n1yxx")) + (base32 "022wiz0rs1x90nk9zaxcf8szhma8kzygnbfhciq9166kjysd0pc6")) (snippet #~(begin (use-modules (guix build utils)) @@ -2504,8 +2508,11 @@ in-memory footprint at the cost of packing and unpacking.") (substitute* "Makefile" ((".*test-docs.*") ""))))) (add-after 'build 'build-manpages - (lambda* (#:key inputs #:allow-other-keys) - (invoke "make" "man"))) + (lambda* (#:key make-flags #:allow-other-keys) + ;; vg is not in PATH. Replace it with full path. + (substitute* "doc/vgmanmd.py" + (("'vg'") "'./bin/vg'")) + (apply invoke "make" "man" make-flags))) (replace 'install (lambda* (#:key outputs #:allow-other-keys) (let ((out (assoc-ref outputs "out"))) @@ -2525,9 +2532,11 @@ in-memory footprint at the cost of packing and unpacking.") bash-tap bc cmake-minimal + ghc-pandoc jq perl pkg-config + python samtools util-linux which @@ -2580,121 +2589,159 @@ multiple sequence alignment.") license:zlib ; deps/sonLib/externalTools/cutest license:boost1.0)))) ; catch.hpp +(define-public wfmash-0.14 + (package + (inherit wfmash) + (version "0.14.0") + (source + (origin + (method url-fetch) + (uri (string-append "https://github.com/waveygang/wfmash/releases/download/v" + version "/wfmash-v" version ".tar.gz")) + (sha256 + (base32 + "1mk3jschn3hdr45glds65g3hxk7v6nc3plkvxmfbd5pr4kyzlf13")) + (snippet + #~(begin + (use-modules (guix build utils)) + ;; Unbundle atomic-queue. + (delete-file-recursively "src/common/atomic_queue") + (substitute* "src/align/include/computeAlignments.hpp" + (("\"common/atomic_queue/atomic_queue.h\"") + "<atomic_queue/atomic_queue.h>")) + ;; Remove compiler optimizations. + (substitute* (find-files "." "CMakeLists\\.txt") + (("-march=native ") "")))))) + (arguments + (substitute-keyword-arguments (package-arguments wfmash) + ((#:tests? tests? #f) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (replace 'build-check-prerequisites + (lambda _ + (let ((wfa2-lib #$(string-append "../wfmash-v" + version + "/src/common/wflign/deps/WFA2-lib"))) + (substitute* (string-append wfa2-lib "/Makefile") + ;; Remove architecture-specific flags. + (("-march=x86-64-v3") "")) + (substitute* (string-append wfa2-lib "/tests/wfa.utest.sh") + ;; Fix time command. + (("\\\\time -v") "time")) + ;; Build wfa2-lib. + (invoke "make" "-C" wfa2-lib + #$(string-append "CC=" (cc-for-target)))))))))) + (inputs + (modify-inputs (package-inputs wfmash) + (prepend jemalloc) + (delete "libdeflate"))))) + (define-public pggb - (let ((commit "9ebff27320382e470ed38a85b4448402e1e7c353") - (revision "1")) - (package - (name "pggb") - (version (git-version "0.5.1" revision commit)) - (source (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/pangenome/pggb") - (commit commit))) - (file-name (git-file-name name version)) - (sha256 - (base32 "0rgpj52q3ai7f1saqbilgx5gz4f403x3427wq649qwv84ivmi1sf")))) - (build-system copy-build-system) - (arguments - (list - #:install-plan - #~'(("pggb" "bin/") - ("partition-before-pggb" "bin/") - ("scripts/" "bin/") - ("scripts" "bin/scripts")) - #:phases - #~(modify-phases %standard-phases - (add-after 'unpack 'force-python3 - (lambda _ - (substitute* (find-files "scripts" "\\.py$") - (("/usr/bin/python") "/usr/bin/python3")))) - (add-before 'install 'patch-and-wrap-scripts - (lambda* (#:key inputs #:allow-other-keys) - (substitute* "scripts/vcf_preprocess.sh" - (("bcftools ") - (string-append (search-input-file inputs "/bin/bcftools") " "))) - (wrap-script "scripts/net2communities.py" - `("GUIX_PYTHONPATH" ":" prefix - (,(getenv "GUIX_PYTHONPATH")))))) - (add-after 'install 'wrap-scripts - (lambda* (#:key inputs outputs #:allow-other-keys) - (let ((out (assoc-ref outputs "out"))) - (for-each - (lambda (file) - (wrap-script file - `("R_LIBS_SITE" ":" prefix - (,(getenv "R_LIBS_SITE"))) - `("PATH" ":" prefix - ,(map (lambda (input) (string-append input "/bin")) - '#$(map (lambda (label) - (or (this-package-input (string-append label "-hwcaps")) - (this-package-input label))) - (list "bc" - "bcftools" - "bedtools" - "gfaffix" - "htslib" - "fastix" - "multiqc" - "mummer" - "odgi" - "pafplot" - "parallel" - "pigz" - "python" - "r-data-table" - "r-minimal" - "rtg-tools" - "samtools" - "seqwish" - "smoothxg" - "time" - "vcfbub" - "vcflib" - "vg" - "wfmash")))))) - (list (string-append out "/bin/pggb") - (string-append out "/bin/partition-before-pggb") - (string-append out "/bin/gfa2evaluation.sh") - (string-append out "/bin/scripts/gfa2evaluation.sh")))))) - (add-after 'install 'substitute-file-paths - (lambda* (#:key outputs #:allow-other-keys) - (let ((out (assoc-ref outputs "out"))) - (substitute* (string-append out "/bin/gfa2evaluation.sh") - (("/usr/local/bin/vcf_preprocess.sh") - (string-append out "/bin/vcf_preprocess.sh")) - (("/usr/local/bin/nucmer2vcf.R") - (string-append out "/bin/nucmer2vcf.R"))))))))) - (inputs - (list bc - bcftools - bedtools - gfaffix - guile-3.0 ; for wrap-script - htslib ; tabix - fastix - multiqc - mummer - odgi - pafplot - parallel - pigz - python - python-igraph - r-data-table - r-minimal - rtg-tools - samtools - seqwish - smoothxg - time - vcfbub - vcflib - vg - wfmash)) - (home-page "https://doi.org/10.1101/2023.04.05.535718") - (synopsis "PanGenome Graph Builder") - (description "@command{pggb} builds + (package + (name "pggb") + (version "0.7.2") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/pangenome/pggb") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1k3ffl5h5jqnz9k9y5cjqr8z6hh0gyjfj4szggz11lncp5wkskgv")))) + (build-system copy-build-system) + (arguments + (list + #:install-plan + #~'(("pggb" "bin/") + ("partition-before-pggb" "bin/") + ("scripts/" "bin/") + ("scripts" "bin/scripts")) + #:phases + #~(modify-phases %standard-phases + (add-after 'unpack 'force-python3 + (lambda _ + (substitute* (find-files "scripts" "\\.py$") + (("/usr/bin/python") "/usr/bin/python3")))) + (add-before 'install 'patch-and-wrap-scripts + (lambda* (#:key inputs #:allow-other-keys) + (substitute* "scripts/vcf_preprocess.sh" + (("bcftools ") + (string-append (search-input-file inputs "/bin/bcftools") " "))) + (wrap-script "scripts/net2communities.py" + `("GUIX_PYTHONPATH" ":" prefix + (,(getenv "GUIX_PYTHONPATH")))))) + (add-after 'install 'wrap-scripts + (lambda* (#:key inputs outputs #:allow-other-keys) + (let ((out (assoc-ref outputs "out"))) + (for-each + (lambda (file) + (wrap-script file + `("R_LIBS_SITE" ":" prefix + (,(getenv "R_LIBS_SITE"))) + `("PATH" ":" prefix + ,(map (lambda (input) (string-append input "/bin")) + '#$(map (lambda (label) + (or (this-package-input (string-append label "-hwcaps")) + (this-package-input label))) + (list "bc" + "bcftools" + "bedtools" + "gfaffix" + "htslib" + "multiqc" + "odgi" + "parallel" + "pigz" + "python" + "r-data-table" + "r-minimal" + "rtg-tools" + "samtools" + "seqwish" + "smoothxg" + "time" + "vcfbub" + "vg" + "wfmash")))))) + (list (string-append out "/bin/pggb") + (string-append out "/bin/partition-before-pggb") + (string-append out "/bin/gfa2evaluation.sh") + (string-append out "/bin/scripts/gfa2evaluation.sh")))))) + (add-after 'install 'substitute-file-paths + (lambda* (#:key outputs #:allow-other-keys) + (let ((out (assoc-ref outputs "out"))) + (substitute* (string-append out "/bin/gfa2evaluation.sh") + (("/usr/local/bin/vcf_preprocess.sh") + (string-append out "/bin/vcf_preprocess.sh")) + (("/usr/local/bin/nucmer2vcf.R") + (string-append out "/bin/nucmer2vcf.R"))))))))) + (inputs + (list bc + bcftools + bedtools + gfaffix + guile-3.0 ; for wrap-script + htslib ; tabix + multiqc + odgi + parallel + pigz + python + python-igraph + r-data-table + r-minimal + rtg-tools + samtools + seqwish + smoothxg + time + vcfbub + vg + wfmash-0.14)) + (home-page "https://doi.org/10.1101/2023.04.05.535718") + (synopsis "PanGenome Graph Builder") + (description "@command{pggb} builds @url{https://doi.org/10.1146%2Fannurev-genom-120219-080406, pangenome} @url{https://doi.org/10.1038/nbt.4227, variation graphs} from a set of input sequences. @@ -2719,7 +2766,7 @@ such as the @url{https://github.com/vgteam/vg, vg} and @command{pggb} has been tested at scale in the @acronym{Human Pangenome Reference Consortium, HPRC} as a method to build a graph from the @url{https://doi.org/10.1101/2022.07.09.499321, draft human pangenome}.") - (license license:expat)))) + (license license:expat))) (define use-glibc-hwcaps (package-input-rewriting/spec @@ -2934,6 +2981,76 @@ to the user.") "http://www.kentinformatics.com/index.html" "Free for universities and non-profit institutions."))))) +(define htslib-1.14 + (package/inherit htslib + (version "1.14") + (source (origin + (method url-fetch) + (uri (string-append + "https://github.com/samtools/htslib/releases/download/" + version "/htslib-" version ".tar.bz2")) + (sha256 + (base32 + "0pwk8yhhvb85mi1d2qhwsb4samc3rmbcrq7b1s0jz0glaa7in8pd")))) + (arguments + (substitute-keyword-arguments (package-arguments htslib) + ((#:configure-flags cf #~'()) + #~(delete "--with-external-htscodecs" #$cf)))) + (propagated-inputs + (modify-inputs (package-propagated-inputs htslib) + (delete "htscodecs"))))) + +(define-public pbbam + (package + (name "pbbam") + (version "2.1.0") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/PacificBiosciences/pbbam") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1iyazi3l7dswpfxh39k5j7ydi0ywja0579xz3r6l9kkwz2n1z6dc")))) + (build-system meson-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after 'unpack 'patch-tests + (lambda* (#:key inputs #:allow-other-keys) + ;; Disable this test. I tried fixing it by including + ;; optional_io.hpp, but there's a type error. + (substitute* "tests/src/meson.build" + (("'test_ReadGroupInfo.cpp',") "")) + #; + (substitute* "include/pbbam/ReadGroupInfo.h" + (("#include <boost/optional.hpp>" m) + (string-append m "\n#include <boost/optional/optional_io.hpp>"))) + (substitute* '("tests/scripts/cram/_test.py" + "tests/scripts/cram/_main.py") + (("'/bin/sh'") + (string-append "'" (which "sh") "'")))))))) + ;; These libraries are listed as "Required" in the pkg-config file. + (propagated-inputs + (list htslib-1.14 pbcopper zlib)) + (inputs + (list boost samtools)) + (native-inputs + (list googletest + pkg-config + python-wrapper)) ;for tests + (home-page "https://github.com/PacificBiosciences/pbbam") + (synopsis "Work with PacBio BAM files") + (description + "The pbbam software package provides components to create, query, and +edit PacBio BAM files and associated indices. These components include a core +C++ library, bindings for additional languages, and command-line utilities. +This library is not intended to be used as a general-purpose BAM utility - all +input and output BAMs must adhere to the PacBio BAM format specification. +Non-PacBio BAMs will cause exceptions to be thrown.") + (license license:bsd-3))) + (define-public bam2fastx (package (name "bam2fastx") @@ -4445,7 +4562,7 @@ automatically vectorize for different architectures without adapting the code.") (define-public r-rrbgen (package - (name "r-stitch") + (name "r-rrbgen") (version "0.0.6") (source (origin @@ -4456,6 +4573,8 @@ automatically vectorize for different architectures without adapting the code.") (base32 "1vhqy8licl2pkzar4aag0q5fhnb3fdch8acyjh9445ia42z01z9c")))) (build-system r-build-system) + (arguments + (list #:tests? #f)) ;; tests fail and seem to require data (propagated-inputs (list r-rcpp r-rcpparmadillo)) @@ -4554,6 +4673,33 @@ interface to the basic htslib. It can be easily included in a C++ program for scripting high-performance genomic analyses.") (license license:asl2.0))) +(define-public r-genio + (package + (name "r-genio") + (version "1.1.2") + (source + (origin + (method url-fetch) + (uri (cran-uri "genio" version)) + (sha256 + (base32 "0izx8yv8mvnfxdqnqpnp2ldw1hzs6ggxi7jgmjlgxkgmm4vngbgl")))) + (properties `((upstream-name . "genio"))) + (build-system r-build-system) + (propagated-inputs (list r-dplyr r-rcpp r-readr r-tibble)) + (native-inputs (list r-knitr r-testthat)) + (home-page "https://github.com/OchoaLab/genio") + (synopsis "Genetics Input/Output Functions") + (description "@code{r-genio} implements readers and writers for file formats +associated with genetics data. Reading and writing Plink BED/BIM/FAM +and GCTA binary GRM formats is fully supported, including a +lightning-fast BED reader and writer implementations. Other functions +are readr wrappers that are more constrained, user-friendly, and +efficient for these particular applications; handles Plink and +Eigenstrat tables (FAM, BIM, IND, and SNP files). There are also make +functions for FAM and BIM tables with default values to go with +simulated genotype data.") + (license license:gpl3))) + (define-public r-stitch (package (name "r-stitch") |
