From cc8ee17a6e63cf20adc9374735a1322dd9c7f88e Mon Sep 17 00:00:00 2001
From: Efraim Flashner
Date: Wed, 16 Nov 2022 14:56:36 +0200
Subject: gn: Add graph-genotyper and dependent packages

---
[Reviewer note: this copy was re-flowed from a whitespace-collapsed export, so
indentation (including that of context lines) is reconstructed; apply it with
"git am --ignore-whitespace".  Relative to the exported text it corrects the
graph-genotyper home page, synopsis and description (which had been copied
from pangenie), the odgi home page, @acronym argument order and description
grammar, the crate names passed to crate-uri (clap_lex, clap_derive,
terminal_size), and the mis-encoded dashes in rust-unic-emoji-char.  The
number of added lines per hunk is unchanged.]

 gn/packages/bioinformatics.scm | 133 ++++++++++++++++++++++++++++++++++++++++
 gn/packages/crates-io.scm      | 135 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 267 insertions(+), 1 deletion(-)

diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm
index 8f98b8e..76b93d9 100644
--- a/gn/packages/bioinformatics.scm
+++ b/gn/packages/bioinformatics.scm
@@ -512,6 +512,34 @@ reads.")
 collapses them into a non-redundant graph structure.")
     (license license:expat)))
 
+(define-public gafpack
+  (let ((commit "ad31875b6914d964c6fd72d1bf334f0843538fb6")   ; November 10, 2022
+        (revision "1"))
+    (package
+      (name "gafpack")
+      (version (git-version "0.0.0" revision commit))
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
+                (url "https://github.com/ekg/gafpack")
+                (commit commit)))
+         (file-name (git-file-name name version))
+         (sha256
+          (base32 "0di2psh0ls7jlbnqs7k71p55f73pn23a09k1h3ril7gwjcrzr3rk"))))
+      (build-system cargo-build-system)
+      (arguments
+       `(#:install-source? #f
+         #:cargo-inputs
+         (("rust-clap" ,rust-clap-4)
+          ("rust-gfa" ,rust-gfa-0.10))))
+      (home-page "https://github.com/ekg/gafpack")
+      (synopsis "Convert variation graph alignments to coverage maps over nodes")
+      (description
+       "Gafpack converts alignments to pangenome variation graphs to coverage
+maps useful in haplotype-based genotyping.")
+      (license license:expat))))
+
 (define-public agc-for-pgr-tk
   (let ((commit "453c0afdc54b4aa00fa8e97a63f196931fdb81c4")   ; April 26, 2022
         (revision "1"))
@@ -728,6 +756,61 @@ performance.)")
               "file:///LICENSE"
               "CC-BY-NC-SA 4.0"))))
 
+(define-public graph-genotyper
+  (let ((commit "e7cc6b43a5b1f389d76bf9aac7f2ee02f92caeaf")   ; October 17, 2022
+        (revision "13"))
+    (package
+      (name "graph-genotyper")
+      (version (git-version "0.0.0" revision commit))
+      (source (origin
+                (method git-fetch)
+                (uri (git-reference
+                       (url "https://github.com/davidebolo1993/graph_genotyper")
+                       (commit commit)))
+                (file-name (git-file-name name version))
+                (sha256
+                 (base32 "1l8yjpkqamiqr1q5i7vr5z04aba7skpbcwyc9dx5fiklvljjfhcx"))))
+      (build-system copy-build-system)
+      (arguments
+       `(#:install-plan
+         '(("genotype.py" "bin/")
+           ("genotype.sh" "bin/"))
+         #:phases
+         (modify-phases %standard-phases
+           (add-after 'install 'wrap-genotype
+             (lambda* (#:key inputs outputs #:allow-other-keys)
+               (let ((out (assoc-ref outputs "out")))
+                 (wrap-script (string-append out "/bin/genotype.sh")
+                  `("GUIX_PYTHONPATH" ":" prefix (,(getenv "GUIX_PYTHONPATH")))
+                  `("PATH" ":" prefix
+                    ,(map (lambda (file-name)
+                            (string-append (assoc-ref inputs file-name) "/bin"))
+                          (list "gafpack"
+                                "odgi"
+                                "python"
+                                "samtools"
+                                "vg"))))))))))
+      (inputs
+       (list gafpack
+             guile-3.0                  ;for wrap-script
+             odgi
+             python
+             python-numpy
+             python-pandas
+             python-scipy
+             samtools
+             vg))
+      (home-page "https://github.com/davidebolo1993/graph_genotyper")
+      (synopsis "Genotyping based on pangenome variation graphs")
+      (description
+       "This package provides the @file{genotype.sh} and @file{genotype.py}
+scripts for genotyping against pangenome variation graphs.  The
+@file{genotype.sh} script is wrapped so that it finds @command{gafpack},
+@command{odgi}, @command{python}, @command{samtools} and @command{vg}, as well
+as the Python modules it needs.")
+      (license (license:non-copyleft
+                "No license listed")))))
+
 (define-public pangenie
   (let ((commit "e779076827022d1416ab9fabf99a03d8f4725956") ; September 2, 2021 from phasing-tests branch
         (revision "2"))
@@ -1811,6 +1894,56 @@ available to other researchers.")
                (delete-file "scanpy/tests/test_pca.py")
                #t)))))))))
 
+;; TODO: Unbundle everything
+(define-public odgi
+  (package
+    (name "odgi")
+    (version "0.8.1")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append "https://github.com/pangenome/odgi/releases"
+                                  "/download/v" version
+                                  "/odgi-v" version ".tar.gz"))
+              (sha256
+               (base32 "175083pb9hp0vn9a00hbxlayyk5a5j8p52yq5qfmbnfvndisbmbv"))
+              (snippet
+               #~(begin
+                   (use-modules (guix build utils))
+                   (substitute* "CMakeLists.txt"
+                     (("-march=native") "")
+                     (("-msse4\\.2") ""))
+                   (delete-file-recursively "deps/pybind11")
+                   (delete-file-recursively "deps/sdsl-lite")))))
+    (build-system cmake-build-system)
+    (native-inputs
+     (list pkg-config))
+    (inputs
+     (list jemalloc
+           libdivsufsort
+           pybind11
+           python
+           sdsl-lite))
+    (home-page "https://github.com/pangenome/odgi")
+    (synopsis "Optimized Dynamic Genome/Graph Implementation")
+    (description "@acronym{odgi, Optimized Dynamic Genome/Graph Implementation}
+provides an efficient and succinct dynamic DNA sequence graph model, as well as
+a host of algorithms that allow the use of such graphs in bioinformatic
+analyses.
+
+Careful encoding of graph entities allows odgi to efficiently compute and
+transform pangenomes with minimal overheads.  @command{odgi} implements a
+dynamic data structure that leverages multi-core CPUs and can be updated on the
+fly.
+
+The edges and path steps are recorded as deltas between the current node id and
+the target node id, where the node id corresponds to the rank in the global
+array of nodes.  Graphs built from biological data sets tend to have local
+partial order and, when sorted, the deltas are small.  This allows them to be
+compressed with a variable length integer representation, resulting in a small
+in-memory footprint at the cost of packing and unpacking.")
+    (properties '((tunable? . #t)))
+    (license license:expat)))
+
 (define-public vg
   (package
     (name "vg")
diff --git a/gn/packages/crates-io.scm b/gn/packages/crates-io.scm
index 847b2ef..8b7bc4c 100644
--- a/gn/packages/crates-io.scm
+++ b/gn/packages/crates-io.scm
@@ -7,7 +7,8 @@
   #:use-module (guix build-system cargo)
   #:use-module (gnu packages crates-graphics)
   #:use-module (gnu packages crates-io)
-  #:use-module (gnu packages maths))
+  #:use-module (gnu packages maths)
+  #:use-module (gnu packages python))
 
 (define-public rust-bgzip-0.2
   (package
@@ -65,6 +66,25 @@ for massive key sets}.  It generates an @acronym{MPHF, minimal perfect hash
 functions} for a collection of hashable objects.")
     (license license:expat)))
 
+(define-public rust-clap-lex-0.3
+  (package
+    (name "rust-clap-lex")
+    (version "0.3.0")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "clap_lex" version)) ;crates.io name uses "_"
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "1a4dzbnlxiamfsn0pnkhn7n9bdfjh66j9fxm6mmr7d227vvrhh8d"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:cargo-inputs (("rust-os-str-bytes" ,rust-os-str-bytes-6))))
+    (home-page "https://github.com/clap-rs/clap/tree/master/clap_lex")
+    (synopsis "Minimal, flexible command line parser")
+    (description "Minimal, flexible command line parser")
+    (license (list license:expat license:asl2.0))))
+
 (define-public rust-cuckoofilter-0.5
   (package
     (name "rust-cuckoofilter")
@@ -352,6 +372,51 @@ Python code from a Rust binary is also supported.")
     (description "This package provides succinct data structures for Rust.")
     (license (list license:expat license:asl2.0))))
 
+(define-public rust-terminal-size-0.2
+  (package
+    (name "rust-terminal-size")
+    (version "0.2.2")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "terminal_size" version)) ;crates.io name uses "_"
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "0yhza8sc6jkka6j0nq5sl749ckx1jagvxp3b38yhh4px6k291jj0"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:skip-build? #t
+       ;#:cargo-inputs
+       ;(("rust-rustix" ,rust-rustix-0.35)
+       ; ("rust-windows-sys" ,rust-windows-sys-0.42))
+       ))
+    (home-page "https://github.com/eminence/terminal-size")
+    (synopsis "Gets the size of your Linux or Windows terminal")
+    (description "Gets the size of your Linux or Windows terminal")
+    (license (list license:expat license:asl2.0))))
+
+(define-public rust-unic-emoji-char-0.9
+  (package
+    (name "rust-unic-emoji-char")
+    (version "0.9.0")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "unic-emoji-char" version))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "0ka9fr7s6lv0z43r9xphg9injn35pfxf9g9q18ki0wl9d0g241qb"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:cargo-inputs
+       (("rust-unic-char-property" ,rust-unic-char-property-0.9)
+        ("rust-unic-char-range" ,rust-unic-char-range-0.9)
+        ("rust-unic-ucd-version" ,rust-unic-ucd-version-0.9))))
+    (home-page "https://github.com/open-i18n/rust-unic/")
+    (synopsis "UNIC — Unicode Emoji — Emoji Character Properties")
+    (description "UNIC — Unicode Emoji — Emoji Character Properties")
+    (license (list license:expat license:asl2.0))))
+
 (define-public rust-clap-for-jrep
   (package
     (name "rust-clap")
@@ -499,6 +564,48 @@ or any combination.")
  Argument Parser")
     (license (list license:expat license:asl2.0))))
 
+(define-public rust-clap-4
+  (package
+    (name "rust-clap")
+    (version "4.0.9")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "clap" version))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "1w0dxqzrh449s9l2k8g66pdsff02599bwi5mh0gny3227kcpsq1h"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:cargo-inputs
+       (("rust-atty" ,rust-atty-0.2)
+        ("rust-backtrace" ,rust-backtrace-0.3)
+        ("rust-bitflags" ,rust-bitflags-1)
+        ("rust-clap-derive" ,rust-clap-derive-4)
+        ("rust-clap-lex" ,rust-clap-lex-0.3)
+        ("rust-once-cell" ,rust-once-cell-1)
+        ("rust-strsim" ,rust-strsim-0.10)
+        ("rust-termcolor" ,rust-termcolor-1)
+        ("rust-terminal-size" ,rust-terminal-size-0.2)
+        ("rust-unicase" ,rust-unicase-2)
+        ("rust-unicode-width" ,rust-unicode-width-0.1))
+       #:cargo-development-inputs
+       (("rust-humantime" ,rust-humantime-2)
+        ("rust-rustversion" ,rust-rustversion-1)
+        ("rust-shlex" ,rust-shlex-1)
+        ("rust-snapbox" ,rust-snapbox-0.4)
+        ("rust-static-assertions" ,rust-static-assertions-1)
+        ("rust-trybuild" ,rust-trybuild-1)
+        ("rust-trycmd" ,rust-trycmd-0.13)
+        ("rust-unic-emoji-char" ,rust-unic-emoji-char-0.9))))
+    (home-page "https://github.com/clap-rs/clap")
+    (synopsis
+     "A simple to use, efficient, and full-featured Command Line Argument Parser")
+    (description
+     "This package provides a simple to use, efficient, and full-featured Command Line
+ Argument Parser")
+    (license (list license:expat license:asl2.0))))
+
 ;; ready to upstream, WITH rust-clap-derive
 ;; replace fields with those from upstream.
 (define-public rust-clap-derive-3.1
@@ -527,6 +634,32 @@ or any combination.")
      "Parse command line argument by defining a struct, derive crate.")
     (license (list license:expat license:asl2.0))))
 
+(define-public rust-clap-derive-4
+  (package
+    (name "rust-clap-derive")
+    (version "4.0.9")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "clap_derive" version)) ;crates.io name uses "_"
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "02zhbbmyz3dpy9ml6xfp7i8p3ffj1djvkdnkg6gr6d0s5r4hg8x4"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:cargo-inputs
+       (("rust-heck" ,rust-heck-0.4)
+        ("rust-proc-macro-error" ,rust-proc-macro-error-1)
+        ("rust-proc-macro2" ,rust-proc-macro2-1)
+        ("rust-quote" ,rust-quote-1)
+        ("rust-syn" ,rust-syn-1))))
+    (home-page "https://github.com/clap-rs/clap/tree/master/clap_derive")
+    (synopsis
+     "Parse command line argument by defining a struct, derive crate.")
+    (description
+     "Parse command line argument by defining a struct, derive crate.")
+    (license (list license:expat license:asl2.0))))
+
 (define-public rust-textwrap-0.15
   (package
     (name "rust-textwrap")
-- 
cgit v1.2.3