about summary refs log tree commit diff
diff options
context:
space:
mode:
authorEfraim Flashner2022-11-16 14:56:36 +0200
committerEfraim Flashner2022-11-16 14:57:29 +0200
commitcc8ee17a6e63cf20adc9374735a1322dd9c7f88e (patch)
treedd52eb357089ae4c94b66687f716d5bb0bdf6b4a
parentdd9aab67b68cad7f2fa2c15626d73b1e25198dee (diff)
downloadguix-bioinformatics-cc8ee17a6e63cf20adc9374735a1322dd9c7f88e.tar.gz
gn: Add graph-genotyper and dependent packages
-rw-r--r--gn/packages/bioinformatics.scm133
-rw-r--r--gn/packages/crates-io.scm135
2 files changed, 267 insertions, 1 deletions
diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm
index 8f98b8e..76b93d9 100644
--- a/gn/packages/bioinformatics.scm
+++ b/gn/packages/bioinformatics.scm
@@ -512,6 +512,34 @@ reads.")
 collapses them into a non-redundant graph structure.")
     (license license:expat)))
 
+(define-public gafpack
+  (let ((commit "ad31875b6914d964c6fd72d1bf334f0843538fb6")     ; November 10, 2022
+        (revision "1"))
+    (package
+      (name "gafpack")
+      (version (git-version "0.0.0" revision commit))
+      (source
+        (origin
+          (method git-fetch)
+          (uri (git-reference
+                 (url "https://github.com/ekg/gafpack")
+                 (commit commit)))
+          (file-name (git-file-name name version))
+          (sha256
+           (base32 "0di2psh0ls7jlbnqs7k71p55f73pn23a09k1h3ril7gwjcrzr3rk"))))
+      (build-system cargo-build-system)
+      (arguments
+       `(#:install-source? #f
+         #:cargo-inputs
+         (("rust-clap" ,rust-clap-4)
+          ("rust-gfa" ,rust-gfa-0.10))))
+      (home-page "https://github.com/ekg/gafpack")
+      (synopsis "Convert variation graph alignments to coverage maps over nodes")
+      (description
+       "Gafpack converts alignments to pangenome variation graphs to coverage
+maps useful in haplotype-based genotyping.")
+      (license license:expat))))
+
 (define-public agc-for-pgr-tk
   (let ((commit "453c0afdc54b4aa00fa8e97a63f196931fdb81c4") ; April 26, 2022
         (revision "1"))
@@ -728,6 +756,61 @@ performance.)")
                "file:///LICENSE"
                "CC-BY-NC-SA 4.0"))))
 
+(define-public graph-genotyper
+  (let ((commit "e7cc6b43a5b1f389d76bf9aac7f2ee02f92caeaf") ; October 17, 2022
+        (revision "13"))
+    (package
+      (name "graph-genotyper")
+      (version (git-version "0.0.0" revision commit))
+      (source (origin
+        (method git-fetch)
+        (uri (git-reference
+               (url "https://github.com/davidebolo1993/graph_genotyper")
+               (commit commit)))
+        (file-name (git-file-name name version))
+        (sha256
+         (base32 "1l8yjpkqamiqr1q5i7vr5z04aba7skpbcwyc9dx5fiklvljjfhcx"))))
+      (build-system copy-build-system)
+      (arguments
+       `(#:install-plan
+         '(("genotype.py" "bin/")
+           ("genotype.sh" "bin/"))
+         #:phases
+         (modify-phases %standard-phases
+           (add-after 'install 'wrap-genotype
+             (lambda* (#:key inputs outputs #:allow-other-keys)
+               (let ((out (assoc-ref outputs "out")))
+                 (wrap-script (string-append out "/bin/genotype.sh")
+                  `("GUIX_PYTHONPATH" ":" prefix (,(getenv "GUIX_PYTHONPATH")))
+                  `("PATH" ":" prefix
+                    ,(map (lambda (file-name)
+                            (string-append (assoc-ref inputs file-name) "/bin"))
+                          (list "gafpack"
+                                "odgi"
+                                "python"
+                                "samtools"
+                                "vg"))))))))))
+      (inputs
+       (list gafpack
+             guile-3.0
+             odgi
+             python
+             python-numpy
+             python-pandas
+             python-scipy
+             samtools
+             vg))
+      (home-page "https://bitbucket.org/jana_ebler")
+      (synopsis "Genotyping based on k-mers and pangenome graphs")
+      (description
+       "This package provides a genotyper for various types of genetic variants
+(such as SNPs, indels and structural variants).  Genotypes are computed based on
+read k-mer counts and a panel of known haplotypes.  A description of the method
+can be found @url{https://www.biorxiv.org/content/10.1101/2020.11.11.378133v1,
+here}.")
+      (license (license:non-copyleft
+                 "No license listed")))))
+
 (define-public pangenie
   (let ((commit "e779076827022d1416ab9fabf99a03d8f4725956") ; September 2, 2021 from phasing-tests branch
         (revision "2"))
@@ -1811,6 +1894,56 @@ available to other researchers.")
                  (delete-file "scanpy/tests/test_pca.py")
                  #t)))))))))
 
+;; TODO: Unbundle everything
+(define-public odgi
+  (package
+    (name "odgi")
+    (version "0.8.1")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append "https://github.com/pangenome/odgi/releases"
+                                  "/download/v" version
+                                  "/odgi-v" version ".tar.gz"))
+              (sha256
+               (base32 "175083pb9hp0vn9a00hbxlayyk5a5j8p52yq5qfmbnfvndisbmbv"))
+              (snippet
+               #~(begin
+                   (use-modules (guix build utils))
+                   (substitute* "CMakeLists.txt"
+                     (("-march=native") "")
+                     (("-msse4\\.2") ""))
+                   (delete-file-recursively "deps/pybind11")
+                   (delete-file-recursively "deps/sdsl-lite")))))
+    (build-system cmake-build-system)
+    (native-inputs
+     (list pkg-config))
+    (inputs
+     (list jemalloc
+           libdivsufsort
+           pybind11
+           python
+           sdsl-lite))
+    (home-page "https://github.com/vgteam/odgi")
+    (synopsis "Optimized Dynamic Genome/Graph Implementation")
+    (description "@acronym{Optimized Dynamic Genome/Graph Implementation, odgi}
+provides an efficient and succinct dynamic DNA sequence graph model, as well as
+a host of algorithms that allow the use of such graphs in bioinformatic
+analyses.
+
+Careful encoding of graph entities allows odgi to efficiently compute and
+transform pangenomes with minimal overheads.  @command{odgi} implements a
+dynamic data structure that leveraged multi-core CPUs and can be updated on the
+fly.
+
+The edges and path steps are recorded as deltas between the current node id and
+the target node id, where the node id corresponds to the rank in the global
+array of nodes.  Graphs built from biological data sets tend to have local
+partial order and, when sorted, the deltas be small.  This allows them to be
+compressed with a variable length integer representation, resulting in a small
+in-memory footprint at the cost of packing and unpacking.")
+    (properties '((tunable? . #t)))
+    (license license:expat)))
+
 (define-public vg
   (package
     (name "vg")
diff --git a/gn/packages/crates-io.scm b/gn/packages/crates-io.scm
index 847b2ef..8b7bc4c 100644
--- a/gn/packages/crates-io.scm
+++ b/gn/packages/crates-io.scm
@@ -7,7 +7,8 @@
   #:use-module (guix build-system cargo)
   #:use-module (gnu packages crates-graphics)
   #:use-module (gnu packages crates-io)
-  #:use-module (gnu packages maths))
+  #:use-module (gnu packages maths)
+  #:use-module (gnu packages python))
 
 (define-public rust-bgzip-0.2
   (package
@@ -65,6 +66,25 @@ for massive key sets}.  It generates an @acronym{MPHF, minimal perfect hash
 functions} for a collection of hashable objects.")
     (license license:expat)))
 
+(define-public rust-clap-lex-0.3
+  (package
+    (name "rust-clap-lex")
+    (version "0.3.0")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "clap-lex" version))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "1a4dzbnlxiamfsn0pnkhn7n9bdfjh66j9fxm6mmr7d227vvrhh8d"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:cargo-inputs (("rust-os-str-bytes" ,rust-os-str-bytes-6))))
+    (home-page "https://github.com/clap-rs/clap/tree/master/clap_lex")
+    (synopsis "Minimal, flexible command line parser")
+    (description "Minimal, flexible command line parser")
+    (license (list license:expat license:asl2.0))))
+
 (define-public rust-cuckoofilter-0.5
   (package
     (name "rust-cuckoofilter")
@@ -352,6 +372,51 @@ Python code from a Rust binary is also supported.")
     (description "This package provides succinct data structures for Rust.")
     (license (list license:expat license:asl2.0))))
 
+(define-public rust-terminal-size-0.2
+  (package
+    (name "rust-terminal-size")
+    (version "0.2.2")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "terminal-size" version))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "0yhza8sc6jkka6j0nq5sl749ckx1jagvxp3b38yhh4px6k291jj0"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:skip-build? #t
+       ;#:cargo-inputs
+       ;(("rust-rustix" ,rust-rustix-0.35)
+       ; ("rust-windows-sys" ,rust-windows-sys-0.42))
+       ))
+    (home-page "https://github.com/eminence/terminal-size")
+    (synopsis "Gets the size of your Linux or Windows terminal")
+    (description "Gets the size of your Linux or Windows terminal")
+    (license (list license:expat license:asl2.0))))
+
+(define-public rust-unic-emoji-char-0.9
+  (package
+    (name "rust-unic-emoji-char")
+    (version "0.9.0")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "unic-emoji-char" version))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "0ka9fr7s6lv0z43r9xphg9injn35pfxf9g9q18ki0wl9d0g241qb"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:cargo-inputs
+       (("rust-unic-char-property" ,rust-unic-char-property-0.9)
+        ("rust-unic-char-range" ,rust-unic-char-range-0.9)
+        ("rust-unic-ucd-version" ,rust-unic-ucd-version-0.9))))
+    (home-page "https://github.com/open-i18n/rust-unic/")
+    (synopsis "UNIC â Unicode Emoji â Emoji Character Properties")
+    (description "UNIC â Unicode Emoji â Emoji Character Properties")
+    (license (list license:expat license:asl2.0))))
+
 (define-public rust-clap-for-jrep
   (package
     (name "rust-clap")
@@ -499,6 +564,48 @@ or any combination.")
       Argument Parser")
     (license (list license:expat license:asl2.0))))
 
+(define-public rust-clap-4
+  (package
+    (name "rust-clap")
+    (version "4.0.9")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "clap" version))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "1w0dxqzrh449s9l2k8g66pdsff02599bwi5mh0gny3227kcpsq1h"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:cargo-inputs
+       (("rust-atty" ,rust-atty-0.2)
+        ("rust-backtrace" ,rust-backtrace-0.3)
+        ("rust-bitflags" ,rust-bitflags-1)
+        ("rust-clap-derive" ,rust-clap-derive-4)
+        ("rust-clap-lex" ,rust-clap-lex-0.3)
+        ("rust-once-cell" ,rust-once-cell-1)
+        ("rust-strsim" ,rust-strsim-0.10)
+        ("rust-termcolor" ,rust-termcolor-1)
+        ("rust-terminal-size" ,rust-terminal-size-0.2)
+        ("rust-unicase" ,rust-unicase-2)
+        ("rust-unicode-width" ,rust-unicode-width-0.1))
+       #:cargo-development-inputs
+       (("rust-humantime" ,rust-humantime-2)
+        ("rust-rustversion" ,rust-rustversion-1)
+        ("rust-shlex" ,rust-shlex-1)
+        ("rust-snapbox" ,rust-snapbox-0.4)
+        ("rust-static-assertions" ,rust-static-assertions-1)
+        ("rust-trybuild" ,rust-trybuild-1)
+        ("rust-trycmd" ,rust-trycmd-0.13)
+        ("rust-unic-emoji-char" ,rust-unic-emoji-char-0.9))))
+    (home-page "https://github.com/clap-rs/clap")
+    (synopsis
+      "A simple to use, efficient, and full-featured Command Line Argument Parser")
+    (description
+      "This package provides a simple to use, efficient, and full-featured Command Line
+      Argument Parser")
+    (license (list license:expat license:asl2.0))))
+
 ;; ready to upstream, WITH rust-clap-derive
 ;; replace fields with those from upstream.
 (define-public rust-clap-derive-3.1
@@ -527,6 +634,32 @@ or any combination.")
       "Parse command line argument by defining a struct, derive crate.")
     (license (list license:expat license:asl2.0))))
 
+(define-public rust-clap-derive-4
+  (package
+    (name "rust-clap-derive")
+    (version "4.0.9")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "clap-derive" version))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "02zhbbmyz3dpy9ml6xfp7i8p3ffj1djvkdnkg6gr6d0s5r4hg8x4"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:cargo-inputs
+       (("rust-heck" ,rust-heck-0.4)
+        ("rust-proc-macro-error" ,rust-proc-macro-error-1)
+        ("rust-proc-macro2" ,rust-proc-macro2-1)
+        ("rust-quote" ,rust-quote-1)
+        ("rust-syn" ,rust-syn-1))))
+    (home-page "https://github.com/clap-rs/clap/tree/master/clap_derive")
+    (synopsis
+      "Parse command line argument by defining a struct, derive crate.")
+    (description
+      "Parse command line argument by defining a struct, derive crate.")
+    (license (list license:expat license:asl2.0))))
+
 (define-public rust-textwrap-0.15
   (package
     (name "rust-textwrap")