about summary refs log tree commit diff
diff options
context:
space:
mode:
author Efraim Flashner 2023-09-22 17:33:21 +0300
committer Efraim Flashner 2023-09-22 17:33:21 +0300
commit 2ceeed2d0a9938327784b26a68f21ff533e9c634 (patch)
tree 8845fb4c02868418c93bbd250c5acacc2174a5c0
parent 5b36e14b798dbebd0c1aeec818d4b7991dc29839 (diff)
download guix-bioinformatics-2ceeed2d0a9938327784b26a68f21ff533e9c634.tar.gz
Add pggb.
-rw-r--r-- gn/packages/bioinformatics.scm 129
1 files changed, 129 insertions, 0 deletions
diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm
index 700dec4..0a131f5 100644
--- a/gn/packages/bioinformatics.scm
+++ b/gn/packages/bioinformatics.scm
@@ -48,6 +48,7 @@
   #:use-module (gnu packages fontutils)
   #:use-module (gnu packages gcc)
   #:use-module (gnu packages ghostscript)
+  #:use-module (gnu packages graph)
   #:use-module (gnu packages gtk)
   #:use-module (gnu packages guile)
   #:use-module (gnu packages image)
@@ -61,6 +62,7 @@
   #:use-module (gnu packages mpi)
   #:use-module (gnu packages ncurses)
   #:use-module (gnu packages ocaml)
+  #:use-module (gnu packages parallel)
   #:use-module (gnu packages perl)
   #:use-module (gnu packages pkg-config)
   #:use-module (gnu packages protobuf)
@@ -2483,6 +2485,133 @@ multiple sequence alignment.")
         license:zlib    ; deps/sonLib/externalTools/cutest
         license:boost1.0)))) ; catch.hpp
 
+;; pggb is packaged with copy-build-system: nothing is compiled, the files
+;; from the checkout are copied into place, hard-coded tool paths are patched
+;; and the entry points are wrapped so their runtime tools are found on PATH.
+(define-public pggb
+  (let ((commit "9ebff27320382e470ed38a85b4448402e1e7c353")
+        (revision "1"))
+    (package
+      (name "pggb")
+      (version (git-version "0.5.1" revision commit))
+      (source (origin
+                (method git-fetch)
+                (uri (git-reference
+                       (url "https://github.com/pangenome/pggb")
+                       (commit commit)))
+                (file-name (git-file-name name version))
+                (sha256
+                 (base32 "0rgpj52q3ai7f1saqbilgx5gz4f403x3427wq649qwv84ivmi1sf"))))
+      (build-system copy-build-system)
+      (arguments
+       (list
+         ;; The helper scripts are installed twice: flattened into bin/
+         ;; ("scripts/" with a trailing slash copies the directory contents)
+         ;; and as a directory at bin/scripts.
+         #:install-plan
+         #~'(("pggb" "bin/")
+             ("partition-before-pggb" "bin/")
+             ("scripts/" "bin/")
+             ("scripts" "bin/scripts"))
+         #:phases
+         #~(modify-phases %standard-phases
+             ;; Runs on the source tree before 'install, so both installed
+             ;; copies of the scripts pick up these changes.
+             (add-before 'install 'patch-binary-path
+               (lambda* (#:key inputs #:allow-other-keys)
+                 ;; Replace the bare "bcftools " command with its absolute
+                 ;; path taken from the package inputs.
+                 (substitute* "scripts/vcf_preprocess.sh"
+                   (("bcftools ")
+                    (string-append (search-input-file inputs "/bin/bcftools") " ")))
+                 ;; Make the Python script see the build-time
+                 ;; GUIX_PYTHONPATH when it is run.
+                 (wrap-script "scripts/net2communities.py"
+                   `("GUIX_PYTHONPATH" ":" prefix
+                     (,(getenv "GUIX_PYTHONPATH"))))))
+             ;; Prefix PATH of the entry points with the bin/ directory of
+             ;; each listed input.
+             (add-after 'install 'wrap-scripts
+               (lambda* (#:key outputs #:allow-other-keys)
+                 (let ((out (assoc-ref outputs "out")))
+                   (for-each
+                     (lambda (file)
+                       (wrap-script file
+                         `("PATH" ":" prefix
+                           ,(map (lambda (input) (string-append input "/bin"))
+                                 '#$(map (lambda (label) (this-package-input label))
+                                         (list "bcftools"
+                                               "bedtools"
+                                               "gfaffix"
+                                               "fastix"
+                                               "multiqc"
+                                               "mummer"
+                                               "odgi-hwcaps"
+                                               "pafplot"
+                                               "parallel"
+                                               "pigz"
+                                               "r-data-table"
+                                               "rtg-tools"
+                                               "samtools"
+                                               "seqwish"
+                                               "smoothxg"
+                                               ;"tabix"
+                                               "vcfbub"
+                                               "vcflib"
+                                               "vg"
+                                               "wfmash-hwcaps"))))))
+                          (list (string-append out "/bin/pggb")
+                                (string-append out "/bin/partition-before-pggb")
+                                (string-append out "/bin/gfa2evaluation.sh")
+                                (string-append out "/bin/scripts/gfa2evaluation.sh"))))))
+             ;; Both 'wrap-scripts and this phase are inserted directly after
+             ;; 'install, so this one (added last) runs first:
+             ;; install -> substitute-file-paths -> wrap-scripts.
+             (add-after 'install 'substitute-file-paths
+               (lambda* (#:key outputs #:allow-other-keys)
+                 (let ((out (assoc-ref outputs "out")))
+                   ;; gfa2evaluation.sh is installed in bin/ and bin/scripts/
+                   ;; and both copies are wrapped above, so rewrite the
+                   ;; /usr/local references in both of them.
+                   (substitute* (list (string-append out "/bin/gfa2evaluation.sh")
+                                      (string-append out "/bin/scripts/gfa2evaluation.sh"))
+                     (("/usr/local/bin/vcf_preprocess.sh")
+                      (string-append out "/bin/vcf_preprocess.sh"))
+                     (("/usr/local/bin/nucmer2vcf.R")
+                      (string-append out "/bin/nucmer2vcf.R")))))))))
+      (inputs
+       (list bcftools
+             bedtools
+             gfaffix
+             guile-3.0      ; for wrap-script
+             fastix
+             multiqc
+             mummer
+             odgi-hwcaps
+             pafplot
+             parallel
+             pigz
+             python-igraph
+             python-pycairo
+             python-wrapper
+             r-data-table
+             rtg-tools
+             samtools
+             seqwish
+             smoothxg
+             ;tabix
+             vcfbub
+             vcflib
+             vg
+             wfmash-hwcaps))
+      (home-page "https://doi.org/10.1101/2023.04.05.535718")
+      (synopsis "PanGenome Graph Builder")
+      (description "@command{pggb} builds
+@url{https://doi.org/10.1146%2Fannurev-genom-120219-080406, pangenome}
+@url{https://doi.org/10.1038/nbt.4227, variation graphs} from a set of input
+sequences.
+A pangenome variation graph is a kind of generic multiple sequence alignment.
+It lets us understand any kind of sequence variation between a collection of
+genomes.  It shows us similarity where genomes walk through the same parts of
+the graph, and differences where they do not.
+@command{pggb} generates this kind of graph using an all-to-all alignment of
+input sequences (@url{https://github.com/waveygang/wfmash, wfmash}), graph
+induction (@url{https://doi.org/10.1101/2022.02.14.480413, seqwish}), and
+progressive normalization (@url{https://github.com/pangenome/smoothxg,
+smoothxg}, @url{https://github.com/marschall-lab/GFAffix, gfaffix}).  After
+construction, @command{pggb} generates diagnostic visualizations of the graph
+(@url{https://doi.org/10.1093/bioinformatics/btac308, odgi}).  A variant call
+report (in VCF) representing both small and large variants can be generated
+based on any reference genome included in the graph
+(@url{https://github.com/vgteam/vg, vg}).  @command{pggb} writes its output in
+@url{https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md, GFAv1} format,
+which can be used as input by numerous \"genome graph\" and pangenome tools,
+such as the @url{https://github.com/vgteam/vg, vg} and
+@url{https://doi.org/10.1093/bioinformatics/btac308, odgi} toolkits.
+@command{pggb} has been tested at scale in the @acronym{Human Pangenome
+Reference Consortium, HPRC} as a method to build a graph from the
+@url{https://doi.org/10.1101/2022.07.09.499321, draft human pangenome}.")
+      (license license:expat))))
+
 (define-public ucsc-genome-browser
   (package
     (name "ucsc-genome-browser")