about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--gn/packages/bioinformatics.scm67
1 files changed, 67 insertions, 0 deletions
diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm
index b0865f3..8a4b278 100644
--- a/gn/packages/bioinformatics.scm
+++ b/gn/packages/bioinformatics.scm
@@ -1660,6 +1660,73 @@ limited by the use of sorted disk-backed arrays and succinct rank/select
 dictionaries to record a queryable version of the graph.")
     (license license:expat)))
 
+(define-public smoothxg
+  (package
+    (name "smoothxg")
+    (version "0.7.2")
+    (source (origin
+             (method url-fetch)
+             (uri (string-append "https://github.com/pangenome/smoothxg"
+                                 "/releases/download/v" version
+                                 "/smoothxg-v" version ".tar.gz"))
+             (sha256
+              (base32 "1px8b5aaa23z85i7ximdamk2jj7wk5hb7bpbrgxsvkxc69zlwy38"))
+             (snippet   ; strip CPU-specific flags from every CMakeLists.txt
+              #~(begin
+                  (use-modules (guix build utils))
+                  (substitute* (find-files "." "CMakeLists.txt")
+                    (("spoa_optimize_for_native ON")
+                     "spoa_optimize_for_native OFF")
+                    (("-msse4\\.2") "")
+                    (("-march=native") ""))))))
+    (build-system cmake-build-system)
+    (arguments
+     (list
+       #:make-flags
+       #~(list (string-append "CC = " #$(cc-for-target)))
+       #:phases
+       #~(modify-phases %standard-phases
+           (add-before 'build 'build-abPOA ; deps/abPOA has its own Makefile
+             (lambda* (#:key make-flags #:allow-other-keys)
+               ;; Drop -march=native so the library is portable across CPUs.
+               (with-directory-excursion
+                 (string-append "../smoothxg-v" #$version "/deps/abPOA")
+                 (substitute* "Makefile"
+                   (("-march=native") ""))
+                 (apply invoke "make" "libabpoa" make-flags)))))))
+    (inputs
+     (list jemalloc
+           openmpi
+           pybind11
+           python
+           zlib
+           (list zstd "lib")))
+    (native-inputs
+     (list pkg-config))
+    (home-page "https://github.com/pangenome/smoothxg")
+    (synopsis
+     "Linearize and simplify variation graphs using blocked partial order alignment")
+    (description "Pangenome graphs built from raw sets of alignments may have
+complex local structures generated by common patterns of genome variation.
+These local nonlinearities can introduce difficulty in downstream analyses,
+visualization, and interpretation of variation graphs.
+
+@command{smoothxg} finds blocks of paths that are collinear within a variation
+graph.  It applies partial order alignment to each block, yielding an acyclic
+variation graph.  Then, to yield a smoothed graph, it walks the original paths
+to lace these subgraphs together.  The resulting graph only contains cyclic or
+inverting structures larger than the chosen block size, and is otherwise
+manifold linear.  In addition to providing a linear structure to the graph,
+smoothxg can be used to extract the consensus pangenome graph by applying the
+heaviest bundle algorithm to each chain.
+
+To find blocks, smoothxg applies a greedy algorithm that assumes that the graph
+nodes are sorted according to their occurrence in the graph's embedded paths.
+The path-guided stochastic gradient descent based 1D sort implemented in
+@command{odgi sort -Y} is designed to provide this kind of sort.")
+    (properties `((tunable? . #t)))
+    (license license:expat)))
+
 ;; TODO: Unbundle BBHash, parallel-hashmap, zstr
 (define-public graphaligner
   (package