aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Efraim Flashner 2023-09-21 09:07:37 +0300
committer Efraim Flashner 2023-09-21 10:30:43 +0300
commit fa03e6f84c0ff8e1c168568fd33316c170014251 (patch)
tree a793914cec12b2e477e44bf09097ac429e56194a
parent a599c69673da59fc129ceefffb73f8958f3d82f0 (diff)
download guix-bioinformatics-fa03e6f84c0ff8e1c168568fd33316c170014251.tar.gz
Add smoothxg
-rw-r--r-- gn/packages/bioinformatics.scm 67
1 files changed, 67 insertions, 0 deletions
diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm
index b0865f3..8a4b278 100644
--- a/gn/packages/bioinformatics.scm
+++ b/gn/packages/bioinformatics.scm
@@ -1660,6 +1660,73 @@ limited by the use of sorted disk-backed arrays and succinct rank/select
dictionaries to record a queryable version of the graph.")
(license license:expat)))
+(define-public smoothxg
+  (package
+    (name "smoothxg")
+    (version "0.7.2")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append "https://github.com/pangenome/smoothxg"
+                                  "/releases/download/v" version
+                                  "/smoothxg-v" version ".tar.gz"))
+              (sha256
+               (base32 "1px8b5aaa23z85i7ximdamk2jj7wk5hb7bpbrgxsvkxc69zlwy38"))
+              (snippet   ; turn off native/SSE4.2 tuning in every CMakeLists.txt
+               #~(begin
+                   (use-modules (guix build utils))
+                   (substitute* (find-files "." "CMakeLists.txt")
+                     (("spoa_optimize_for_native ON")
+                      "spoa_optimize_for_native OFF")
+                     (("-msse4\\.2") "")
+                     (("-march=native") ""))))))
+    (build-system cmake-build-system)
+    (arguments
+     (list
+      #:make-flags       ; also handed to "make" by the build-abPOA phase below
+      #~(list (string-append "CC = " #$(cc-for-target)))
+      #:phases
+      #~(modify-phases %standard-phases
+          (add-before 'build 'build-abPOA   ; runs "make libabpoa" in deps/abPOA
+            (lambda* (#:key make-flags #:allow-other-keys)
+              ;; This helps with portability to other architectures.
+              (with-directory-excursion
+                (string-append "../smoothxg-v" #$version "/deps/abPOA")
+                (substitute* "Makefile"
+                  (("-march=native") ""))
+                (apply invoke "make" "libabpoa" make-flags)))))))
+    (inputs
+     (list jemalloc
+           openmpi
+           pybind11
+           python
+           zlib
+           (list zstd "lib")))
+    (native-inputs
+     (list pkg-config))
+    (home-page "https://github.com/pangenome/smoothxg")
+    (synopsis
+     "Linearize and simplify variation graphs using blocked partial order alignment")
+    (description "Pangenome graphs built from raw sets of alignments may have
+complex local structures generated by common patterns of genome variation.
+These local nonlinearities can introduce difficulty in downstream analyses,
+visualization, and interpretation of variation graphs.
+
+@command{smoothxg} finds blocks of paths that are collinear within a variation
+graph. It applies partial order alignment to each block, yielding an acyclic
+variation graph. Then, to yield a smoothed graph, it walks the original paths
+to lace these subgraphs together. The resulting graph only contains cyclic or
+inverting structures larger than the chosen block size, and is otherwise
+manifold linear. In addition to providing a linear structure to the graph,
+smoothxg can be used to extract the consensus pangenome graph by applying the
+heaviest bundle algorithm to each chain.
+
+To find blocks, smoothxg applies a greedy algorithm that assumes that the graph
+nodes are sorted according to their occurrence in the graph's embedded paths.
+The path-guided stochastic gradient descent based 1D sort implemented in
+@command{odgi sort -Y} is designed to provide this kind of sort.")
+    (properties `((tunable? . #t)))
+    (license license:expat)))
+
;; TODO: Unbundle BBHash, parallel-hashmap, zstr
(define-public graphaligner
(package