From fa03e6f84c0ff8e1c168568fd33316c170014251 Mon Sep 17 00:00:00 2001
From: Efraim Flashner
Date: Thu, 21 Sep 2023 09:07:37 +0300
Subject: Add smoothxg

---
 gn/packages/bioinformatics.scm | 73 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm
index b0865f3..8a4b278 100644
--- a/gn/packages/bioinformatics.scm
+++ b/gn/packages/bioinformatics.scm
@@ -1660,6 +1660,79 @@ limited by the use of sorted disk-backed arrays and succinct rank/select
 dictionaries to record a queryable version of the graph.")
     (license license:expat)))
 
+(define-public smoothxg
+  (package
+    (name "smoothxg")
+    (version "0.7.2")
+    (source (origin
+              (method url-fetch)
+              (uri (string-append "https://github.com/pangenome/smoothxg"
+                                  "/releases/download/v" version
+                                  "/smoothxg-v" version ".tar.gz"))
+              (sha256
+               (base32 "1px8b5aaa23z85i7ximdamk2jj7wk5hb7bpbrgxsvkxc69zlwy38"))
+              (snippet
+               ;; Strip the CPU-specific compiler flags from every
+               ;; CMakeLists.txt, including those of the bundled dependencies.
+               #~(begin
+                   (use-modules (guix build utils))
+                   (substitute* (find-files "." "CMakeLists.txt")
+                     (("spoa_optimize_for_native ON")
+                      "spoa_optimize_for_native OFF")
+                     (("-msse4\\.2") "")
+                     (("-march=native") ""))))))
+    (build-system cmake-build-system)
+    (arguments
+     (list
+      #:make-flags
+      #~(list (string-append "CC = " #$(cc-for-target)))
+      #:phases
+      #~(modify-phases %standard-phases
+          ;; The bundled abPOA under deps/ has its own Makefile; run its
+          ;; 'libabpoa' target before the CMake-driven 'build phase.
+          (add-before 'build 'build-abPOA
+            (lambda* (#:key make-flags #:allow-other-keys)
+              (with-directory-excursion
+                  (string-append "../smoothxg-v" #$version "/deps/abPOA")
+                ;; This helps with portability to other architectures.
+                (substitute* "Makefile"
+                  (("-march=native") ""))
+                (apply invoke "make" "libabpoa" make-flags)))))))
+    (inputs
+     (list jemalloc
+           openmpi
+           pybind11
+           python
+           zlib
+           (list zstd "lib")))
+    (native-inputs
+     (list pkg-config))
+    (home-page "https://github.com/pangenome/smoothxg")
+    (synopsis
+     "Linearize and simplify variation graphs using blocked partial order alignment")
+    (description "Pangenome graphs built from raw sets of alignments may have
+complex local structures generated by common patterns of genome variation.
+These local nonlinearities can introduce difficulty in downstream analyses,
+visualization, and interpretation of variation graphs.
+
+@command{smoothxg} finds blocks of paths that are collinear within a variation
+graph.  It applies partial order alignment to each block, yielding an acyclic
+variation graph.  Then, to yield a smoothed graph, it walks the original paths
+to lace these subgraphs together.  The resulting graph only contains cyclic or
+inverting structures larger than the chosen block size, and is otherwise
+manifold linear.  In addition to providing a linear structure to the graph,
+smoothxg can be used to extract the consensus pangenome graph by applying the
+heaviest bundle algorithm to each chain.
+
+To find blocks, smoothxg applies a greedy algorithm that assumes that the graph
+nodes are sorted according to their occurrence in the graph's embedded paths.
+The path-guided stochastic gradient descent based 1D sort implemented in
+@command{odgi sort -Y} is designed to provide this kind of sort.")
+    ;; The native-CPU flags are stripped above, so mark the package as
+    ;; tunable to let users opt back in with the '--tune' transformation.
+    (properties `((tunable? . #t)))
+    (license license:expat)))
+
 ;; TODO: Unbundle BBHash, parallel-hashmap, zstr
 (define-public graphaligner
   (package
-- 
cgit v1.2.3