From 8885768936858286babae788a5dcfb01c2cad0a8 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Tue, 19 Sep 2023 19:00:45 +0300 Subject: gfaffix: Update to 0.1.5. --- gn/packages/bioinformatics.scm | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index db420b7..63934fc 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -469,7 +469,7 @@ reads.") (define-public gfaffix (package (name "gfaffix") - (version "0.1.3") + (version "0.1.5") (source (origin (method git-fetch) @@ -478,12 +478,7 @@ reads.") (commit version))) (file-name (git-file-name name version)) (sha256 - (base32 "1biss5qv6ag1dfkn1nspwd528hpzgn8i4jydvbv2z7yv7sc685rh")) - (modules '((guix build utils))) - (snippet - '(begin - (substitute* "Cargo.toml" - (("^handlegraph.*") "handlegraph = \"0.7\"\n")))))) + (base32 "181jxl8ldj39jgscyqzhz4l4k5kxj1j9hvzi8dxj59h2zzznb0kb")))) (build-system cargo-build-system) (arguments `(#:install-source? 
#f @@ -491,19 +486,12 @@ reads.") (("rust-clap" ,rust-clap-3) ("rust-rustc-hash" ,rust-rustc-hash-1) ("rust-regex" ,rust-regex-1) - ("rust-handlegraph" ,rust-handlegraph-0.7) + ("rust-handlegraph" ,rust-handlegraph-0.7.0-alpha.9) ("rust-gfa" ,rust-gfa-0.10) ("rust-quick-csv", rust-quick-csv-0.1) + ("rust-rayon" ,rust-rayon-1) ("rust-log" ,rust-log-0.4) - ("rust-env-logger" ,rust-env-logger-0.7)) - #:phases - (modify-phases %standard-phases - (add-after 'unpack 'adjust-dependency-version - (lambda* (#:key inputs #:allow-other-keys) - (let ((handlebar-version ,(package-version rust-handlegraph-0.7))) - (substitute* "Cargo.toml" - (("\"0.7\"") - (string-append "{ version = \"" handlebar-version "\" }"))))))))) + ("rust-env-logger" ,rust-env-logger-0.7)))) (home-page "https://github.com/marschall-lab/GFAffix") (synopsis "Identify walk-preserving shared affixes in variation graphs") (description -- cgit v1.2.3 From 90d0c931a585ec862151a11b6a88fce678670b9d Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Tue, 19 Sep 2023 19:16:01 +0300 Subject: Add vcfbub --- gn/packages/bioinformatics.scm | 49 ++++++++++++++++++++++++++++++++++++++++++ gn/packages/crates-io.scm | 26 ++++++++++++++++++++++ 2 files changed, 75 insertions(+) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 63934fc..2b1d514 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -499,6 +499,55 @@ reads.") collapses them into a non-redundant graph structure.") (license license:expat))) +(define-public vcfbub + (package + (name "vcfbub") + (version "0.1.0") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/pangenome/vcfbub") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0sk2ab22z6qa00j1w8a8f5kbb7q2xb10fhd32zy4lh351v3mqmyg")))) + (build-system cargo-build-system) + (arguments + `(#:install-source? 
#f + #:cargo-inputs + (("rust-clap" ,rust-clap-2) + ("rust-flate2" ,rust-flate2-1) + ("rust-vcf" ,rust-vcf-0.6)))) + (home-page "https://github.com/pangenome/vcfbub") + (synopsis "Popping bubbles in vg deconstruct VCFs") + (description + "The VCF output produced by a command like @command{vg deconstruct -e -a +-H '#' ...} includes information about the nesting of variants. With @code{-a}, +@code{--all-snarls}, we obtain not just the top level bubbles, but all nested +ones. This exposed snarl tree information can be used to filter the VCF to +obtain a set of non-overlapping sites (n.b. \"snarl\" is a generic model of +graph bubbles including tips and loops). +@code{vcfbub} lets us do two common operations on these VCFs: +@enumerate +@item We can filter sites by maximum level in the snarl tree. For instance, +@code{--max-level 0} would keep only sites with @code{LV=0}. In practice, vg's +snarl finder ensures that these are sites rooted on the main linear axis of the +pangenome graph. Those at higher levels occur within larger variants. +@item We can filter sites by maximum allele size, either for the reference +allele or any allele. In this case, @code{--max-ref-length 10000} would keep +only sites where the reference allele is less than 10kb long. Setting +@code{--max-ref-length} or @code{--max-allele-length} additionally ensures that +the output contains the bubbles nested inside of any popped bubble, even if +they are at greater than @code{--max-level}. +@end enumerate +@code{vcfbub} accomplishes a simple task: we keep sites that are the children +of those which we \"pop\" due to their size. These occur around complex large +SVs, such as multi-Mbp inversions and segmental duplications. 
We often need to +remove these, as they provide little information for many downstream +applications, such as haplotype panels or other imputation references.") + (license license:expat))) + (define-public gafpack (let ((commit "ad31875b6914d964c6fd72d1bf334f0843538fb6") ; November 10, 2022 (revision "1")) diff --git a/gn/packages/crates-io.scm b/gn/packages/crates-io.scm index a0e31ce..79832bc 100644 --- a/gn/packages/crates-io.scm +++ b/gn/packages/crates-io.scm @@ -575,6 +575,32 @@ or any combination.") (description "Yet another format-preserving TOML parser.") (license (list license:expat license:asl2.0)))) +(define-public rust-vcf-0.6 + (package + (name "rust-vcf") + (version "0.6.1") + (source (origin + (method url-fetch) + (uri (crate-uri "vcf" version)) + (file-name (string-append name "-" version ".tar.gz")) + (sha256 + (base32 + "0dc0p00a19rpmhrqcshrn2qg5l716b5s1fy8vpd3p32bw77vpbs0")))) + (build-system cargo-build-system) + (arguments + `(#:tests? #f ; Not all files included + #:cargo-inputs + (("rust-nom" ,rust-nom-7) + ("rust-once-cell" ,rust-once-cell-1) + ("rust-thiserror" ,rust-thiserror-1)) + #:cargo-development-inputs + (("rust-clap" ,rust-clap-2) + ("rust-flate2" ,rust-flate2-1)))) + (home-page "https://github.com/informationsea/vcf-rs") + (synopsis "Rust implementation of VCF parser") + (description "This package provides a Rust implementation of a VCF parser.") + (license license:asl2.0))) + (define-public rust-gsl-sys (package (name "rust-gsl-sys") -- cgit v1.2.3 From c2415bfa014892a56be9b621089a0a2ffab234f7 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Tue, 19 Sep 2023 19:24:04 +0300 Subject: Add fastix --- gn/packages/bioinformatics.scm | 26 ++++++++++++++++++++++++++ gn/packages/crates-io.scm | 25 +++++++++++++++++++++++++ 2 files changed, 51 insertions(+) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 2b1d514..4b37c8f 100644 --- 
a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -548,6 +548,32 @@ remove these, as they provide little information for many downstream applications, such as haplotype panels or other imputation references.") (license license:expat))) +(define-public fastix + (package + (name "fastix") + (version "0.1.0") + (source (origin + (method url-fetch) + (uri (crate-uri "fastix" version)) + (file-name (string-append name "-" version ".tar.gz")) + (sha256 + (base32 "1mzk65mg8vx0hz39xis6zqdmq56abhmza656gn9pgmlsn151gpx2")))) + (build-system cargo-build-system) + (arguments + `(#:install-source? #f + #:cargo-inputs + (("rust-clap" ,rust-clap-2)) + #:cargo-development-inputs + (("rust-assert-cmd" ,rust-assert-cmd-0.12) + ("rust-predicates" ,rust-predicates-1)))) + (home-page "https://github.com/ekg/fastix") + (synopsis "Prefix-renaming FASTA records") + (description "A command line tool to add prefixes to FASTA headers. The +idea is to support pangenomic applications, following the +@url{https://github.com/pangenome/PanSN-spec, PanSN} hierarchical naming +specification.") + (license license:expat))) + (define-public gafpack (let ((commit "ad31875b6914d964c6fd72d1bf334f0843538fb6") ; November 10, 2022 (revision "1")) diff --git a/gn/packages/crates-io.scm b/gn/packages/crates-io.scm index 79832bc..557c980 100644 --- a/gn/packages/crates-io.scm +++ b/gn/packages/crates-io.scm @@ -10,6 +10,31 @@ #:use-module (gnu packages maths) #:use-module (gnu packages python)) +(define-public rust-assert-cmd-0.12 + (package + (name "rust-assert-cmd") + (version "0.12.2") + (source (origin + (method url-fetch) + (uri (crate-uri "assert-cmd" version)) + (file-name (string-append name "-" version ".tar.gz")) + (sha256 + (base32 + "1xfn2spazxk3ljj9q3250a24gndja9vwa0h0rnbccdrbd4ncyvwk")))) + (build-system cargo-build-system) + (arguments + `(#:cargo-inputs + (("rust-doc-comment" ,rust-doc-comment-0.3) + ("rust-escargot" ,rust-escargot-0.5) + ("rust-predicates" 
,rust-predicates-1) + ("rust-predicates-core" ,rust-predicates-core-1) + ("rust-predicates-tree" ,rust-predicates-tree-1) + ("rust-wait-timeout" ,rust-wait-timeout-0.2)))) + (home-page "https://github.com/assert-rs/assert_cmd") + (synopsis "Test CLI Applications.") + (description "Test CLI Applications.") + (license (list license:expat license:asl2.0)))) + (define-public rust-bgzip-0.2 (package (name "rust-bgzip") -- cgit v1.2.3 From 29839033efcc894718b1596e66bcea811da19d7a Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Tue, 19 Sep 2023 19:38:52 +0300 Subject: Add pafplot --- gn/packages/bioinformatics.scm | 37 +++++++++++++++++++++++++++ gn/packages/crates-io.scm | 58 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 4b37c8f..75449f3 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -39,6 +39,7 @@ #:use-module (gnu packages cpp) #:use-module (gnu packages cran) #:use-module (gnu packages crates-io) + #:use-module (gnu packages crates-graphics) #:use-module (gnu packages curl) #:use-module (gnu packages databases) #:use-module (gnu packages datastructures) @@ -574,6 +575,42 @@ idea is to support pangenomic applications, following the specification.") (license license:expat))) +(define-public pafplot + (let ((commit "7dda24c0aeba8556b600d53d748ae3103ec85501") + (revision "1")) + (package + (name "pafplot") + (version (git-version "0.0.0" revision commit)) + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/ekg/pafplot.git") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 "04ffz0zfj4mvfxmrwgisv213fypgl02f7sim950a067pm7375g1l")))) + (build-system cargo-build-system) + (arguments + `(#:install-source? 
#f + #:cargo-inputs + (("rust-clap" ,rust-clap-2) + ("rust-boomphf" ,rust-boomphf-0.5) + ("rust-itertools" ,rust-itertools-0.10) + ("rust-fnv" ,rust-fnv-1) + ("rust-lodepng" ,rust-lodepng-3) + ("rust-rgb" ,rust-rgb-0.8) + ("rust-line-drawing" ,rust-line-drawing-0.8)))) + (home-page "https://github.com/ekg/pafplot.git") + (synopsis "Base-level dotplots from PAF alignments") + (description "In the process of generating alignments between whole +genomes, we often need to understand the base-level alignment between +particular sequences. @command{pafplot} allows us to do so by rasterizing the +matches alignment set. It draws a line on a raster image to represent each +match found in a set of alignments. The resulting image provides a high-level +view of the structure of the alignments, and in consequence the homology +relationships between the sequences in consideration.") + (license license:expat)))) + (define-public gafpack (let ((commit "ad31875b6914d964c6fd72d1bf334f0843538fb6") ; November 10, 2022 (revision "1")) diff --git a/gn/packages/crates-io.scm b/gn/packages/crates-io.scm index 557c980..6db2716 100644 --- a/gn/packages/crates-io.scm +++ b/gn/packages/crates-io.scm @@ -246,6 +246,64 @@ functions} for a collection of hashable objects.") (description "Key String provides a Rust package optimized for map keys.") (license (list license:expat license:asl2.0)))) +(define-public rust-line-drawing-0.8 + (package + (name "rust-line-drawing") + (version "0.8.1") + (source (origin + (method url-fetch) + (uri (crate-uri "line-drawing" version)) + (file-name (string-append name "-" version ".tar.gz")) + (sha256 + (base32 + "18a940s3mv8w5prpb99sdxykzhrvzrnymw3hvd7wisnkgbr11jqm")))) + (build-system cargo-build-system) + (arguments + `(#:cargo-inputs + (("rust-num-traits" ,rust-num-traits-0.2)) + #:cargo-development-inputs + (("rust-bresenham" ,rust-bresenham-0.1) + ("rust-image" ,rust-image-0.23) + ("rust-rand" ,rust-rand-0.8)))) + (home-page 
"https://github.com/expenses/line_drawing") + (synopsis + "A collection of line-drawing algorithms for use in graphics and video games.") + (description + "This package provides a collection of line-drawing algorithms for use in +graphics and video games.") + (license license:expat))) + +(define-public rust-lodepng-3 + (package + (name "rust-lodepng") + (version "3.8.0") + (source (origin + (method url-fetch) + (uri (crate-uri "lodepng" version)) + (file-name (string-append name "-" version ".tar.gz")) + (sha256 + (base32 + "1f5d9bva17sq7npw9qh7h6sh02k0ycrjx5hr147q0jv4m0qd2970")))) + (build-system cargo-build-system) + (arguments + `(#:cargo-test-flags + (list "--release" "--" + ;; Not all files included. + "--skip=test::read_icc") + #:cargo-inputs + (("rust-crc32fast" ,rust-crc32fast-1) + ("rust-fallible-collections" ,rust-fallible-collections-0.4) + ("rust-flate2" ,rust-flate2-1) + ("rust-libc" ,rust-libc-0.2) + ("rust-rgb" ,rust-rgb-0.8)))) + (home-page "https://lib.rs/crates/lodepng") + (synopsis + "Reading and writing PNG files without system dependencies. Pure Rust port of LodePNG.") + (description + "Reading and writing PNG files without system dependencies. Pure Rust port of +@code{LodePNG}.") + (license license:zlib))) + (define-public rust-pyo3-0.14 (package (name "rust-pyo3") -- cgit v1.2.3 From 9fd1f19078f055144f1192534d6f32eb26eb32f1 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Wed, 20 Sep 2023 10:18:06 +0300 Subject: Add wfmash with glibc-hwcap libraries. --- gn/packages/bioinformatics.scm | 93 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 75449f3..a303baf 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -195,6 +195,99 @@ accurately delineate genomic rearrangements throughout the genome. 
Structural variants can be visualized using Delly-maze and Delly-suave.") (license license:gpl3))) +(define-public wfmash-x86-64-v2 + (package/inherit wfmash + (name "wfmash-x86-64-v2") + (arguments + (substitute-keyword-arguments (package-arguments wfmash) + ((#:configure-flags flags #~()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v2" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v2" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v2")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-binary + (lambda _ + (delete-file-recursively (string-append #$output "/bin")))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t))))) + +(define-public wfmash-x86-64-v3 + (package/inherit wfmash + (name "wfmash-x86-64-v3") + (arguments + (substitute-keyword-arguments (package-arguments wfmash) + ((#:configure-flags flags #~()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v3" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v3" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v3")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-binary + (lambda _ + (delete-file-recursively (string-append #$output "/bin")))))))) + (supported-systems '("x86_64-linux")) + #;(properties `((hidden? . 
#t))))) + +(define-public wfmash-x86-64-v4 + (package/inherit wfmash + (name "wfmash-x86-64-v4") + (arguments + (substitute-keyword-arguments (package-arguments wfmash) + ((#:configure-flags flags #~()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v4" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v4" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v4")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-binary + (lambda _ + (delete-file-recursively (string-append #$output "/bin")))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t))))) + +;; This copy of wfmash will automatically use the libraries that target the +;; x86_64 psABI which the hardware supports. +(define-public wfmash-hwcaps + (package/inherit wfmash + (name "wfmash-hwcaps") + (arguments + (substitute-keyword-arguments (package-arguments wfmash) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'install-optimized-libraries + (lambda* (#:key inputs outputs #:allow-other-keys) + (let ((hwcaps "/lib/glibc-hwcaps")) + (copy-recursively + (string-append (assoc-ref inputs "wfmash-x86-64-v2") + hwcaps "/x86-64-v2") + (string-append #$output hwcaps "/x86-64-v2")) + (copy-recursively + (string-append (assoc-ref inputs "wfmash-x86-64-v3") + hwcaps "/x86-64-v3") + (string-append #$output hwcaps "/x86-64-v3")) + (copy-recursively + (string-append (assoc-ref inputs "wfmash-x86-64-v4") + hwcaps "/x86-64-v4") + (string-append #$output hwcaps "/x86-64-v4"))))))))) + (native-inputs + (modify-inputs (package-native-inputs wfmash) + (append wfmash-x86-64-v2 + wfmash-x86-64-v3 + wfmash-x86-64-v4))))) + (define-public freec (package (name "control-freec") -- cgit v1.2.3 From a599c69673da59fc129ceefffb73f8958f3d82f0 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Wed, 20 
Sep 2023 16:03:50 +0300 Subject: seqwish: Update to 0.7.9. --- gn/packages/bioinformatics.scm | 208 ++++++++--------------------------------- seqwish-paryfor-riscv.diff | 20 ++++ 2 files changed, 59 insertions(+), 169 deletions(-) create mode 100644 seqwish-paryfor-riscv.diff (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index a303baf..b0865f3 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -1592,120 +1592,60 @@ runApp(launch.browser=0, port=4208)~%\n" (scRNA-seq) data analysis.") (license license:agpl3)))) -(define-public seqwish-0.1 +(define-public seqwish (package (name "seqwish") - (version "0.1") + (version "0.7.9") (source (origin (method git-fetch) (uri (git-reference (url "https://github.com/ekg/seqwish.git") - (commit (string-append "v" version)))) + (commit (string-append "v" version)) + (recursive? #t))) (file-name (git-file-name name version)) (sha256 - (base32 - "1gp72cmi13hbkmwwhgckmxkbx8w644jc5l6dvvvxdbl6sk8xsi5r")))) - (build-system gnu-build-system) + (base32 "0xnv40kjlb610bk67n4xdqz5dfsjhrqld5bxzblji57k6bb4n66x")) + (patches (search-patches "seqwish-paryfor-riscv.diff")) + (snippet + #~(begin + (use-modules (guix build utils)) + (substitute* '("deps/atomic_queue/Makefile" + "deps/mmmulti/deps/DYNAMIC/CMakeLists.txt" + "deps/mmmulti/deps/atomic_queue/Makefile" + "deps/mmmulti/deps/ips4o/CMakeLists.txt") + (("-march=native") "") + (("-mcx16") "")) + (substitute* '("deps/mmmulti/deps/sdsl-lite/CMakeLists.txt" + "deps/sdsl-lite/CMakeLists.txt") + (("-msse4.2 -march=native") "")))))) + (build-system cmake-build-system) (arguments - `(#:phases + `(#:configure-flags + '(,@(cond ((target-x86-64?) + ;; This seems to be about the minimum + '("-DEXTRA_FLAGS=-march=x86-64-v2")) + ((target-aarch64?) + '("-DEXTRA_FLAGS=-march=armv8-a")) + ((target-riscv64?) + '("-DEXTRA_FLAGS=-march=rv64imafdc")) + ((target-ppc64le?) 
+ '("-DEXTRA_FLAGS=-mcpu=power8")) + ;; The default case is '-march=native' + (else '()))) + #:phases (modify-phases %standard-phases - (delete 'configure) - (replace 'build - (lambda* (#:key inputs #:allow-other-keys) - (let ((sdsl-lite (assoc-ref inputs "sdsl-lite")) - (sufsort (assoc-ref inputs "sufsort")) - (bsort (assoc-ref inputs "bsort")) - (mmap_allocator (assoc-ref inputs "mmap-allocator")) - (tayweeargs (assoc-ref inputs "tayweeargs-source")) - (gzipreader (assoc-ref inputs "gzipreader-source")) - (mmmultimap (assoc-ref inputs "mmmultimap-source")) - (iitii (assoc-ref inputs "iitii-source")) - (ips4o (assoc-ref inputs "ips4o-source"))) - (apply invoke "g++" "-o" "seqwish" - "-O3" "-g" "-std=c++14" "-fopenmp" - "-latomic" "-lz" - (string-append "-I" sdsl-lite "/include") - (string-append "-I" sdsl-lite "/include/sdsl") - (string-append "-I" bsort "/include") - (string-append "-I" tayweeargs) - (string-append "-I" gzipreader) - (string-append "-I" mmmultimap "/src") - (string-append "-I" iitii "/src") - (string-append "-I" mmap_allocator "/include") - (string-append "-I" ips4o) - (append - (find-files "src" ".") - (list - (string-append sdsl-lite "/lib/libsdsl.so") - (string-append sufsort "/lib/libdivsufsort.so") - (string-append sufsort "/lib/libdivsufsort64.so") - (string-append mmap_allocator "/lib/libmmap_allocator.a") - (string-append bsort "/lib/libbsort.a"))))))) (replace 'check - (lambda _ + (lambda* (#:key tests? #:allow-other-keys) ;; Add seqwish to the PATH for the tests. (setenv "PATH" (string-append (getcwd) ":" (getenv "PATH"))) - (with-directory-excursion "test" - (invoke "make")))) - (replace 'install - (lambda* (#:key outputs #:allow-other-keys) - (let ((out (assoc-ref outputs "out"))) - (install-file "seqwish" (string-append out "/bin"))) - #t))))) + (when tests? 
+ (with-directory-excursion "../source/test" + (invoke "make")))))))) (inputs - `(("bsort" ,ekg-bsort) - ("mmap-allocator" ,ekg-mmap-allocator) - ("openmpi" ,openmpi) - ("sdsl-lite" ,sdsl-lite) - ("sufsort" ,libdivsufsort) - ("zlib" ,zlib))) + (list jemalloc + zlib)) (native-inputs - `(("prove" ,perl) - ("tayweeargs-source" ,(origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/Taywee/args.git") - (commit "3de44ec671db452cc0c4ef86399b108939768abb"))) - (file-name "tayweeargs-source-for-seqwish") - (sha256 - (base32 - "1v8kq1gvl5waysrfp0s58881rx39mnf3ifdsl6pb3y3c4zaki2xh")))) - ("gzipreader-source" ,(origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/gatoravi/gzip_reader.git") - (commit "0ef26c0399e926087f9d6c4a56067a7bf1fc4f5e"))) - (file-name "gzipreader-source-for-seqwish") - (sha256 - (base32 - "1wy84ksx900840c06w0f1mgzvr7zsfsgxq1b0jdjh8qka26z1r17")))) - ("mmmultimap-source" ,(origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/ekg/mmmultimap.git") - (commit "88c734c36563048b0f3acc04dd8856f19e02b75f"))) - (file-name "mmmultimap-source-for-seqwish") - (sha256 - (base32 - "06mnf3bd32s3ngxkl573ylg2qsvlw80r1ksdwamx3fzxa1a5yls0")))) - ("iitii-source" ,(origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/ekg/iitii.git") - (commit "85209e07a3ee403fb6557387a7f897cd76be4406"))) - (file-name "iitii-source-for-seqwish") - (sha256 - (base32 - "0sszvffkswf89nkbjmjg3wjwqvy2w0d3wgy3ngy33ma4sy4s025s")))) - ("ips4o-source" ,(origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/SaschaWitt/ips4o.git") - (commit "bff3ccf0bf349497f2bb10f825d160b792236367"))) - (file-name "ips4o-source-for-seqwish") - (sha256 - (base32 - "0yjfvrkiwgmy5cn0a7b9j8jwc3zp0l8j4dl5n0jgz68pdnhlp96h")))))) + (list perl)) (home-page "https://github.com/ekg/seqwish") (synopsis "Alignment to variation graph inducer") (description "Seqwish implements a lossless conversion from pairwise @@ 
-1720,76 +1660,6 @@ limited by the use of sorted disk-backed arrays and succinct rank/select dictionaries to record a queryable version of the graph.") (license license:expat))) -(define ekg-bsort - (let ((commit "c3ab0d3308424030e0a000645a26d2c10a59a124") - (revision "1")) - (package - (name "bsort") - (version (git-version "0.0.0" revision commit)) - (source - (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/ekg/bsort.git") - (commit commit))) - (file-name (git-file-name name version)) - (sha256 - (base32 - "0dgpflzcp3vdhbjwbjw347czi86gyk73hxcwjdqnaqh5vg61bdb6")))) - (build-system cmake-build-system) - (arguments - '(#:tests? #f ; no test target - #:out-of-source? #f - #:phases - (modify-phases %standard-phases - (replace 'install - (lambda* (#:key outputs #:allow-other-keys) - (let ((out (assoc-ref outputs "out"))) - (install-file "bin/bsort" (string-append out "/bin")) - (install-file "src/bsort.hpp" (string-append out "/include")) - (install-file "lib/libbsort.a" (string-append out "/lib"))) - #t))))) - (home-page "") - (synopsis "") - (description "") - (license license:gpl2)))) - -(define ekg-mmap-allocator - (let ((commit "ed61daf094de1c2e1adbe8306287ad52da5f0264") - (revision "1")) - (package - (name "mmap-allocator") - (version (git-version "0.10.1" revision commit)) - (source - (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/ekg/mmap_allocator.git") - (commit commit))) - (file-name (git-file-name name version)) - (sha256 - (base32 - "1f30b2kpwwzh6333s0qi5samk458ghbnvyycf6rwx6n6j7xswhbw")))) - (build-system gnu-build-system) - (arguments - '(#:phases - (modify-phases %standard-phases - (delete 'configure) ; no configure script - (add-before 'install 'pre-install - (lambda* (#:key outputs #:allow-other-keys) - (let ((out (assoc-ref outputs "out"))) - (substitute* "Makefile" - (("HEADERS=") "HEADERS=mmappable_vector.h ") - (("/usr") out)) - (mkdir-p (string-append out "/lib")) - (mkdir 
(string-append out "/include")) - #t)))) - #:test-target "test")) - (home-page "") - (synopsis "") - (description "") - (license license:lgpl2.0+)))) ; README just says "lpgl". - ;; TODO: Unbundle BBHash, parallel-hashmap, zstr (define-public graphaligner (package diff --git a/seqwish-paryfor-riscv.diff b/seqwish-paryfor-riscv.diff new file mode 100644 index 0000000..cecf806 --- /dev/null +++ b/seqwish-paryfor-riscv.diff @@ -0,0 +1,20 @@ +diff --git a/deps/paryfor/paryfor.hpp b/deps/paryfor/paryfor.hpp +index b8ced09..0536580 100644 +--- a/deps/paryfor/paryfor.hpp ++++ b/deps/paryfor/paryfor.hpp +@@ -51,6 +51,15 @@ static inline void spin_loop_pause() noexcept { + } + } // namespace atomic_queue + } // namespace paryfor ++#elif defined(__riscv) && (__riscv_xlen == 64) ++namespace paryfor { ++namespace atomic_queue { ++constexpr int CACHE_LINE_SIZE = 64; ++static inline void spin_loop_pause() noexcept { ++ asm volatile ("nop" ::: "memory"); ++} ++} ++} + #else + #error "Unknown CPU architecture." 
+ #endif -- cgit v1.2.3 From fa03e6f84c0ff8e1c168568fd33316c170014251 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Thu, 21 Sep 2023 09:07:37 +0300 Subject: Add smoothxg --- gn/packages/bioinformatics.scm | 67 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index b0865f3..8a4b278 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -1660,6 +1660,73 @@ limited by the use of sorted disk-backed arrays and succinct rank/select dictionaries to record a queryable version of the graph.") (license license:expat))) +(define-public smoothxg + (package + (name "smoothxg") + (version "0.7.2") + (source (origin + (method url-fetch) + (uri (string-append "https://github.com/pangenome/smoothxg" + "/releases/download/v" version + "/smoothxg-v" version ".tar.gz")) + (sha256 + (base32 "1px8b5aaa23z85i7ximdamk2jj7wk5hb7bpbrgxsvkxc69zlwy38")) + (snippet + #~(begin + (use-modules (guix build utils)) + (substitute* (find-files "." "CMakeLists.txt") + (("spoa_optimize_for_native ON") + "spoa_optimize_for_native OFF") + (("-msse4\\.2") "") + (("-march=native") "")))))) + (build-system cmake-build-system) + (arguments + (list + #:make-flags + #~(list (string-append "CC = " #$(cc-for-target))) + #:phases + #~(modify-phases %standard-phases + (add-before 'build 'build-abPOA + (lambda* (#:key make-flags #:allow-other-keys) + ;; This helps with portability to other architectures. 
+ (with-directory-excursion + (string-append "../smoothxg-v" #$version "/deps/abPOA") + (substitute* "Makefile" + (("-march=native") "")) + (apply invoke "make" "libabpoa" make-flags))))))) + (inputs + (list jemalloc + openmpi + pybind11 + python + zlib + (list zstd "lib"))) + (native-inputs + (list pkg-config)) + (home-page "https://github.com/ekg/smoothxg") + (synopsis + "Linearize and simplify variation graphs using blocked partial order alignment") + (description "Pangenome graphs built from raw sets of alignments may have +complex local structures generated by common patterns of genome variation. +These local nonlinearities can introduce difficulty in downstream analyses, +visualization, and interpretation of variation graphs. + +@command{smoothxg} finds blocks of paths that are collinear within a variation +graph. It applies partial order alignment to each block, yielding an acyclic +variation graph. Then, to yield a smoothed graph, it walks the original paths +to lace these subgraphs together. The resulting graph only contains cyclic or +inverting structures larger than the chosen block size, and is otherwise +manifold linear. In addition to providing a linear structure to the graph, +smoothxg can be used to extract the consensus pangenome graph by applying the +heaviest bundle algorithm to each chain. + +To find blocks, smoothxg applies a greedy algorithm that assumes that the graph +nodes are sorted according to their occurrence in the graph's embedded paths. +The path-guided stochastic gradient descent based 1D sort implemented in +@command{odgi sort -Y} is designed to provide this kind of sort.") + (properties `((tunable? . #t))) + (license license:expat))) + ;; TODO: Unbundle BBHash, parallel-hashmap, zstr (define-public graphaligner (package -- cgit v1.2.3 From 88f74e4ae05a562c4a4373b9e3b86bac4ea132b2 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Thu, 21 Sep 2023 10:31:05 +0300 Subject: odgi: Update to 0.8.3. 
--- gn/packages/bioinformatics.scm | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 8a4b278..dba5f6f 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -2033,32 +2033,48 @@ available to other researchers.") (delete-file "scanpy/tests/test_pca.py") #t))))))))) -;; TODO: Unbundle everything +;; TODO: Unbundle everything before upstreaming (define-public odgi (package (name "odgi") - (version "0.8.1") + (version "0.8.3") + (outputs '("out" "static")) (source (origin (method url-fetch) (uri (string-append "https://github.com/pangenome/odgi/releases" "/download/v" version "/odgi-v" version ".tar.gz")) (sha256 - (base32 "175083pb9hp0vn9a00hbxlayyk5a5j8p52yq5qfmbnfvndisbmbv")) + (base32 "1gw1xdb945z25rar6pba6kq5xdx8l7fkhxjyrvc1z1brva53p9hk")) (snippet #~(begin (use-modules (guix build utils)) (substitute* "CMakeLists.txt" (("-march=native") "") - (("-msse4\\.2") "")) - (delete-file-recursively "deps/pybind11") - (delete-file-recursively "deps/sdsl-lite"))))) + (("-msse4\\.2") "")))))) (build-system cmake-build-system) + (arguments + (list + #:phases + #~(modify-phases %standard-phases + (add-after 'unpack 'link-to-libodgi + (lambda _ + ;; This lets us provide libraries for different psABI levels. 
+ (substitute* "CMakeLists.txt" + (("^ \\$.*") "") + (("target_link_libraries\\(odgi " all) + (string-append all "libodgi_shared "))))) + (add-after 'install 'move-static-library + (lambda* (#:key outputs #:allow-other-keys) + (mkdir-p (string-append #$output:static "/lib")) + (rename-file (string-append #$output "/lib/libodgi.a") + (string-append #$output:static "/lib/libodgi.a"))))))) (native-inputs (list pkg-config)) (inputs (list jemalloc libdivsufsort + openmpi pybind11 python sdsl-lite)) -- cgit v1.2.3 From 91d55ae18950e470e32fbdbebd867196e691a82a Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Thu, 21 Sep 2023 11:30:21 +0300 Subject: wfmash: Fix typos. --- gn/packages/bioinformatics.scm | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index dba5f6f..71ea9c0 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -200,7 +200,7 @@ variants can be visualized using Delly-maze and Delly-suave.") (name "wfmash-x86-64-v2") (arguments (substitute-keyword-arguments (package-arguments wfmash) - ((#:configure-flags flags #~()) + ((#:configure-flags flags #~'()) #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v2" "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v2" (string-append "-DCMAKE_INSTALL_RPATH=" #$output @@ -221,7 +221,7 @@ variants can be visualized using Delly-maze and Delly-suave.") (name "wfmash-x86-64-v3") (arguments (substitute-keyword-arguments (package-arguments wfmash) - ((#:configure-flags flags #~()) + ((#:configure-flags flags #~'()) #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v3" "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v3" (string-append "-DCMAKE_INSTALL_RPATH=" #$output @@ -235,14 +235,14 @@ variants can be visualized using Delly-maze and Delly-suave.") (lambda _ (delete-file-recursively (string-append #$output "/bin")))))))) (supported-systems '("x86_64-linux")) - 
#;(properties `((hidden? . #t))))) + (properties `((hidden? . #t))))) (define-public wfmash-x86-64-v4 (package/inherit wfmash (name "wfmash-x86-64-v4") (arguments (substitute-keyword-arguments (package-arguments wfmash) - ((#:configure-flags flags #~()) + ((#:configure-flags flags #~'()) #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v4" "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v4" (string-append "-DCMAKE_INSTALL_RPATH=" #$output @@ -286,7 +286,8 @@ variants can be visualized using Delly-maze and Delly-suave.") (modify-inputs (package-native-inputs wfmash) (append wfmash-x86-64-v2 wfmash-x86-64-v3 - wfmash-x86-64-v4))))) + wfmash-x86-64-v4))) + (properties `((tunable? . #f))))) (define-public freec (package -- cgit v1.2.3 From 283304f54edbac26a62368e76460528ec478dc61 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Thu, 21 Sep 2023 11:31:51 +0300 Subject: odgi: Add variant with glibc-hwcaps. --- gn/packages/bioinformatics.scm | 127 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 71ea9c0..7a20200 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -2058,6 +2058,18 @@ available to other researchers.") (list #:phases #~(modify-phases %standard-phases + (add-after 'unpack 'use-gnuinstalldirs-macros + (lambda _ + (substitute* "CMakeLists.txt" + (("project\\(odgi\\)" all) + (string-append all "\ninclude(GNUInstallDirs)")) + ;; This is different than the default. + ;(("PUBLIC_HEADER DESTINATION include/odgi") + ; "PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}") + (("LIBRARY DESTINATION lib") + "LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}") + (("ARCHIVE DESTINATION lib") + "ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}")))) (add-after 'unpack 'link-to-libodgi (lambda _ ;; This lets us provide libraries for different psABI levels. 
@@ -2100,6 +2112,121 @@ in-memory footprint at the cost of packing and unpacking.") (properties '((tunable? . #t))) (license license:expat))) +(define-public odgi-x86-64-v2 + (package/inherit odgi + (name "odgi-x86-64-v2") + (arguments + (substitute-keyword-arguments (package-arguments odgi) + ((#:configure-flags flags #~'()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v2" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v2" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v2")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-extra-files + (lambda _ + (delete-file-recursively (string-append #$output "/bin")) + (delete-file-recursively (string-append #$output "/include")))) + (replace 'move-static-library + (lambda* (#:key outputs #:allow-other-keys) + (let ((lib "/lib/glibc-hwcaps/x86-64-v2/libodgi.a")) + (mkdir-p (dirname (string-append #$output:static lib))) + (rename-file (string-append #$output lib) + (string-append #$output:static lib))))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t))))) + +(define-public odgi-x86-64-v3 + (package/inherit odgi + (name "odgi-x86-64-v3") + (arguments + (substitute-keyword-arguments (package-arguments odgi) + ((#:configure-flags flags #~'()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v3" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v3" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v3")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? 
_ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-extra-files + (lambda _ + (delete-file-recursively (string-append #$output "/bin")) + (delete-file-recursively (string-append #$output "/include")))) + (replace 'move-static-library + (lambda* (#:key outputs #:allow-other-keys) + (let ((lib "/lib/glibc-hwcaps/x86-64-v3/libodgi.a")) + (mkdir-p (dirname (string-append #$output:static lib))) + (rename-file (string-append #$output lib) + (string-append #$output:static lib))))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t))))) + +(define-public odgi-x86-64-v4 + (package/inherit odgi + (name "odgi-x86-64-v4") + (arguments + (substitute-keyword-arguments (package-arguments odgi) + ((#:configure-flags flags #~'()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v4" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v4" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v4")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-extra-files + (lambda _ + (delete-file-recursively (string-append #$output "/bin")) + (delete-file-recursively (string-append #$output "/include")))) + (replace 'move-static-library + (lambda* (#:key outputs #:allow-other-keys) + (let ((lib "/lib/glibc-hwcaps/x86-64-v4/libodgi.a")) + (mkdir-p (dirname (string-append #$output:static lib))) + (rename-file (string-append #$output lib) + (string-append #$output:static lib))))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t))))) + +;; This copy of odgi will automatically use the libraries that target the +;; x86_64 psABI which the hardware supports. 
+(define-public odgi-hwcaps + (package/inherit odgi + (name "odgi-hwcaps") + (arguments + (substitute-keyword-arguments (package-arguments odgi) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'install-optimized-libraries + (lambda* (#:key inputs outputs #:allow-other-keys) + (let ((hwcaps "/lib/glibc-hwcaps")) + (copy-recursively + (string-append (assoc-ref inputs "odgi-x86-64-v2") + hwcaps "/x86-64-v2") + (string-append #$output hwcaps "/x86-64-v2")) + (copy-recursively + (string-append (assoc-ref inputs "odgi-x86-64-v3") + hwcaps "/x86-64-v3") + (string-append #$output hwcaps "/x86-64-v3")) + (copy-recursively + (string-append (assoc-ref inputs "odgi-x86-64-v4") + hwcaps "/x86-64-v4") + (string-append #$output hwcaps "/x86-64-v4"))))))))) + (native-inputs + (modify-inputs (package-native-inputs odgi) + (append odgi-x86-64-v2 + odgi-x86-64-v3 + odgi-x86-64-v4))) + (properties `((tunable? . #f))))) + (define-public vg (package (name "vg") -- cgit v1.2.3 From 86583628060872edc5adb08cc73425087b153825 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Thu, 21 Sep 2023 12:32:27 +0300 Subject: mummer: Update to 4.0.0rc1. 
--- gn/packages/bioinformatics.scm | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 7a20200..78cbcb7 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -1796,19 +1796,30 @@ here}.") (define-public mummer (package (name "mummer") - (version "4.0.0beta2") + (version "4.0.0rc1") (source (origin (method url-fetch) (uri (string-append "https://github.com/mummer4/mummer/releases/" "download/v" version "/mummer-" version ".tar.gz")) (sha256 - (base32 - "14qvrmf0gkl4alnh8zgxlzmvwc027arfawl96i7jk75z33j7dknf")))) + (base32 "07bxw1vax1sai3g5xjn6sqngddlbnlabpqy373vw4fb55pdnl045")))) (build-system gnu-build-system) + (arguments + (list + #:phases + #~(modify-phases %standard-phases + (add-after 'configure 'skip-test_md5-tests + (lambda _ + ;; There seems to be a bug with how these tests are called. + (substitute* "Makefile" + (("tests/mummer.sh") "") + (("tests/nucmer.sh") "") + (("tests/genome.sh") "") + (("tests/sam.sh") ""))))))) (inputs - `(("gnuplot" ,gnuplot) - ("perl" ,perl))) + (list gnuplot + perl)) (home-page "http://mummer.sourceforge.net/") (synopsis "Efficient sequence alignment of full genomes") (description "MUMmer is a versatil alignment tool for DNA and protein sequences.") -- cgit v1.2.3 From 10823be9475e16396365c04212d175b2dce31f3e Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Fri, 22 Sep 2023 08:41:24 +0300 Subject: vg: Update to 1.50.0. 
--- gn/packages/bioinformatics.scm | 208 ++++++++++++++--------------------------- 1 file changed, 71 insertions(+), 137 deletions(-) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 78cbcb7..4f802f7 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -2241,104 +2241,34 @@ in-memory footprint at the cost of packing and unpacking.") (define-public vg (package (name "vg") - (version "1.39.0") + (version "1.50.0") (source (origin (method url-fetch) (uri (string-append "https://github.com/vgteam/vg/releases/download/v" version "/vg-v" version ".tar.gz")) (sha256 - (base32 "0cj575qr2jkingrm6r4ki7f89s7glrf18d4pvaa69smxh2vbajv3")) - (modules '((guix build utils))) + (base32 "1n06fh6qvffhbxy7m096r8cy16wi0nm6gfgi3rsjy9zrb7g1jzhs")) (snippet - '(begin - ;; List all the options, makes it easier to try to remove them. - ;(delete-file-recursively "deps/BBHash") - ;(delete-file-recursively "deps/DYNAMIC") - ;(delete-file-recursively "deps/FlameGraph") - ;(delete-file-recursively "deps/atomic_queue") - ;(delete-file-recursively "deps/backward-cpp") - (delete-file-recursively "deps/bash-tap") - ;(delete-file-recursively "deps/dozeu") - (delete-file-recursively "deps/elfutils") - (delete-file-recursively "deps/fastahack") - ;(delete-file-recursively "deps/fermi-lite") - ;(delete-file-recursively "deps/gbwt") - ;(delete-file-recursively "deps/gbwtgraph") - ;(delete-file-recursively "deps/gcsa2") - ;(delete-file-recursively "deps/gfakluge") - ;(delete-file-recursively "deps/gssw") - (delete-file-recursively "deps/htslib") - ;(delete-file-recursively "deps/ips4o") - (delete-file-recursively "deps/jemalloc") - ;(delete-file-recursively "deps/libVCFH") - ;(delete-file-recursively "deps/libbdsg") - ;(delete-file-recursively "deps/libbdsg/bdsg/deps") - (delete-file-recursively "deps/libbdsg/bdsg/deps/BBHash") - (delete-file-recursively "deps/libbdsg/bdsg/deps/DYNAMIC") - 
;(delete-file-recursively "deps/libbdsg/bdsg/deps/DYNAMIC/deps/hopscotch-map") - ;(delete-file-recursively "deps/libbdsg/bdsg/deps/hopscotch-map") - (delete-file-recursively "deps/libbdsg/bdsg/deps/libhandlegraph") - ;(delete-file-recursively "deps/libbdsg/bdsg/deps/mio") - (delete-file-recursively "deps/libbdsg/bdsg/deps/pybind11") - (delete-file-recursively "deps/libbdsg/bdsg/deps/sdsl-lite") - (delete-file-recursively "deps/libbdsg/bdsg/deps/sparsepp") - ;(delete-file-recursively "deps/libdeflate") - ;(delete-file-recursively "deps/libhandlegraph") - ;(delete-file-recursively "deps/libVCFH") - ;(delete-file-recursively "deps/libvgio") - ;(delete-file-recursively "deps/libvgio/deps") ; libhandlegraph - ;(delete-file-recursively "deps/lru_cache") - ;(delete-file-recursively "deps/mio") - ;(delete-file-recursively "deps/mmmultimap") - (delete-file-recursively "deps/mmmultimap/deps/DYNAMIC") - (delete-file-recursively "deps/mmmultimap/deps/args") - (delete-file-recursively "deps/mmmultimap/deps/atomic_queue") - ;(delete-file-recursively "deps/mmmultimap/deps/hopscotch-map") - (delete-file-recursively "deps/mmmultimap/deps/ips4o") - (delete-file-recursively "deps/mmmultimap/deps/mio") - ;(delete-file-recursively "deps/mmmultimap/deps/paryfor") - (delete-file-recursively "deps/mmmultimap/deps/sdsl-lite") - ;(delete-file-recursively "deps/pinchesAndCacti") - ;(delete-file-recursively "deps/progress_bar") - (delete-file-recursively "deps/raptor") - ;(delete-file-recursively "deps/sdsl-lite") - ;(delete-file-recursively "deps/sha1") - (delete-file-recursively "deps/snappy") - ;(delete-file-recursively "deps/sonLib") - (delete-file-recursively "deps/sparsehash") - ;(delete-file-recursively "deps/sparsepp") - ;(delete-file-recursively "deps/ssw") - ;(delete-file-recursively "deps/structures") - ;(delete-file-recursively "deps/sublinear-Li-Stephens") - (delete-file-recursively "deps/sublinear-Li-Stephens/deps") - (delete-file-recursively "deps/tabixpp") - 
(delete-file-recursively "deps/vcflib") - ;(delete-file-recursively "deps/xg") - (delete-file-recursively "deps/xg/deps") - ;; libvgio doesn't search the correct include directory. - (copy-recursively "deps/libhandlegraph/src/include/handlegraph" - "deps/libvgio/include/handlegraph"))))) + #~(begin + (use-modules (guix build utils)) + (substitute* (find-files "." "(CMakeLists\\.txt|Makefile)") + (("-march=native") "") + (("-mtune=native") "") + (("-msse4.2") "") + (("-mcx16") "")))))) (build-system gnu-build-system) (arguments `(#:phases (modify-phases %standard-phases (delete 'configure) ; no configure script - ,@(if (target-riscv64?) - ;; riscv64 doesn't take '-march=native. This needs to be removed - ;; for all architectures if/when vg is upstreamed. - `((add-after 'unpack 'dont-build-native - (lambda _ - (substitute* (append (find-files "." "CMakeLists\\.txt") - (find-files "." "Makefile")) - (("-march=native") ""))))) - '()) (add-after 'unpack 'patch-source (lambda* (#:key inputs #:allow-other-keys) + ;; Most of these are so that we can skip bootstrapping some of the sources. (substitute* "Makefile" ;; PKG_CONFIG_DEPS needs to be substituted to actually link to everything. - (("cairo jansson") - "cairo htslib jansson libdw libelf protobuf raptor2 sdsl-lite tabixpp vcflib") + (("cairo libzstd") + "cairo htslib libzstd libdw libelf protobuf raptor2 sdsl-lite tabixpp vcflib fastahack") ;; Skip the part where we link static libraries special. 
It doesn't like the changes we make (("-Wl,-B.*") "\n") @@ -2359,13 +2289,9 @@ in-memory footprint at the cost of packing and unpacking.") (string-append " " (assoc-ref inputs "vcflib") "/lib/libvcflib.so")) ((" \\$\\(BIN_DIR\\)/vcf2tsv") (string-append " " (assoc-ref inputs "vcflib") "/bin/vcf2tsv")) - ((" \\$\\(VCFLIB_DIR\\)/bin/vcf2tsv") - (string-append " " (assoc-ref inputs "vcflib") "/bin/vcf2tsv")) ((" \\$\\(FASTAHACK_DIR\\)/fastahack") (string-append " " (assoc-ref inputs "fastahack") "/bin/fastahack")) - ((" \\$\\(FASTAHACK_DIR\\)/bin/fastahack") - (string-append " " (assoc-ref inputs "fastahack") "/bin/fastahack")) (("\\+= \\$\\(OBJ_DIR\\)/Fasta\\.o") (string-append "+= " (assoc-ref inputs "fastahack") "/lib/libfastahack.so")) @@ -2403,24 +2329,21 @@ in-memory footprint at the cost of packing and unpacking.") ;; vcf2tsv shows up in a couple of other places (substitute* "test/t/02_vg_construct.t" (("../deps/vcflib/bin/vcf2tsv") (which "vcf2tsv"))))) - (add-after 'unpack 'fix-fastahack-dependency + (add-after 'unpack 'dont-build-shared-vgio (lambda _ - (substitute* "src/aligner.hpp" - (("Fasta.h") "fastahack/Fasta.h")))) - (add-after 'unpack 'fix-hopscotch-dependency + ;; vg will link with libvgio and fail the 'validate-runpath phase. + (substitute* "deps/libvgio/CMakeLists.txt" + (("TARGETS vgio vgio_static") "TARGETS vgio_static")))) + (add-after 'unpack 'fix-fastahack-dependency (lambda _ - (substitute* "Makefile" - ;; The build directory for hopscotch_map-prefix. - (("rm -Rf build && ") "")) - ;; Don't try to download hopscotch_map from the internet. - (substitute* "deps/DYNAMIC/CMakeLists.txt" - ((".*GIT_REPOSITORY.*") - "SOURCE_DIR \"../../libbdsg/bdsg/deps/hopscotch-map\"\n") - ((".*BUILD_IN_SOURCE.*") "")) - ;; We still need to copy it to the expected location. 
- (copy-recursively - "deps/libbdsg/bdsg/deps/hopscotch-map" - "deps/DYNAMIC/build/hopscotch_map-prefix/src/hopscotch_map"))) + (substitute* (append (list "src/aligner.hpp" + "src/vg.hpp") + (find-files "deps/vcflib/src" "\\.cpp$")) + (("Fasta.h") "fastahack/Fasta.h")) + (substitute* '("deps/vcflib/src/Variant.h" + "src/constructor.hpp" + "src/index_registry.cpp") + (("<Fasta.h>") "\"fastahack/Fasta.h\"")))) (add-after 'unpack 'adjust-tests (lambda* (#:key inputs #:allow-other-keys) (let ((bash-tap (assoc-ref inputs "bash-tap"))) @@ -2429,13 +2352,21 @@ in-memory footprint at the cost of packing and unpacking.") (string-append "BASH_TAP_ROOT=" bash-tap "/bin\n")) ((".*bash-tap-bootstrap") (string-append ". " bash-tap "/bin/bash-tap-bootstrap"))) - ;; Lets skip the 4 failing tests for now. They fail with our + (substitute* "test/t/02_vg_construct.t" + (("../deps/fastahack/fastahack") (which "fastahack")) + (("../bin/vcf2tsv") (which "vcf2tsv"))) + ;; Lets skip the 7 failing tests for now. They fail with our ;; bash-tap and the bundled one.
(substitute* "test/t/02_vg_construct.t" + ((".*self-inconsistent.*") "is $(true) \"\" \"\"\n") ((".*the graph contains.*") "is $(true) \"\" \"\"\n")) (substitute* '("test/t/07_vg_map.t" "test/t/33_vg_mpmap.t") ((".*node id.*") "is $(true) \"\" \"\"\n")) + (substitute* "test/t/50_vg_giraffe.t" + ((".*A long read can.*") "is $(true) \"\" \"\"\n") + ((".*A long read has.*") "is $(true) \"\" \"\"\n") + ((".*Long read minimizer.*") "is $(true) \"\" \"\"\n")) ;; Don't test the docs, we're not providing npm (substitute* "Makefile" ((".*test-docs.*") ""))))) @@ -2454,40 +2385,42 @@ in-memory footprint at the cost of packing and unpacking.") (find-files "doc/man" "\\.1$")))))) #:test-target "test")) (native-inputs - `(,@(if (member (%current-system) - (package-transitive-supported-systems ruby-asciidoctor)) - `(("asciidoctor" ,ruby-asciidoctor)) - '()) - ("bash-tap" ,bash-tap) - ("bc" ,bc) - ("cmake" ,cmake-minimal) - ("jq" ,jq) - ("perl" ,perl) - ("pkg-config" ,pkg-config) - ("samtools" ,samtools) - ("util-linux" ,util-linux) - ("which" ,which) - ("xxd" ,xxd))) + (append + (if (supported-package? 
ruby-asciidoctor) + (list ruby-asciidoctor) + '()) + (list bash-tap + bc + cmake-minimal + jq + perl + pkg-config + samtools + util-linux + which + xxd))) (inputs - `(("boost" ,boost) - ("cairo" ,cairo) - ("curl" ,curl) - ("elfutils" ,elfutils) - ("fastahack" ,fastahack) - ("htslib" ,htslib) - ("jansson" ,jansson) - ("jemalloc" ,jemalloc) - ("libdivsufsort" ,libdivsufsort) - ("ncurses" ,ncurses) - ("protobuf" ,protobuf) - ("raptor2" ,raptor2) - ("sdsl-lite" ,sdsl-lite) - ("smithwaterman" ,smithwaterman) - ("snappy" ,snappy) - ("sparsehash" ,sparsehash) - ("tabixpp" ,tabixpp) - ("vcflib" ,vcflib) - ("zlib" ,zlib))) + (list boost + cairo + curl + elfutils + fastahack + htslib + jansson + jemalloc + libdivsufsort + ncurses + openmpi + protobuf + raptor2 + sdsl-lite + smithwaterman + snappy + sparsehash + tabixpp + vcflib + zlib + (list zstd "lib"))) (home-page "https://www.biostars.org/t/vg/") (synopsis "Tools for working with genome variation graphs") (description "Variation graphs provide a succinct encoding of the sequences @@ -2501,7 +2434,8 @@ gene models and transcripts) as walks through nodes connected by edges @end enumerate This model is similar to sequence graphs that have been used in assembly and multiple sequence alignment.") - (properties `((release-monitoring-url . "https://github.com/vgteam/vg/releases"))) + (properties `((release-monitoring-url . "https://github.com/vgteam/vg/releases") + (tunable? . #t))) (license (list license:expat ; main program -- cgit v1.2.3 From 5b36e14b798dbebd0c1aeec818d4b7991dc29839 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Fri, 22 Sep 2023 16:00:58 +0300 Subject: vg: Prepare for using a shared library. 
--- gn/packages/bioinformatics.scm | 49 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 4f802f7..700dec4 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -2268,7 +2268,7 @@ in-memory footprint at the cost of packing and unpacking.") (substitute* "Makefile" ;; PKG_CONFIG_DEPS needs to be substituted to actually link to everything. (("cairo libzstd") - "cairo htslib libzstd libdw libelf protobuf raptor2 sdsl-lite tabixpp vcflib fastahack") + "cairo htslib libzstd libdw libelf protobuf raptor2 sdsl-lite tabixpp vcflib fastahack libdeflate") ;; Skip the part where we link static libraries special. It doesn't like the changes we make (("-Wl,-B.*") "\n") @@ -2285,6 +2285,10 @@ in-memory footprint at the cost of packing and unpacking.") (("\\$\\(LIB_DIR\\)/pkgconfig/htslib\\.pc") (string-append " " (assoc-ref inputs "htslib") "/lib/pkgconfig/htslib.pc")) + (("\\$\\(CWD\\)/\\$\\(LIB_DIR\\)/libdeflate\\.a") "$(LIB_DIR)/libdeflate.a") + ((" \\$\\(LIB_DIR\\)/libdeflate\\.a") + (string-append " " (assoc-ref inputs "libdeflate") "/lib/libdeflate.so")) + ((" \\$\\(LIB_DIR\\)/libvcflib.a") (string-append " " (assoc-ref inputs "vcflib") "/lib/libvcflib.so")) ((" \\$\\(BIN_DIR\\)/vcf2tsv") @@ -2309,8 +2313,12 @@ in-memory footprint at the cost of packing and unpacking.") ;((" \\$\\(LIB_DIR\\)/libsdsl.a") ; (string-append " " (assoc-ref inputs "sdsl-lite") "/lib/libsdsl.so")) + ((" \\$\\(LIB_DIR\\)/%divsufsort.a") + (string-append " " (assoc-ref inputs "libdivsufsort") "/lib/%divsufsort.so")) ((" \\$\\(LIB_DIR\\)/libdivsufsort.a") (string-append " " (assoc-ref inputs "libdivsufsort") "/lib/libdivsufsort.so")) + ((" \\$\\(LIB_DIR\\)/%divsufsort64.a") + (string-append " " (assoc-ref inputs "libdivsufsort") "/lib/%divsufsort64.so")) ((" \\$\\(LIB_DIR\\)/libdivsufsort64.a") 
(string-append " " (assoc-ref inputs "libdivsufsort") "/lib/libdivsufsort64.so")) @@ -2325,10 +2333,38 @@ in-memory footprint at the cost of packing and unpacking.") ((" \\$\\(LIB_DIR\\)/libraptor2.a") (string-append " " (assoc-ref inputs "raptor2") "/lib/libraptor2.so")) ((" \\$\\(BIN_DIR\\)/rapper") - (string-append " " (assoc-ref inputs "raptor2") "/bin/rapper"))) - ;; vcf2tsv shows up in a couple of other places - (substitute* "test/t/02_vg_construct.t" - (("../deps/vcflib/bin/vcf2tsv") (which "vcf2tsv"))))) + (string-append " " (assoc-ref inputs "raptor2") "/bin/rapper"))))) + #; + (add-before 'patch-source 'use-shared-libvg + (lambda* (#:key inputs outputs #:allow-other-keys) + (substitute* "Makefile" + (("libvg\\.a") "libvg.so") + ;; Have the linker find the shared library. + (("\\$\\(LIB_DIR\\)/libvg.\\$\\(SHARED_SUFFIX\\) \\$\\(LDFLAGS\\)") + "-lvg $(LDFLAGS)") + (("\\$\\(LDFLAGS\\) \\$\\(LIB_DIR\\)/libvg.so") + "$(LDFLAGS) -lvg")) + (setenv "LDFLAGS" (string-append "-Wl,-rpath=" + (assoc-ref outputs "out") "/lib")) + + ;; We need to tell a number of dependencies to build with -fPIC. + (substitute* "Makefile" + (("^CXXFLAGS := -O3") + (string-append "CFLAGS := -fPIC\n" + "CXXFLAGS := -O3 -fPIC")) + (("^export CXXFLAGS") + (string-append "export CFLAGS\n" + "$(info CFLAGS are $(CFLAGS))\n" + "export CXXFLAGS")) + ((" \\$\\(LIB_DIR\\)/libjemalloc.a") + (string-append " " (assoc-ref inputs "jemalloc") + "/lib/libjemalloc_pic.a"))) + ;; We don't want to pull in all the global CXXFLAGS here. + (substitute* "deps/sublinear-Li-Stephens/makefile" + (("^CXXFLAGS:=") "CXXFLAGS:= -fPIC ")) + ;; CMAKE_CXX_FLAGS aren't set globally. + (substitute* "deps/kff-cpp-api/CMakeLists.txt" + (("CMAKE_CXX_FLAGS \"") "CMAKE_CXX_FLAGS \" -fPIC ")))) (add-after 'unpack 'dont-build-shared-vgio (lambda _ ;; vg will link with libvgio and fail the 'validate-runpath phase. 
@@ -2378,7 +2414,7 @@ in-memory footprint at the cost of packing and unpacking.") (lambda* (#:key outputs #:allow-other-keys) (let ((out (assoc-ref outputs "out"))) (install-file "bin/vg" (string-append out "/bin")) - (install-file "lib/libvg.a" (string-append out "/lib")) + ;(install-file "lib/libvg.so" (string-append out "/lib")) (for-each (lambda (file) (install-file file (string-append out "/share/man/man1"))) @@ -2408,6 +2444,7 @@ in-memory footprint at the cost of packing and unpacking.") htslib jansson jemalloc + libdeflate libdivsufsort ncurses openmpi -- cgit v1.2.3 From 2ceeed2d0a9938327784b26a68f21ff533e9c634 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Fri, 22 Sep 2023 17:33:21 +0300 Subject: Add pggb. --- gn/packages/bioinformatics.scm | 129 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 700dec4..0a131f5 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -48,6 +48,7 @@ #:use-module (gnu packages fontutils) #:use-module (gnu packages gcc) #:use-module (gnu packages ghostscript) + #:use-module (gnu packages graph) #:use-module (gnu packages gtk) #:use-module (gnu packages guile) #:use-module (gnu packages image) @@ -61,6 +62,7 @@ #:use-module (gnu packages mpi) #:use-module (gnu packages ncurses) #:use-module (gnu packages ocaml) + #:use-module (gnu packages parallel) #:use-module (gnu packages perl) #:use-module (gnu packages pkg-config) #:use-module (gnu packages protobuf) @@ -2483,6 +2485,133 @@ multiple sequence alignment.") license:zlib ; deps/sonLib/externalTools/cutest license:boost1.0)))) ; catch.hpp +(define-public pggb + (let ((commit "9ebff27320382e470ed38a85b4448402e1e7c353") + (revision "1")) + (package + (name "pggb") + (version (git-version "0.5.1" revision commit)) + (source (origin + (method git-fetch) + (uri (git-reference + (url 
"https://github.com/pangenome/pggb") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0rgpj52q3ai7f1saqbilgx5gz4f403x3427wq649qwv84ivmi1sf")))) + (build-system copy-build-system) + (arguments + (list + #:install-plan + #~'(("pggb" "bin/") + ("partition-before-pggb" "bin/") + ("scripts/" "bin/") + ("scripts" "bin/scripts")) + #:phases + #~(modify-phases %standard-phases + (add-before 'install 'patch-binary-path + (lambda* (#:key inputs #:allow-other-keys) + (substitute* "scripts/vcf_preprocess.sh" + (("bcftools ") + (string-append (search-input-file inputs "/bin/bcftools") " "))) + (wrap-script "scripts/net2communities.py" + `("GUIX_PYTHONPATH" ":" prefix + (,(getenv "GUIX_PYTHONPATH")))))) + (add-after 'install 'wrap-scripts + (lambda* (#:key inputs outputs #:allow-other-keys) + (let ((out (assoc-ref outputs "out"))) + (for-each + (lambda (file) + (wrap-script file + `("PATH" ":" prefix + ,(map (lambda (input) (string-append input "/bin")) + '#$(map (lambda (label) (this-package-input label)) + (list "bcftools" + "bedtools" + "gfaffix" + "fastix" + "multiqc" + "mummer" + "odgi-hwcaps" + "pafplot" + "parallel" + "pigz" + "r-data-table" + "rtg-tools" + "samtools" + "seqwish" + "smoothxg" + ;"tabix" + "vcfbub" + "vcflib" + "vg" + "wfmash-hwcaps")))))) + (list (string-append out "/bin/pggb") + (string-append out "/bin/partition-before-pggb") + (string-append out "/bin/gfa2evaluation.sh") + (string-append out "/bin/scripts/gfa2evaluation.sh")))))) + (add-after 'install 'substitute-file-paths + (lambda* (#:key outputs #:allow-other-keys) + (let ((out (assoc-ref outputs "out"))) + (substitute* (string-append out "/bin/gfa2evaluation.sh") + (("/usr/local/bin/vcf_preprocess.sh") + (string-append out "/bin/vcf_preprocess.sh")) + (("/usr/local/bin/nucmer2vcf.R") + (string-append out "/bin/nucmer2vcf.R"))))))))) + (inputs + (list bcftools + bedtools + gfaffix + guile-3.0 ; for wrap-script + fastix + multiqc + mummer + odgi-hwcaps + pafplot + 
parallel + pigz + python-igraph + python-pycairo + python-wrapper + r-data-table + rtg-tools + samtools + seqwish + smoothxg + ;tabix + vcfbub + vcflib + vg + wfmash-hwcaps)) + (home-page "https://doi.org/10.1101/2023.04.05.535718") + (synopsis "PanGenome Graph Builder") + (description "@command{pggb} builds +@url{https://doi.org/10.1146%2Fannurev-genom-120219-080406, pangenome} +@url{https://doi.org/10.1038/nbt.4227, variation graphs} from a set of input +sequences. +A pangenome variation graph is a kind of generic multiple sequence alignment. +It lets us understand any kind of sequence variation between a collection of +genomes. It shows us similarity where genomes walk through the same parts of +the graph, and differences where they do not. +@command{pggb} generates this kind of graph using an all-to-all alignment of +input sequences (@url{https://github.com/waveygang/wfmash, wfmash}), graph +induction (@url{https://doi.org/10.1101/2022.02.14.480413, seqwish}), and +progressive normalization (@url{https://github.com/pangenome/smoothxg, +smoothxg}, @url{https://github.com/marschall-lab/GFAffix, gfaffix}). After +construction, @command{pggb} generates diagnostic visualizations of the graph +(@url{https://doi.org/10.1093/bioinformatics/btac308, odgi}). A variant call +report (in VCF) representing both small and large variants can be generated +based on any reference genome included in the graph +(@url{https://github.com/vgteam/vg, vg}). @command{pggb} writes its output in +@url{https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md, GFAv1} format, +which can be used as input by numerous \"genome graph\" and pangenome tools, +such as the @url{https://github.com/vgteam/vg, vg} and +@url{https://doi.org/10.1093/bioinformatics/btac308, odgi} toolkits. 
+@command{pggb} has been tested at scale in the @acronym{Human Pangenome +Reference Consortium, HPRC} as a method to build a graph from the +@url{https://doi.org/10.1101/2022.07.09.499321, draft human pangenome}.") + (license license:expat)))) + (define-public ucsc-genome-browser (package (name "ucsc-genome-browser") -- cgit v1.2.3 From b50a05b1b907d8eb0728e7b21ddff3b645aaf53e Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Sun, 24 Sep 2023 08:46:51 +0300 Subject: pggb: Add htslib for tabix dependency. --- gn/packages/bioinformatics.scm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 0a131f5..969f11c 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -2529,6 +2529,7 @@ multiple sequence alignment.") (list "bcftools" "bedtools" "gfaffix" + "htslib" "fastix" "multiqc" "mummer" @@ -2541,7 +2542,6 @@ multiple sequence alignment.") "samtools" "seqwish" "smoothxg" - ;"tabix" "vcfbub" "vcflib" "vg" @@ -2563,6 +2563,7 @@ multiple sequence alignment.") bedtools gfaffix guile-3.0 ; for wrap-script + htslib ; tabix fastix multiqc mummer @@ -2578,7 +2579,6 @@ multiple sequence alignment.") samtools seqwish smoothxg - ;tabix vcfbub vcflib vg -- cgit v1.2.3 From 08c989ee9a328ce5a26cc34a95d71a5c92cfa09e Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Sun, 24 Sep 2023 12:16:19 +0300 Subject: pggb: Remove python-pycairo --- gn/packages/bioinformatics.scm | 1 - 1 file changed, 1 deletion(-) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 969f11c..c4463a2 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -2572,7 +2572,6 @@ multiple sequence alignment.") parallel pigz python-igraph - python-pycairo python-wrapper r-data-table rtg-tools -- cgit v1.2.3 From 
d73c21e18365433b78ae5f92d80919ed8f6caa04 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Sun, 24 Sep 2023 12:55:44 +0300 Subject: seqwish: Link with sdsl-lib --- gn/packages/bioinformatics.scm | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index c4463a2..a043358 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -1637,6 +1637,19 @@ runApp(launch.browser=0, port=4208)~%\n" (else '()))) #:phases (modify-phases %standard-phases + (add-after 'unpack 'link-with-some-shared-libraries + (lambda* (#:key inputs #:allow-other-keys) + (substitute* '("CMakeLists.txt" + "deps/mmmulti/CMakeLists.txt") + (("\".*libsdsl\\.a\"") "\"-lsdsl\"") + (("\".*libdivsufsort\\.a\"") "\"-ldivsufsort\"") + (("\".*libdivsufsort64\\.a\"") "\"-ldivsufsort64\"") + (("\\$\\{sdsl-lite_INCLUDE\\}") + (string-append (assoc-ref inputs "sdsl-lite") + "/include/sdsl")) + (("\\$\\{sdsl-lite-divsufsort_INCLUDE\\}") + (string-append (assoc-ref inputs "libdivsufsort") + "/include"))))) (replace 'check (lambda* (#:key tests? #:allow-other-keys) ;; Add seqwish to the PATH for the tests. 
@@ -1646,6 +1659,9 @@ runApp(launch.browser=0, port=4208)~%\n" (invoke "make")))))))) (inputs (list jemalloc + libdivsufsort + openmpi + sdsl-lite zlib)) (native-inputs (list perl)) -- cgit v1.2.3 From a295df1e9e233df66e4e70b55af01909c5cc9b06 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Sun, 24 Sep 2023 13:06:48 +0300 Subject: smoothxg: Link with sdsl-lite --- gn/packages/bioinformatics.scm | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index a043358..c833e6a 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -1705,6 +1705,19 @@ dictionaries to record a queryable version of the graph.") #~(list (string-append "CC = " #$(cc-for-target))) #:phases #~(modify-phases %standard-phases + (add-after 'unpack 'link-with-some-shared-libraries + (lambda* (#:key inputs #:allow-other-keys) + (substitute* '("CMakeLists.txt" + "deps/mmmulti/CMakeLists.txt" + "deps/odgi/deps/mmmulti/CMakeLists.txt") + (("\".*libsdsl\\.a\"") "\"-lsdsl\"") + (("\".*libdivsufsort\\.a\"") "\"-ldivsufsort\"") + (("\".*libdivsufsort64\\.a\"") "\"-ldivsufsort64\"") + (("\\$\\{sdsl-lite_INCLUDE\\}") + (search-input-directory inputs "/include/sdsl")) + (("\\$\\{sdsl-lite-divsufsort_INCLUDE\\}") + (dirname + (search-input-file inputs "/include/divsufsort.h")))))) (add-before 'build 'build-abPOA (lambda* (#:key make-flags #:allow-other-keys) ;; This helps with portability to other architectures. 
@@ -1715,9 +1728,11 @@ dictionaries to record a queryable version of the graph.") (apply invoke "make" "libabpoa" make-flags))))))) (inputs (list jemalloc + libdivsufsort openmpi pybind11 python + sdsl-lite zlib (list zstd "lib"))) (native-inputs -- cgit v1.2.3 From ac6c844f497f9e52887336a35542483053df6ce7 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Sun, 24 Sep 2023 13:50:14 +0300 Subject: smoothxg: Link with odgi --- gn/packages/bioinformatics.scm | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index c833e6a..c2a2e09 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -1713,11 +1713,14 @@ dictionaries to record a queryable version of the graph.") (("\".*libsdsl\\.a\"") "\"-lsdsl\"") (("\".*libdivsufsort\\.a\"") "\"-ldivsufsort\"") (("\".*libdivsufsort64\\.a\"") "\"-ldivsufsort64\"") + (("\".*libodgi\\.a\"") "\"-lodgi\"") (("\\$\\{sdsl-lite_INCLUDE\\}") (search-input-directory inputs "/include/sdsl")) (("\\$\\{sdsl-lite-divsufsort_INCLUDE\\}") (dirname - (search-input-file inputs "/include/divsufsort.h")))))) + (search-input-file inputs "/include/divsufsort.h"))) + (("\\$\\{odgi_INCLUDE\\}") + (search-input-directory inputs "/include/odgi"))))) (add-before 'build 'build-abPOA (lambda* (#:key make-flags #:allow-other-keys) ;; This helps with portability to other architectures. @@ -1729,6 +1732,7 @@ dictionaries to record a queryable version of the graph.") (inputs (list jemalloc libdivsufsort + odgi openmpi pybind11 python -- cgit v1.2.3 From bc0bc49230c7bfbf2343301345da79a04bff43c9 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Sun, 24 Sep 2023 15:24:17 +0300 Subject: pggb: Provide version with glibc-hwcaps. 
--- gn/packages/bioinformatics.scm | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index c2a2e09..1fb8844 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -20,6 +20,7 @@ #:use-module (guix build-system waf) #:use-module (gnu packages) #:use-module (gn packages crates-io) + #:use-module (gn packages datastructures) #:use-module (gn packages java) #:use-module (gn packages ocaml) #:use-module (gn packages python) @@ -2602,7 +2603,7 @@ multiple sequence alignment.") fastix multiqc mummer - odgi-hwcaps + odgi pafplot parallel pigz @@ -2616,7 +2617,7 @@ multiple sequence alignment.") vcfbub vcflib vg - wfmash-hwcaps)) + wfmash)) (home-page "https://doi.org/10.1101/2023.04.05.535718") (synopsis "PanGenome Graph Builder") (description "@command{pggb} builds @@ -2646,6 +2647,19 @@ Reference Consortium, HPRC} as a method to build a graph from the @url{https://doi.org/10.1101/2022.07.09.499321, draft human pangenome}.") (license license:expat)))) +(define use-glibc-hwcaps + (package-input-rewriting/spec + ;; Replace some packages with ones built targeting custom packages build + ;; with glibc-hwcaps support. + `(("sdsl-lite" . ,(const sdsl-lite-hwcaps)) + ("odgi" . ,(const odgi-hwcaps)) + ("wfmash" . ,(const wfmash-hwcaps))))) + +(define-public pggb-with-hwcaps + (package + (inherit (use-glibc-hwcaps pggb)) + (name "pggb-with-hwcaps"))) + (define-public ucsc-genome-browser (package (name "ucsc-genome-browser") -- cgit v1.2.3 From 98eb79fc72ab4b6620eb13b7c021ce4975bc2a71 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Sun, 24 Sep 2023 15:52:41 +0300 Subject: pggb: Fix typo. 
--- gn/packages/bioinformatics.scm | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 1fb8844..89766bb 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -2561,7 +2561,9 @@ multiple sequence alignment.") (wrap-script file `("PATH" ":" prefix ,(map (lambda (input) (string-append input "/bin")) - '#$(map (lambda (label) (this-package-input label)) + '#$(map (lambda (label) + (or (this-package-input (string-append label "-hwcaps")) + (this-package-input label))) (list "bcftools" "bedtools" "gfaffix" @@ -2569,7 +2571,7 @@ multiple sequence alignment.") "fastix" "multiqc" "mummer" - "odgi-hwcaps" + "odgi" "pafplot" "parallel" "pigz" @@ -2581,7 +2583,7 @@ multiple sequence alignment.") "vcfbub" "vcflib" "vg" - "wfmash-hwcaps")))))) + "wfmash")))))) (list (string-append out "/bin/pggb") (string-append out "/bin/partition-before-pggb") (string-append out "/bin/gfa2evaluation.sh") -- cgit v1.2.3 From 2dfb781a7f9292cc171e4063b0fd746244381566 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Sun, 24 Sep 2023 17:15:37 +0300 Subject: vg: Link with some shared libraries. 
--- gn/packages/bioinformatics.scm | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 89766bb..afffcd4 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -2372,6 +2372,20 @@ in-memory footprint at the cost of packing and unpacking.") (string-append " " (assoc-ref inputs "raptor2") "/lib/libraptor2.so")) ((" \\$\\(BIN_DIR\\)/rapper") (string-append " " (assoc-ref inputs "raptor2") "/bin/rapper"))))) + (add-after 'unpack 'link-with-some-shared-libraries + (lambda* (#:key inputs #:allow-other-keys) + (substitute* '("deps/mmmultimap/CMakeLists.txt" + "deps/xg/CMakeLists.txt" + "deps/xg/deps/mmmulti/CMakeLists.txt") + (("\".*libsdsl\\.a\"") "\"-lsdsl\"") + (("\".*libdivsufsort\\.a\"") "\"-ldivsufsort\"") + (("\".*libdivsufsort64\\.a\"") "\"-ldivsufsort64\"") + (("\\$\\{sdsl-lite_INCLUDE\\}") + (string-append (assoc-ref inputs "sdsl-lite") + "/include/sdsl")) + (("\\$\\{sdsl-lite-divsufsort_INCLUDE\\}") + (string-append (assoc-ref inputs "libdivsufsort") + "/include"))))) #; (add-before 'patch-source 'use-shared-libvg (lambda* (#:key inputs outputs #:allow-other-keys) -- cgit v1.2.3 From 3eb44711f16cd57ce625ce1011041390c7c270da Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Mon, 25 Sep 2023 22:03:12 +0300 Subject: seqwish: Make package tunable. 
--- gn/packages/bioinformatics.scm | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index afffcd4..285b01d 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -1613,7 +1613,8 @@ runApp(launch.browser=0, port=4208)~%\n" (snippet #~(begin (use-modules (guix build utils)) - (substitute* '("deps/atomic_queue/Makefile" + (substitute* '("CMakeLists.txt" + "deps/atomic_queue/Makefile" "deps/mmmulti/deps/DYNAMIC/CMakeLists.txt" "deps/mmmulti/deps/atomic_queue/Makefile" "deps/mmmulti/deps/ips4o/CMakeLists.txt") @@ -1625,17 +1626,12 @@ runApp(launch.browser=0, port=4208)~%\n" (build-system cmake-build-system) (arguments `(#:configure-flags - '(,@(cond ((target-x86-64?) - ;; This seems to be about the minimum - '("-DEXTRA_FLAGS=-march=x86-64-v2")) - ((target-aarch64?) - '("-DEXTRA_FLAGS=-march=armv8-a")) - ((target-riscv64?) - '("-DEXTRA_FLAGS=-march=rv64imafdc")) - ((target-ppc64le?) - '("-DEXTRA_FLAGS=-mcpu=power8")) - ;; The default case is '-march=native' - (else '()))) + '(,@(if (target-x86?) + ;; This is the minimum needed to compile on x86_64, and is a + ;; subset of any other optimizations which might be applied. + '("-DCMAKE_C_FLAGS=-mcx16" + "-DCMAKE_CXX_FLAGS=-mcx16") + '())) #:phases (modify-phases %standard-phases (add-after 'unpack 'link-with-some-shared-libraries @@ -1678,6 +1674,7 @@ large inputs that are commonly encountered when working with large numbers of noisy input sequences. Memory usage during construction and traversal is limited by the use of sorted disk-backed arrays and succinct rank/select dictionaries to record a queryable version of the graph.") + (properties `((tunable? . 
#t))) (license license:expat))) (define-public smoothxg -- cgit v1.2.3 From 6ee7dfd989d93a083b13a0463e486b5c85ca1ac9 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Tue, 26 Sep 2023 10:37:16 +0300 Subject: seqwish: Build and link against shared library. --- gn/packages/bioinformatics.scm | 32 ++++--- seqwish-shared-library.diff | 195 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 216 insertions(+), 11 deletions(-) create mode 100644 seqwish-shared-library.diff (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 285b01d..50c207d 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -1609,7 +1609,8 @@ runApp(launch.browser=0, port=4208)~%\n" (file-name (git-file-name name version)) (sha256 (base32 "0xnv40kjlb610bk67n4xdqz5dfsjhrqld5bxzblji57k6bb4n66x")) - (patches (search-patches "seqwish-paryfor-riscv.diff")) + (patches (search-patches "seqwish-paryfor-riscv.diff" + "seqwish-shared-library.diff")) (snippet #~(begin (use-modules (guix build utils)) @@ -1626,14 +1627,24 @@ runApp(launch.browser=0, port=4208)~%\n" (build-system cmake-build-system) (arguments `(#:configure-flags - '(,@(if (target-x86?) - ;; This is the minimum needed to compile on x86_64, and is a - ;; subset of any other optimizations which might be applied. - '("-DCMAKE_C_FLAGS=-mcx16" - "-DCMAKE_CXX_FLAGS=-mcx16") - '())) + (cons* ,@(if (target-x86?) + ;; This is the minimum needed to compile on x86_64, and is a + ;; subset of any other optimizations which might be applied. + '("-DCMAKE_C_FLAGS=-mcx16" + "-DCMAKE_CXX_FLAGS=-mcx16") + '()) + '("-DSEQWISH_LINK_SHARED_LIBRARY=ON")) #:phases (modify-phases %standard-phases + (add-after 'unpack 'set-version + (lambda _ + ;; This stashes the build version in the executable. 
+ (mkdir "include") + (substitute* "CMakeLists.txt" + (("^execute_process") "#execute_process")) + (with-output-to-file "include/seqwish_git_version.hpp" + (lambda () + (format #t "#define SEQWISH_GIT_VERSION \"~a\"~%" ,version))))) (add-after 'unpack 'link-with-some-shared-libraries (lambda* (#:key inputs #:allow-other-keys) (substitute* '("CMakeLists.txt" @@ -1642,11 +1653,10 @@ runApp(launch.browser=0, port=4208)~%\n" (("\".*libdivsufsort\\.a\"") "\"-ldivsufsort\"") (("\".*libdivsufsort64\\.a\"") "\"-ldivsufsort64\"") (("\\$\\{sdsl-lite_INCLUDE\\}") - (string-append (assoc-ref inputs "sdsl-lite") - "/include/sdsl")) + (search-input-directory inputs "/include/sdsl")) (("\\$\\{sdsl-lite-divsufsort_INCLUDE\\}") - (string-append (assoc-ref inputs "libdivsufsort") - "/include"))))) + (dirname + (search-input-file inputs "/include/divsufsort.h")))))) (replace 'check (lambda* (#:key tests? #:allow-other-keys) ;; Add seqwish to the PATH for the tests. diff --git a/seqwish-shared-library.diff b/seqwish-shared-library.diff new file mode 100644 index 0000000..be982de --- /dev/null +++ b/seqwish-shared-library.diff @@ -0,0 +1,195 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 09eafb5..a39f90f 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -7,6 +7,8 @@ project(seqwish) + # We build using c++14 + set(CMAKE_CXX_STANDARD 14) + ++include(GNUInstallDirs) ++ + set(THREADS_PREFER_PTHREAD_FLAG ON) + find_package(Threads REQUIRED) + +@@ -26,6 +28,8 @@ if(NOT DEFINED EXTRA_FLAGS) + "Extra compilation flags for C and CXX." 
FORCE) + endif() + ++set(SEQWISH_LINK_SHARED_LIBRARY OFF CACHE BOOL "Link the seqwish executable against the libseqwish shared library") ++ + if (${CMAKE_BUILD_TYPE} MATCHES Release) + set(EXTRA_FLAGS "-Ofast ${EXTRA_FLAGS}") + set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG") # reset CXX_FLAGS to replace -O3 with -Ofast +@@ -82,7 +86,7 @@ include(${CMAKE_ROOT}/Modules/ExternalProject.cmake) + # sdsl-lite (full build using its cmake config) + ExternalProject_Add(sdsl-lite + SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/sdsl-lite" +- CMAKE_ARGS "${CMAKE_ARGS};-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>" ++ CMAKE_ARGS "${CMAKE_ARGS};-DCMAKE_CXX_FLAGS=-fPIC;-DCMAKE_C_FLAGS=-fPIC;-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>" + UPDATE_COMMAND "" + INSTALL_COMMAND "") + ExternalProject_Get_property(sdsl-lite INSTALL_DIR) +@@ -203,27 +207,125 @@ set(mio_INCLUDE "${SOURCE_DIR}/include") + #set(CMAKE_BUILD_TYPE Debug) + set(CMAKE_BUILD_TYPE Release) + ++# set up our target libraries and specify its dependencies and includes ++add_library( libseqwish_static ++ ${CMAKE_SOURCE_DIR}/src/utils.cpp ++ ${CMAKE_SOURCE_DIR}/src/tempfile.cpp ++ ${CMAKE_SOURCE_DIR}/src/main.cpp ++ ${CMAKE_SOURCE_DIR}/src/seqindex.cpp ++ ${CMAKE_SOURCE_DIR}/src/paf.cpp ++ ${CMAKE_SOURCE_DIR}/src/sxs.cpp ++ ${CMAKE_SOURCE_DIR}/src/cigar.cpp ++ ${CMAKE_SOURCE_DIR}/src/alignments.cpp ++ ${CMAKE_SOURCE_DIR}/src/pos.cpp ++ ${CMAKE_SOURCE_DIR}/src/match.cpp ++ ${CMAKE_SOURCE_DIR}/src/transclosure.cpp ++ ${CMAKE_SOURCE_DIR}/src/links.cpp ++ ${CMAKE_SOURCE_DIR}/src/compact.cpp ++ ${CMAKE_SOURCE_DIR}/src/dna.cpp ++ ${CMAKE_SOURCE_DIR}/src/gfa.cpp ++ ${CMAKE_SOURCE_DIR}/src/vgp.cpp ++ ${CMAKE_SOURCE_DIR}/src/exists.cpp ++ ${CMAKE_SOURCE_DIR}/src/time.cpp ++ ${CMAKE_SOURCE_DIR}/src/mmap.cpp ++ ) ++add_dependencies(libseqwish_static tayweeargs) ++add_dependencies(libseqwish_static sdsl-lite) ++add_dependencies(libseqwish_static gzipreader) ++add_dependencies(libseqwish_static mmmulti) ++add_dependencies(libseqwish_static iitii) ++add_dependencies(libseqwish_static ips4o) 
++add_dependencies(libseqwish_static bbhash) ++add_dependencies(libseqwish_static atomicbitvector) ++add_dependencies(libseqwish_static atomicqueue) ++add_dependencies(libseqwish_static ska) ++add_dependencies(libseqwish_static paryfor) ++add_dependencies(libseqwish_static mio) ++target_include_directories(libseqwish_static PUBLIC ++ "${sdsl-lite_INCLUDE}" ++ "${sdsl-lite-divsufsort_INCLUDE}" ++ "${tayweeargs_INCLUDE}" ++ "${gzipreader_INCLUDE}" ++ "${ips4o_INCLUDE}" ++ "${mmmulti_INCLUDE}" ++ "${iitii_INCLUDE}" ++ "${bbhash_INCLUDE}" ++ "${atomicbitvector_INCLUDE}" ++ "${atomicqueue_INCLUDE}" ++ "${ska_INCLUDE}" ++ "${paryfor_INCLUDE}" ++ "${mio_INCLUDE}") ++target_link_libraries(libseqwish_static ++ "${sdsl-lite_LIB}/libsdsl.a" ++ "${sdsl-lite-divsufsort_LIB}/libdivsufsort.a" ++ "${sdsl-lite-divsufsort_LIB}/libdivsufsort64.a" ++ "-latomic" ++ Threads::Threads ++ jemalloc ++ z) ++set_target_properties(libseqwish_static PROPERTIES OUTPUT_NAME "seqwish") ++ ++add_library( libseqwish SHARED ++ ${CMAKE_SOURCE_DIR}/src/utils.cpp ++ ${CMAKE_SOURCE_DIR}/src/tempfile.cpp ++ ${CMAKE_SOURCE_DIR}/src/main.cpp ++ ${CMAKE_SOURCE_DIR}/src/seqindex.cpp ++ ${CMAKE_SOURCE_DIR}/src/paf.cpp ++ ${CMAKE_SOURCE_DIR}/src/sxs.cpp ++ ${CMAKE_SOURCE_DIR}/src/cigar.cpp ++ ${CMAKE_SOURCE_DIR}/src/alignments.cpp ++ ${CMAKE_SOURCE_DIR}/src/pos.cpp ++ ${CMAKE_SOURCE_DIR}/src/match.cpp ++ ${CMAKE_SOURCE_DIR}/src/transclosure.cpp ++ ${CMAKE_SOURCE_DIR}/src/links.cpp ++ ${CMAKE_SOURCE_DIR}/src/compact.cpp ++ ${CMAKE_SOURCE_DIR}/src/dna.cpp ++ ${CMAKE_SOURCE_DIR}/src/gfa.cpp ++ ${CMAKE_SOURCE_DIR}/src/vgp.cpp ++ ${CMAKE_SOURCE_DIR}/src/exists.cpp ++ ${CMAKE_SOURCE_DIR}/src/time.cpp ++ ${CMAKE_SOURCE_DIR}/src/mmap.cpp ++ ${CMAKE_SOURCE_DIR}/src/version.cpp ++ ) ++add_dependencies(libseqwish tayweeargs) ++add_dependencies(libseqwish sdsl-lite) ++add_dependencies(libseqwish gzipreader) ++add_dependencies(libseqwish mmmulti) ++add_dependencies(libseqwish iitii) ++add_dependencies(libseqwish ips4o) 
++add_dependencies(libseqwish bbhash) ++add_dependencies(libseqwish atomicbitvector) ++add_dependencies(libseqwish atomicqueue) ++add_dependencies(libseqwish ska) ++add_dependencies(libseqwish paryfor) ++add_dependencies(libseqwish mio) ++target_include_directories(libseqwish PUBLIC ++ "${sdsl-lite_INCLUDE}" ++ "${sdsl-lite-divsufsort_INCLUDE}" ++ "${tayweeargs_INCLUDE}" ++ "${gzipreader_INCLUDE}" ++ "${ips4o_INCLUDE}" ++ "${mmmulti_INCLUDE}" ++ "${iitii_INCLUDE}" ++ "${bbhash_INCLUDE}" ++ "${atomicbitvector_INCLUDE}" ++ "${atomicqueue_INCLUDE}" ++ "${ska_INCLUDE}" ++ "${paryfor_INCLUDE}" ++ "${mio_INCLUDE}") ++target_link_libraries(libseqwish ++ "${sdsl-lite_LIB}/libsdsl.a" ++ "${sdsl-lite-divsufsort_LIB}/libdivsufsort.a" ++ "${sdsl-lite-divsufsort_LIB}/libdivsufsort64.a" ++ "-latomic" ++ Threads::Threads ++ jemalloc ++ z) ++set_target_properties(libseqwish PROPERTIES OUTPUT_NAME "seqwish") ++ + # set up our target executable and specify its dependencies and includes + add_executable(seqwish +- ${CMAKE_SOURCE_DIR}/src/utils.cpp +- ${CMAKE_SOURCE_DIR}/src/tempfile.cpp + ${CMAKE_SOURCE_DIR}/src/main.cpp +- ${CMAKE_SOURCE_DIR}/src/seqindex.cpp +- ${CMAKE_SOURCE_DIR}/src/paf.cpp +- ${CMAKE_SOURCE_DIR}/src/sxs.cpp +- ${CMAKE_SOURCE_DIR}/src/cigar.cpp +- ${CMAKE_SOURCE_DIR}/src/alignments.cpp +- ${CMAKE_SOURCE_DIR}/src/pos.cpp +- ${CMAKE_SOURCE_DIR}/src/match.cpp +- ${CMAKE_SOURCE_DIR}/src/transclosure.cpp +- ${CMAKE_SOURCE_DIR}/src/links.cpp +- ${CMAKE_SOURCE_DIR}/src/compact.cpp +- ${CMAKE_SOURCE_DIR}/src/dna.cpp +- ${CMAKE_SOURCE_DIR}/src/gfa.cpp +- ${CMAKE_SOURCE_DIR}/src/vgp.cpp +- ${CMAKE_SOURCE_DIR}/src/exists.cpp +- ${CMAKE_SOURCE_DIR}/src/time.cpp +- ${CMAKE_SOURCE_DIR}/src/mmap.cpp + ${CMAKE_SOURCE_DIR}/src/version.cpp + ) + add_dependencies(seqwish tayweeargs) +@@ -252,6 +354,11 @@ target_include_directories(seqwish PUBLIC + "${ska_INCLUDE}" + "${paryfor_INCLUDE}" + "${mio_INCLUDE}") ++if( SEQWISH_LINK_SHARED_LIBRARY ) ++ target_link_libraries( seqwish 
libseqwish ) ++else() ++ target_link_libraries( seqwish libseqwish_static ) ++endif() + target_link_libraries(seqwish + "${sdsl-lite_LIB}/libsdsl.a" + "${sdsl-lite-divsufsort_LIB}/libdivsufsort.a" +@@ -269,4 +376,6 @@ endif() + file(MAKE_DIRECTORY ${CMAKE_SOURCE_DIR}/include) + execute_process(COMMAND bash ${CMAKE_SOURCE_DIR}/scripts/generate_git_version.sh ${CMAKE_SOURCE_DIR}/include) + +-install(TARGETS seqwish DESTINATION bin) ++install(TARGETS seqwish DESTINATION "${CMAKE_INSTALL_BINDIR}") ++install(TARGETS libseqwish LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}") ++install(TARGETS libseqwish_static ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") -- cgit v1.2.3 From 65a7e2f8a9308ca171013c8533f41e3c8bdba71a Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Tue, 26 Sep 2023 11:36:44 +0300 Subject: seqwish: Provide glibc-hwcaps version --- gn/packages/bioinformatics.scm | 119 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 50c207d..15d8811 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -1687,6 +1687,124 @@ dictionaries to record a queryable version of the graph.") (properties `((tunable? . #t))) (license license:expat))) +(define-public seqwish-x86-64-v2 + (package/inherit seqwish + (name "seqwish-x86-64-v2") + (outputs '("out" "static")) + (arguments + (substitute-keyword-arguments (package-arguments seqwish) + ((#:configure-flags flags #~'()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v2" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v2" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v2")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? 
_ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-extra-files + (lambda _ + (delete-file-recursively (string-append #$output "/bin")))) + (add-after 'install 'move-static-library + (lambda* (#:key outputs #:allow-other-keys) + (let ((lib "/lib/glibc-hwcaps/x86-64-v2/libseqwish.a")) + (mkdir-p (dirname (string-append #$output:static lib))) + (rename-file (string-append #$output lib) + (string-append #$output:static lib))))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t) + (tunable? . #f))))) + +(define-public seqwish-x86-64-v3 + (package/inherit seqwish + (name "seqwish-x86-64-v3") + (outputs '("out" "static")) + (arguments + (substitute-keyword-arguments (package-arguments seqwish) + ((#:configure-flags flags #~'()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v3" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v3" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v3")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-extra-files + (lambda _ + (delete-file-recursively (string-append #$output "/bin")))) + (add-after 'install 'move-static-library + (lambda* (#:key outputs #:allow-other-keys) + (let ((lib "/lib/glibc-hwcaps/x86-64-v3/libseqwish.a")) + (mkdir-p (dirname (string-append #$output:static lib))) + (rename-file (string-append #$output lib) + (string-append #$output:static lib))))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t) + (tunable? . 
#f))))) + +(define-public seqwish-x86-64-v4 + (package/inherit seqwish + (name "seqwish-x86-64-v4") + (outputs '("out" "static")) + (arguments + (substitute-keyword-arguments (package-arguments seqwish) + ((#:configure-flags flags #~'()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v4" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v4" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v4")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-extra-files + (lambda _ + (delete-file-recursively (string-append #$output "/bin")))) + (add-after 'install 'move-static-library + (lambda* (#:key outputs #:allow-other-keys) + (let ((lib "/lib/glibc-hwcaps/x86-64-v4/libseqwish.a")) + (mkdir-p (dirname (string-append #$output:static lib))) + (rename-file (string-append #$output lib) + (string-append #$output:static lib))))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t) + (tunable? . #f))))) + +;; This copy of seqwish will automatically use the libraries that target the +;; x86_64 psABI which the hardware supports. 
+(define-public seqwish-hwcaps + (package/inherit seqwish + (name "seqwish-hwcaps") + (arguments + (substitute-keyword-arguments (package-arguments seqwish) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'install-optimized-libraries + (lambda* (#:key inputs outputs #:allow-other-keys) + (let ((hwcaps "/lib/glibc-hwcaps")) + (copy-recursively + (string-append (assoc-ref inputs "seqwish-x86-64-v2") + hwcaps "/x86-64-v2") + (string-append #$output hwcaps "/x86-64-v2")) + (copy-recursively + (string-append (assoc-ref inputs "seqwish-x86-64-v3") + hwcaps "/x86-64-v3") + (string-append #$output hwcaps "/x86-64-v3")) + (copy-recursively + (string-append (assoc-ref inputs "seqwish-x86-64-v4") + hwcaps "/x86-64-v4") + (string-append #$output hwcaps "/x86-64-v4"))))))))) + (native-inputs + (modify-inputs (package-native-inputs seqwish) + (append seqwish-x86-64-v2 + seqwish-x86-64-v3 + seqwish-x86-64-v4))) + (properties `((tunable? . #f))))) + (define-public smoothxg (package (name "smoothxg") @@ -2675,6 +2793,7 @@ Reference Consortium, HPRC} as a method to build a graph from the ;; Replace some packages with ones built targeting custom packages build ;; with glibc-hwcaps support. `(("sdsl-lite" . ,(const sdsl-lite-hwcaps)) + ("seqwish" . ,(const seqwish-hwcaps)) ("odgi" . ,(const odgi-hwcaps)) ("wfmash" . 
,(const wfmash-hwcaps))))) -- cgit v1.2.3 From 01ebd8c995a6bbd45e427ae849cb7e08e84ab6b8 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Tue, 26 Sep 2023 12:46:26 +0300 Subject: vg: Update package definition --- gn/packages/bioinformatics.scm | 65 +++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 29 deletions(-) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 15d8811..9886e3f 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -2438,65 +2438,69 @@ in-memory footprint at the cost of packing and unpacking.") (("\\$\\(CWD\\)/\\$\\(LIB_DIR\\)/libtabixpp\\.a") "$(LIB_DIR)/libtabixpp.a") ((" \\$\\(LIB_DIR\\)/libtabixpp\\.a") - (string-append " " (assoc-ref inputs "tabixpp") "/lib/libtabixpp.so")) + (string-append " " (search-input-file inputs "/lib/libtabixpp.so"))) (("\\$\\(LIB_DIR\\)/pkgconfig/tabixpp\\.pc") - (string-append " " (assoc-ref inputs "tabixpp") "/lib/pkgconfig/tabixpp.pc")) + (string-append " " (search-input-file inputs "/lib/pkgconfig/tabixpp.pc"))) (("\\$\\(CWD\\)/\\$\\(LIB_DIR\\)/libhts\\.a") "$(LIB_DIR)/libhts.a") ((" \\$\\(LIB_DIR\\)/libhts\\.a") - (string-append " " (assoc-ref inputs "htslib") "/lib/libhts.so")) + (string-append " " (search-input-file inputs "/lib/libhts.so"))) (("\\$\\(LIB_DIR\\)/pkgconfig/htslib\\.pc") - (string-append " " (assoc-ref inputs "htslib") "/lib/pkgconfig/htslib.pc")) + (string-append " " (search-input-file inputs "/lib/pkgconfig/htslib.pc"))) (("\\$\\(CWD\\)/\\$\\(LIB_DIR\\)/libdeflate\\.a") "$(LIB_DIR)/libdeflate.a") ((" \\$\\(LIB_DIR\\)/libdeflate\\.a") - (string-append " " (assoc-ref inputs "libdeflate") "/lib/libdeflate.so")) + (string-append " " (search-input-file inputs "/lib/libdeflate.so"))) ((" \\$\\(LIB_DIR\\)/libvcflib.a") - (string-append " " (assoc-ref inputs "vcflib") "/lib/libvcflib.so")) + (string-append " " (search-input-file inputs "/lib/libvcflib.so"))) ((" 
\\$\\(BIN_DIR\\)/vcf2tsv") - (string-append " " (assoc-ref inputs "vcflib") "/bin/vcf2tsv")) + (string-append " " (search-input-file inputs "/bin/vcf2tsv"))) ((" \\$\\(FASTAHACK_DIR\\)/fastahack") - (string-append " " (assoc-ref inputs "fastahack") "/bin/fastahack")) + (string-append " " (search-input-file inputs "/bin/fastahack"))) (("\\+= \\$\\(OBJ_DIR\\)/Fasta\\.o") - (string-append "+= " (assoc-ref inputs "fastahack") "/lib/libfastahack.so")) + (string-append "+= " (search-input-file inputs "/lib/libfastahack.so"))) ((" \\$\\(LIB_DIR\\)/libsnappy.a") - (string-append " " (assoc-ref inputs "snappy") "/lib/libsnappy.so")) + (string-append " " (search-input-file inputs "/lib/libsnappy.so"))) ;; Only link against the libraries in the elfutils package. (("-ldwfl -ldw -ldwelf -lelf -lebl") "-ldw -lelf") ((" \\$\\(LIB_DIR\\)/libelf.a") - (string-append " " (assoc-ref inputs "elfutils") "/lib/libelf.so")) + (string-append " " (search-input-file inputs "/lib/libelf.so"))) ((" \\$\\(LIB_DIR\\)/libdw.a") - (string-append " " (assoc-ref inputs "elfutils") "/lib/libdw.so")) + (string-append " " (search-input-file inputs "/lib/libdw.so"))) ;; We need the Make.helper file in SDSL_DIR for gcsa2 ;((" \\$\\(LIB_DIR\\)/libsdsl.a") - ; (string-append " " (assoc-ref inputs "sdsl-lite") "/lib/libsdsl.so")) + ; (string-append " " (search-input-file inputs "/lib/libsdsl.so"))) ((" \\$\\(LIB_DIR\\)/%divsufsort.a") - (string-append " " (assoc-ref inputs "libdivsufsort") "/lib/%divsufsort.so")) + (string-append " " (dirname + (search-input-file inputs "/lib/libdivsufsort.so")) + "%divsufsort.so")) ((" \\$\\(LIB_DIR\\)/libdivsufsort.a") - (string-append " " (assoc-ref inputs "libdivsufsort") "/lib/libdivsufsort.so")) + (string-append " " (search-input-file inputs "/lib/libdivsufsort.so"))) ((" \\$\\(LIB_DIR\\)/%divsufsort64.a") - (string-append " " (assoc-ref inputs "libdivsufsort") "/lib/%divsufsort64.so")) + (string-append " " (dirname + (search-input-file inputs 
"/lib/libdivsufsort64.so")) + "%divsufsort64.so")) ((" \\$\\(LIB_DIR\\)/libdivsufsort64.a") - (string-append " " (assoc-ref inputs "libdivsufsort") "/lib/libdivsufsort64.so")) + (string-append " " (search-input-file inputs "/lib/libdivsufsort64.so"))) ((" \\$\\(LIB_DIR\\)/libjemalloc.a") - (string-append " " (assoc-ref inputs "jemalloc") "/lib/libjemalloc.a")) + (string-append " " (search-input-file inputs "/lib/libjemalloc.a"))) ((" \\$\\(INC_DIR\\)/sparsehash") - (string-append " " (assoc-ref inputs "sparsehash") "/include/sparsehash")) + (string-append " " (search-input-directory inputs "/include/sparsehash"))) ((" \\$\\(INC_DIR\\)/raptor2") - (string-append " " (assoc-ref inputs "raptor2") "/include/raptor2")) + (string-append " " (search-input-directory inputs "/include/raptor2"))) ((" \\$\\(LIB_DIR\\)/libraptor2.a") - (string-append " " (assoc-ref inputs "raptor2") "/lib/libraptor2.so")) + (string-append " " (search-input-file inputs "/lib/libraptor2.so"))) ((" \\$\\(BIN_DIR\\)/rapper") - (string-append " " (assoc-ref inputs "raptor2") "/bin/rapper"))))) + (string-append " " (search-input-file inputs "/bin/rapper")))))) (add-after 'unpack 'link-with-some-shared-libraries (lambda* (#:key inputs #:allow-other-keys) (substitute* '("deps/mmmultimap/CMakeLists.txt" @@ -2506,11 +2510,10 @@ in-memory footprint at the cost of packing and unpacking.") (("\".*libdivsufsort\\.a\"") "\"-ldivsufsort\"") (("\".*libdivsufsort64\\.a\"") "\"-ldivsufsort64\"") (("\\$\\{sdsl-lite_INCLUDE\\}") - (string-append (assoc-ref inputs "sdsl-lite") - "/include/sdsl")) + (search-input-directory inputs "/include/sdsl")) (("\\$\\{sdsl-lite-divsufsort_INCLUDE\\}") - (string-append (assoc-ref inputs "libdivsufsort") - "/include"))))) + (dirname + (search-input-file inputs "/include/divsufsort.h")))))) #; (add-before 'patch-source 'use-shared-libvg (lambda* (#:key inputs outputs #:allow-other-keys) @@ -2568,14 +2571,18 @@ in-memory footprint at the cost of packing and unpacking.") 
(substitute* "test/t/02_vg_construct.t" (("../deps/fastahack/fastahack") (which "fastahack")) (("../bin/vcf2tsv") (which "vcf2tsv"))) - ;; Lets skip the 7 failing tests for now. They fail with our + ;; Lets skip the 9 failing tests for now. They fail with our ;; bash-tap and the bundled one. (substitute* "test/t/02_vg_construct.t" - ((".*self-inconsistent.*") "is $(true) \"\" \"\"\n") - ((".*the graph contains.*") "is $(true) \"\" \"\"\n")) + ((".*self-inconsistent.*") "is $(true) \"\" \"\"\n")) + (substitute* "test/t/07_vg_map.t" + ;; Change in fasta's output + (("identity\\) 1 \"") "identity) 1.0 \"")) (substitute* '("test/t/07_vg_map.t" "test/t/33_vg_mpmap.t") ((".*node id.*") "is $(true) \"\" \"\"\n")) + (substitute* "test/t/48_vg_convert.t" + (("true \"vg.*") "true \"true\"\n")) (substitute* "test/t/50_vg_giraffe.t" ((".*A long read can.*") "is $(true) \"\" \"\"\n") ((".*A long read has.*") "is $(true) \"\" \"\"\n") -- cgit v1.2.3 From a848475c6bf6321dde971528f7c8d0c4d6e9e0d5 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Tue, 26 Sep 2023 13:54:28 +0300 Subject: gsl: Provide glibc-hwcaps version. 
--- gn/packages/bioinformatics.scm | 4 +- gn/packages/maths.scm | 108 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+), 1 deletion(-) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 9886e3f..95dda00 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -22,6 +22,7 @@ #:use-module (gn packages crates-io) #:use-module (gn packages datastructures) #:use-module (gn packages java) + #:use-module (gn packages maths) #:use-module (gn packages ocaml) #:use-module (gn packages python) #:use-module (gnu packages algebra) @@ -2799,7 +2800,8 @@ Reference Consortium, HPRC} as a method to build a graph from the (package-input-rewriting/spec ;; Replace some packages with ones built targeting custom packages build ;; with glibc-hwcaps support. - `(("sdsl-lite" . ,(const sdsl-lite-hwcaps)) + `(;("gsl" . ,(const gsl-hwcaps)) ; Causes too many rebuilds through multiqc + ("sdsl-lite" . ,(const sdsl-lite-hwcaps)) ("seqwish" . ,(const seqwish-hwcaps)) ("odgi" . ,(const odgi-hwcaps)) ("wfmash" . 
,(const wfmash-hwcaps))))) diff --git a/gn/packages/maths.scm b/gn/packages/maths.scm index 1c1c1ae..7dfb896 100644 --- a/gn/packages/maths.scm +++ b/gn/packages/maths.scm @@ -3,6 +3,7 @@ #:use-module (guix packages) #:use-module (guix download) #:use-module (guix utils) + #:use-module (guix gexp) #:use-module (gnu packages gcc) #:use-module (gnu packages gperf) #:use-module (gnu packages maths) @@ -131,3 +132,110 @@ (inputs `(,@(fold alist-delete (package-inputs suitesparse) '("metis")))))) + +(define-public gsl-x86-64-v2 + (package/inherit gsl + (name "gsl-x86-64-v2") + (arguments + (substitute-keyword-arguments (package-arguments gsl) + ((#:make-flags flags #~'()) + #~(append (list "CFLAGS=-march=x86-64-v2" + "CXXFLAGS=-march=x86-64-v2") + #$flags)) + ((#:configure-flags flags #~'()) + #~(append (list (string-append "--libdir=" #$output + "/lib/glibc-hwcaps/x86-64-v2")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-extra-files + (lambda _ + (delete-file-recursively (string-append #$output "/bin")) + (delete-file-recursively (string-append #$output "/include")))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t) + (tunable? . #f))))) + +(define-public gsl-x86-64-v3 + (package/inherit gsl + (name "gsl-x86-64-v3") + (arguments + (substitute-keyword-arguments (package-arguments gsl) + ((#:make-flags flags #~'()) + #~(append (list "CFLAGS=-march=x86-64-v3" + "CXXFLAGS=-march=x86-64-v3") + #$flags)) + ((#:configure-flags flags #~'()) + #~(append (list (string-append "--libdir=" #$output + "/lib/glibc-hwcaps/x86-64-v3")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? 
_ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-extra-files + (lambda _ + (delete-file-recursively (string-append #$output "/bin")) + (delete-file-recursively (string-append #$output "/include")))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t) + (tunable? . #f))))) + +(define-public gsl-x86-64-v4 + (package/inherit gsl + (name "gsl-x86-64-v4") + (outputs '("out" "static")) + (arguments + (substitute-keyword-arguments (package-arguments gsl) + ((#:make-flags flags #~'()) + #~(append (list "CFLAGS=-march=x86-64-v4" + "CXXFLAGS=-march=x86-64-v4") + #$flags)) + ((#:configure-flags flags #~'()) + #~(append (list (string-append "--libdir=" #$output + "/lib/glibc-hwcaps/x86-64-v4")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-extra-files + (lambda _ + (delete-file-recursively (string-append #$output "/bin")) + (delete-file-recursively (string-append #$output "/include")))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t) + (tunable? . #f))))) + +;; This copy of gsl will automatically use the libraries that target the +;; x86_64 psABI which the hardware supports. 
+(define-public gsl-hwcaps + (package/inherit gsl + (name "gsl-hwcaps") + (arguments + (substitute-keyword-arguments (package-arguments gsl) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'install-optimized-libraries + (lambda* (#:key inputs outputs #:allow-other-keys) + (let ((hwcaps "/lib/glibc-hwcaps")) + (copy-recursively + (string-append (assoc-ref inputs "gsl-x86-64-v2") + hwcaps "/x86-64-v2") + (string-append #$output hwcaps "/x86-64-v2")) + (copy-recursively + (string-append (assoc-ref inputs "gsl-x86-64-v3") + hwcaps "/x86-64-v3") + (string-append #$output hwcaps "/x86-64-v3")) + (copy-recursively + (string-append (assoc-ref inputs "gsl-x86-64-v4") + hwcaps "/x86-64-v4") + (string-append #$output hwcaps "/x86-64-v4"))))))))) + (native-inputs + (modify-inputs (package-native-inputs gsl) + (append gsl-x86-64-v2 + gsl-x86-64-v3 + gsl-x86-64-v4))) + (properties `((tunable? . #f))))) -- cgit v1.2.3 From 0d8ed9f6c2ca7804a73028c947db0a2a6a61e7cb Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Tue, 26 Sep 2023 16:18:12 +0300 Subject: pggb: wrap with more packages --- gn/packages/bioinformatics.scm | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 95dda00..535018b 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -2692,7 +2692,11 @@ multiple sequence alignment.") ("scripts" "bin/scripts")) #:phases #~(modify-phases %standard-phases - (add-before 'install 'patch-binary-path + (add-after 'unpack 'force-python3 + (lambda _ + (substitute* (find-files "scripts" "\\.py$") + (("/usr/bin/python") "/usr/bin/python3")))) + (add-before 'install 'patch-and-wrap-scripts (lambda* (#:key inputs #:allow-other-keys) (substitute* "scripts/vcf_preprocess.sh" (("bcftools ") @@ -2706,6 +2710,8 @@ multiple sequence alignment.") (for-each (lambda (file) 
(wrap-script file + `("R_LIBS_SITE" ":" prefix + (,(getenv "R_LIBS_SITE"))) `("PATH" ":" prefix ,(map (lambda (input) (string-append input "/bin")) '#$(map (lambda (label) @@ -2722,7 +2728,9 @@ multiple sequence alignment.") "pafplot" "parallel" "pigz" + "python" "r-data-table" + "r-minimal" "rtg-tools" "samtools" "seqwish" @@ -2756,9 +2764,10 @@ multiple sequence alignment.") pafplot parallel pigz + python python-igraph - python-wrapper r-data-table + r-minimal rtg-tools samtools seqwish -- cgit v1.2.3 From 29633fd3a8bafce405bba487802cd3720e642d11 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Tue, 26 Sep 2023 17:11:58 +0300 Subject: pggb: Add missing inputs --- gn/packages/bioinformatics.scm | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 535018b..9de6ad8 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -2717,7 +2717,8 @@ multiple sequence alignment.") '#$(map (lambda (label) (or (this-package-input (string-append label "-hwcaps")) (this-package-input label))) - (list "bcftools" + (list "bc" + "bcftools" "bedtools" "gfaffix" "htslib" @@ -2735,6 +2736,7 @@ multiple sequence alignment.") "samtools" "seqwish" "smoothxg" + "time" "vcfbub" "vcflib" "vg" @@ -2752,7 +2754,8 @@ multiple sequence alignment.") (("/usr/local/bin/nucmer2vcf.R") (string-append out "/bin/nucmer2vcf.R"))))))))) (inputs - (list bcftools + (list bc + bcftools bedtools gfaffix guile-3.0 ; for wrap-script @@ -2772,6 +2775,7 @@ multiple sequence alignment.") samtools seqwish smoothxg + time vcfbub vcflib vg -- cgit v1.2.3 From dab19f78848da9c176238cc1eec486964a54dfca Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Thu, 19 Oct 2023 13:32:19 +0100 Subject: Remove bh20-seq-resource. The package is broken, some of its inputs have been removed from upstream Guix, and we no longer need this package. 
* gn/packages/bioinformatics.scm (bh20-seq-resource): Delete variable. * gn/services/bh20-seq-resource-container.scm: Delete file. --- gn/packages/bioinformatics.scm | 73 ------------------ gn/services/bh20-seq-resource-container.scm | 110 ---------------------------- 2 files changed, 183 deletions(-) delete mode 100644 gn/services/bh20-seq-resource-container.scm (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 9de6ad8..2832ea6 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -2096,79 +2096,6 @@ reads, also called read-based phasing or haplotype assembly. It is especially suitable for long reads, but works also well with short reads.") (license license:expat))) -(define-public bh20-seq-resource - (let ((commit "2ae71911cd87ce4f2eabdff21e538267b3270d45") - (revision "4")) - (package - (name "bh20-seq-resource") - (version (git-version "1.0" revision commit)) - (source (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/pubseq/bh20-seq-resource") - (commit commit))) - (file-name (git-file-name name version)) - (sha256 - (base32 "1k6cc88hrcm77jwpdk2084q0zirv2vlbz3c07nmpbhk1lhqk5x0n")) - (modules '((guix build utils))) - (snippet - '(begin - (delete-file "gittaggers.py"))))) - (build-system python-build-system) - (arguments - (list - #:tests? 
#f ; Tests can't find pytest - #:phases - #~(modify-phases %standard-phases - (add-after 'unpack 'patch-program-calls - (lambda* (#:key inputs #:allow-other-keys) - (substitute* "bh20sequploader/qc_fasta.py" - (("\"minimap2\"") - (string-append "\"" (search-input-file - inputs "/bin/minimap2") - "\"")))))))) - (propagated-inputs - (list python-arvados-python-client - python-schema-salad - python-magic - python-pyshex - python-pyshexc-0.7 - python-py-dateutil - - ;; for the web - python-flask - python-pyyaml - python-redis - - ;; and for the service - python - gunicorn)) - (inputs - (list minimap2)) - (native-inputs - (list python-pytest-4 ; < 6 - python-pytest-runner-4)) ; < 5 - (home-page "https://github.com/pubseq/bh20-seq-resource") - (synopsis - "Tool to upload SARS-CoV-19 sequences and service to kick off analysis") - (description "This repository provides a sequence uploader for the -COVID-19 Virtual Biohackathon's Public Sequence Resource project. You can use -it to upload the genomes of SARS-CoV-2 samples to make them publicly and freely -available to other researchers.") - (license license:asl2.0)))) - -;; This version has no profile collisions. -(define-public bh20-seq-resource-for-service - (package - ;(inherit (fix-profile-collisions-for-bh20 bh20-seq-resource)) - (inherit - ((package-input-rewriting/spec - `(("python-google-api-core" . ,(const python-google-api-core-1)) - ("python-google-auth" . ,(const python-google-auth-1)) - ("python-pyparsing" . ,(const python-pyparsing-2.4.7)))) - bh20-seq-resource)) - (properties `((hidden? . 
#t))))) - (define-public python-scanpy-git (let ((commit "590d42309f9ed6550d7b887039990edfc1ac7648") ; April 22, 2020 (revision "1")) diff --git a/gn/services/bh20-seq-resource-container.scm b/gn/services/bh20-seq-resource-container.scm deleted file mode 100644 index e0eccf7..0000000 --- a/gn/services/bh20-seq-resource-container.scm +++ /dev/null @@ -1,110 +0,0 @@ -(define-module (gn services bh20-seq-resource-container)) - -(use-modules (gnu) - (gn packages bioinformatics) - (guix modules) - ((guix packages) #:select (package-source)) - (guix records) - (guix build-system python) ; for python-version from guix/build/python-build-system.scm - (ice-9 match)) -(use-service-modules shepherd) -(use-package-modules compression python python-web) - -(define-record-type* <covid19-pubseq-configuration> - covid19-pubseq-configuration - make-covid19-pubseq-configuration - covid19-pubseq-configuration? - (package covid19-pubseq-configuration-package ; package - (default bh20-seq-resource-for-service)) - (deploy-directory covid19-pubseq-deploy-directory ; string - (default "/srv/http")) - (port covid19-pubseq-configuration-port ; string - (default "5000"))) - -(define covid19-pubseq-activation-service - (match-lambda - (($ <covid19-pubseq-configuration> package deploy-directory port) - #~(begin - (let ((pkg-src #$(package-source package))) - (when (file-exists? #$deploy-directory) - (delete-file-recursively (mkdir-p #$deploy-directory))) - (mkdir-p #$deploy-directory) - (if (file-is-directory? 
pkg-src) - (copy-recursively pkg-src #$deploy-directory) - (begin - (copy-file pkg-src #$(string-append deploy-directory - "/src.tar.xz")) - (with-directory-excursion #$deploy-directory - (invoke #$(file-append xz "/bin/xz") "-d" "src.tar.xz") - (invoke #$(file-append tar "/bin/tar") "xvf" "src.tar" - "--strip-components=1"))))))))) - -(define covid19-pubseq-shepherd-service - (match-lambda - (($ <covid19-pubseq-configuration> package deploy-directory port) - (with-imported-modules (source-module-closure - '((gnu build shepherd) - (gnu system file-systems))) - (list (shepherd-service - (provision '(covid19-pubseq)) - (requirement '(networking)) - (modules '((gnu build shepherd) - (gnu system file-systems))) - ;(start #~(make-forkexec-constructor/container - (start #~(make-forkexec-constructor - (list - #$(file-append gunicorn "/bin/gunicorn") - (string-append "-blocalhost:" #$port) - "bh20simplewebuploader.main:app") - #:directory #$deploy-directory - #:log-file "/var/log/covid19-pubseq.log" - #:environment-variables - ;(let (pyversion (python-version (@ (gnu packages python) python))) - '("TMPDIR=/export/tmp" - "PYTHONPATH=/run/current-system/profile/lib/python3.9/site-packages") - ; (string-append "PYTHONPATH=/run/current-system/profile/lib/python" pyversion "/site-packages"))) - ;#:mappings - ;(list (file-system-mapping - ; (source "/export/tmp") - ; (target source) - ; (writable? #t)) - ; (file-system-mapping - ; ;; TODO: Don't hardcode python version! 
- ; (source "/run/current-system/profile/lib/python3.9/site-packages") - ; (target source))) - )) - (stop #~(make-kill-destructor)))))))) - -(define covid19-pubseq-service-type - (service-type - (name 'covid19-pubseq) - (extensions - (list - (service-extension shepherd-root-service-type - covid19-pubseq-shepherd-service) - (service-extension activation-service-type - covid19-pubseq-activation-service) - (service-extension profile-service-type - (compose list - covid19-pubseq-configuration-package)))) - (default-value (covid19-pubseq-configuration)) - (description - "Run a COVID-19 PubSeq: Public SARS-CoV-2 Sequence Resource Webserver."))) - -(operating-system - (host-name "covid19-pubseq") - (timezone "Etc/UTC") - (locale "en_US.utf8") - - (bootloader (bootloader-configuration - (bootloader grub-bootloader) - (targets '("does-not-matter")))) - (file-systems %base-file-systems) - ;; No firmware for VMs. - (firmware '()) - ;; We don't need any packages inside the container. - (packages '()) - - (services (list (service covid19-pubseq-service-type)))) - -;; guix system container /home/shepherd/guix-bioinformatics/gn/services/bh20-seq-resource-container.scm --share=/export/tmp=/export/tmp --network -- cgit v1.2.3 From 73f615b37829354a90a015caf8880e1c49503ddd Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Fri, 23 Feb 2024 21:27:09 +0000 Subject: gn: Add r-rrbgen. * gn/packages/bioinformatics.scm (r-rrbgen): New variable. 
--- gn/packages/bioinformatics.scm | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 2832ea6..c3e5020 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -4460,3 +4460,28 @@ automatically vectorize for different architectures without adapting the code.") (substitute-keyword-arguments (package-arguments wfa2-lib) ((#:make-flags flags ''()) #~(cons "CC_FLAGS+=-static" #$flags)))))) + +(define-public r-rrbgen + (package + (name "r-rrbgen") + (version "0.0.6") + (source + (origin + (method url-fetch) + (uri (string-append "https://github.com/rwdavies/rrbgen/releases/download/" + version "/rrbgen_" version ".tar.gz")) + (sha256 + (base32 + "1vhqy8licl2pkzar4aag0q5fhnb3fdch8acyjh9445ia42z01z9c")))) + (build-system r-build-system) + (propagated-inputs + (list r-rcpp + r-rcpparmadillo)) + (home-page "https://github.com/rwdavies/rrbgen") + (synopsis "Lightweight limited functionality R bgen read/write library") + (description "@code{r-rrbgen} supports v1.3 of the bgen format. It supports reading +and writing using 8, 16, 24 or 32 bits per probability, using Layout = +2 and CompressedSNPBlocks = 1, for bi-allelic SNPs with samples of +ploidy 2. Any other format specification may crash unexpectedly +without a properly defined error.") + (license license:gpl3))) -- cgit v1.2.3 From 8de10edc74100b49589949778c5bd84c72e83deb Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Fri, 23 Feb 2024 21:28:00 +0000 Subject: gn: Add seqlib. * gn/packages/bioinformatics.scm (seqlib): New variable. 
--- gn/packages/bioinformatics.scm | 65 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index c3e5020..b0590ca 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -4485,3 +4485,68 @@ and writing using 8, 16, 24 or 32 bits per probability, using Layout = ploidy 2. Any other format specification may crash unexpectedly without a properly defined error.") (license license:gpl3))) + +(define-public seqlib + (package + (name "seqlib") + (version "0.1.4") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/Zilong-Li/SeqLib") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1hczg1swghnxm6af74l09crdgf7l282jabmyck9mi5bk6vg9s1pn")))) + (build-system gnu-build-system) + (arguments + (list #:phases + #~(modify-phases %standard-phases + ;; Patch build scripts to unbundle htslib and build a + ;; seqlib shared library using libtool. + (add-after 'unpack 'patch-build-scripts + (lambda _ + ;; Initialize libtool. + (substitute* "configure.ac" + (("AM_INIT_AUTOMAKE\\(foreign\\)\n" all) + (string-append all "LT_INIT\n"))) + (substitute* "Makefile.am" + ;; Install headers + (("^SUBDIRS" all) + (string-append "nobase_include_HEADERS = " + (string-join (find-files "SeqLib")) + "\n" all)) + ;; Do not recurse into htslib submodule. + (("htslib") "") + ;; Remove install target override. + (("^install:") "") + (("^\tmkdir -p lib && cp src/libseqlib.a /libhts.a lib") "")) + (substitute* "src/Makefile.am" + ;; Build libtool library. 
+ (("noinst_LIBRARIES = libseqlib\\.a") + "lib_LTLIBRARIES = libseqlib.la\nlibseqlib_la_LIBADD = -ljsoncpp") + (("libseqlib\\.a") "libseqlib.la") + (("libseqlib_a") "libseqlib_la")) + (substitute* (list "SeqLib/BamHeader.h" + "SeqLib/BamRecord.h" + "SeqLib/RefGenome.h" + "src/ReadFilter.cpp") + ;; Patch path to htslib headers. + (("\"htslib/htslib/([^\"]*)\"" all header) + (string-append "<htslib/" header ">")))))))) + (inputs + (list zlib)) + (native-inputs + (list autoconf automake libtool)) + ;; seqlib headers include headers from htslib and jsoncpp. So, + ;; they are propagated inputs. + (propagated-inputs + (list htslib jsoncpp)) + (home-page "https://github.com/Zilong-Li/SeqLib") + (synopsis "C++ htslib interface for manipulating sequence data and VCF") + (description "@code{seqlib} is a C++ htslib interface for manipulating sequence data +and VCF files.") + (license (list license:expat ; SeqLib/IntervalTree.h, SeqLib/aho_corasick.hpp, + ; json/json-forwards.h, json/json.h, src/jsoncpp.cpp, src/ssw.c, + license:asl2.0)))) ; main license -- cgit v1.2.3 From 9f4c479e6d21e7bb2d3590cbb2df5906060d2781 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Fri, 23 Feb 2024 21:28:12 +0000 Subject: gn: Add vcfpp. * gn/packages/bioinformatics.scm (vcfpp): New variable. 
--- gn/packages/bioinformatics.scm | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index b0590ca..8141ada 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -4550,3 +4550,24 @@ and VCF files.") (license (list license:expat ; SeqLib/IntervalTree.h, SeqLib/aho_corasick.hpp, ; json/json-forwards.h, json/json.h, src/jsoncpp.cpp, src/ssw.c, license:asl2.0)))) ; main license + +(define-public vcfpp + (package + (name "vcfpp") + (version "0.3.3") + (source (origin + (method url-fetch) + (uri (string-append "https://github.com/Zilong-Li/vcfpp/releases/download/v" + version "/vcfpp.h")) + (sha256 + (base32 + "1wq76wz81y09ic37z30vljqnczhwx2qijav0nfvg6xi8wd2c75n3")))) + (build-system copy-build-system) + (arguments + (list #:install-plan #~'(("vcfpp.h" "include/vcfpp/vcfpp.h")))) + (home-page "https://github.com/Zilong-Li/vcfpp") + (synopsis "C++ API of htslib") + (description "@code{vcfpp} is a single C++ file as +interface to the basic htslib. It can be easily included in a C++ +program for scripting high-performance genomic analyses.") + (license license:asl2.0))) -- cgit v1.2.3 From e1888bca8136a05438fcf33bc995531514e6cde0 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Fri, 23 Feb 2024 21:28:22 +0000 Subject: gn: Add r-stitch. * gn/packages/bioinformatics.scm (r-stitch): New variable. --- gn/packages/bioinformatics.scm | 48 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 8141ada..b5fac4d 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -4571,3 +4571,51 @@ and VCF files.") interface to the basic htslib. 
It can be easily included in a C++ program for scripting high-performance genomic analyses.") (license license:asl2.0))) + +(define-public r-stitch + (package + (name "r-stitch") + (version "1.6.10") + (source + ;; The release tarball bundles dependencies. So, use git-fetch. + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/rwdavies/STITCH") + (commit version))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "0iy5fq2l5a35xdxqaf9ypj56da57qmwppwqmh9nflbvmbc7kgbkf")))) + (build-system r-build-system) + (arguments + (list + #:phases + #~(modify-phases %standard-phases + (add-after 'unpack 'chdir + (lambda _ + (chdir "STITCH"))) + (add-after 'chdir 'patch-build-system + (lambda _ + (substitute* "src/Makevars" + (("\\$\\(SEQLIB_ROOT\\)/src/libseqlib.a") "-lseqlib") + (("\\$\\(SEQLIB_ROOT\\)/htslib/libhts.a") "-lhts") + ((": SeqLib") ":"))))))) + (inputs + (list curl htslib seqlib zlib)) + (native-inputs + (list autoconf automake vcfpp)) + (propagated-inputs + (list r-data-table r-rrbgen + ;; FIXME: These should be inputs that are substituted into + ;; the source. But, for some reason, the reference scanner + ;; does not pick them up that way. + coreutils findutils htslib rsync)) + (home-page "https://github.com/rwdavies/STITCH") + (synopsis "Sequencing to imputation through constructing haplotypes") + (description "@code{r-stitch} is an R program for reference panel free, +read aware, low coverage sequencing genotype imputation. STITCH runs +on a set of samples with sequencing reads in BAM format, as well as a +list of positions to genotype, and outputs imputed genotypes in VCF +format.") + (license license:gpl3))) -- cgit v1.2.3 From d3b01e82a43507f7388e6b0397c324ebdf44bcf7 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Fri, 23 Feb 2024 21:32:28 +0000 Subject: gn: bioinformatics: Import (guix build-system r). 
--- gn/packages/bioinformatics.scm | 1 + 1 file changed, 1 insertion(+) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index b5fac4d..82caab9 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -16,6 +16,7 @@ #:use-module (guix build-system meson) #:use-module (guix build-system ocaml) #:use-module (guix build-system python) + #:use-module (guix build-system r) #:use-module (guix build-system trivial) #:use-module (guix build-system waf) #:use-module (gnu packages) -- cgit v1.2.3 From 931c99dd43bd155013606cac563d203026d8d443 Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Fri, 19 Apr 2024 17:16:33 +0100 Subject: gn: Add hifiasm. * gn/packages/bioinformatics.scm (hifiasm): New variable. --- gn/packages/bioinformatics.scm | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'gn/packages/bioinformatics.scm') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 82caab9..ed6d4d4 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -4620,3 +4620,41 @@ on a set of samples with sequencing reads in BAM format, as well as a list of positions to genotype, and outputs imputed genotypes in VCF format.") (license license:gpl3))) + +(define-public hifiasm + (package + (name "hifiasm") + (version "0.19.8") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/chhylp123/hifiasm") + (commit version))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1g6m2qdc0224vjaic87669g7y9ky1yps07qbjkmbh1vakz4zmgvr")))) + (build-system gnu-build-system) + (arguments + (list #:tests? 
#f + #:phases + #~(modify-phases %standard-phases + (delete 'configure) + (replace 'install + (lambda _ + (install-file "hifiasm" (string-append #$output "/bin")) + (install-file "hifiasm.1" (string-append #$output "/share/man/man1"))))))) + (inputs + (list zlib)) + (home-page "https://github.com/chhylp123/hifiasm") + (synopsis "Haplotype-resolved assembler for accurate HiFi reads") + (description "Hifiasm is a fast haplotype-resolved de-novo assembler originally +designed for PacBio HiFi reads. Its latest release supports the +telomere-to-telomere assembly by utilizing ultralong Oxford Nanopore +reads. Hifiasm produces arguably the best single-sample +telomere-to-telomere assemblies combining HiFi, ultralong and Hi-C +reads, and it is one of the best haplotype-resolved assemblers for the +trio-binning assembly given parental short reads. For a human genome, +hifiasm can produce the telomere-to-telomere assembly in one day.") + (license license:expat))) -- cgit v1.2.3