diff options
Diffstat (limited to 'gn/packages/bioinformatics.scm')
-rw-r--r-- | gn/packages/bioinformatics.scm | 1576 |
1 files changed, 1149 insertions, 427 deletions
diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index db420b7..ed6d4d4 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -16,11 +16,14 @@ #:use-module (guix build-system meson) #:use-module (guix build-system ocaml) #:use-module (guix build-system python) + #:use-module (guix build-system r) #:use-module (guix build-system trivial) #:use-module (guix build-system waf) #:use-module (gnu packages) #:use-module (gn packages crates-io) + #:use-module (gn packages datastructures) #:use-module (gn packages java) + #:use-module (gn packages maths) #:use-module (gn packages ocaml) #:use-module (gn packages python) #:use-module (gnu packages algebra) @@ -39,6 +42,7 @@ #:use-module (gnu packages cpp) #:use-module (gnu packages cran) #:use-module (gnu packages crates-io) + #:use-module (gnu packages crates-graphics) #:use-module (gnu packages curl) #:use-module (gnu packages databases) #:use-module (gnu packages datastructures) @@ -47,6 +51,7 @@ #:use-module (gnu packages fontutils) #:use-module (gnu packages gcc) #:use-module (gnu packages ghostscript) + #:use-module (gnu packages graph) #:use-module (gnu packages gtk) #:use-module (gnu packages guile) #:use-module (gnu packages image) @@ -60,6 +65,7 @@ #:use-module (gnu packages mpi) #:use-module (gnu packages ncurses) #:use-module (gnu packages ocaml) + #:use-module (gnu packages parallel) #:use-module (gnu packages perl) #:use-module (gnu packages pkg-config) #:use-module (gnu packages protobuf) @@ -194,6 +200,100 @@ accurately delineate genomic rearrangements throughout the genome. Structural variants can be visualized using Delly-maze and Delly-suave.") (license license:gpl3))) +(define-public wfmash-x86-64-v2 + (package/inherit wfmash + (name "wfmash-x86-64-v2") + (arguments + (substitute-keyword-arguments (package-arguments wfmash) + ((#:configure-flags flags #~'()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v2" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v2" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v2")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-binary + (lambda _ + (delete-file-recursively (string-append #$output "/bin")))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t))))) + +(define-public wfmash-x86-64-v3 + (package/inherit wfmash + (name "wfmash-x86-64-v3") + (arguments + (substitute-keyword-arguments (package-arguments wfmash) + ((#:configure-flags flags #~'()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v3" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v3" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v3")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-binary + (lambda _ + (delete-file-recursively (string-append #$output "/bin")))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t))))) + +(define-public wfmash-x86-64-v4 + (package/inherit wfmash + (name "wfmash-x86-64-v4") + (arguments + (substitute-keyword-arguments (package-arguments wfmash) + ((#:configure-flags flags #~'()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v4" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v4" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v4")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-binary + (lambda _ + (delete-file-recursively (string-append #$output "/bin")))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t))))) + +;; This copy of wfmash will automatically use the libraries that target the +;; x86_64 psABI which the hardware supports. +(define-public wfmash-hwcaps + (package/inherit wfmash + (name "wfmash-hwcaps") + (arguments + (substitute-keyword-arguments (package-arguments wfmash) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'install-optimized-libraries + (lambda* (#:key inputs outputs #:allow-other-keys) + (let ((hwcaps "/lib/glibc-hwcaps")) + (copy-recursively + (string-append (assoc-ref inputs "wfmash-x86-64-v2") + hwcaps "/x86-64-v2") + (string-append #$output hwcaps "/x86-64-v2")) + (copy-recursively + (string-append (assoc-ref inputs "wfmash-x86-64-v3") + hwcaps "/x86-64-v3") + (string-append #$output hwcaps "/x86-64-v3")) + (copy-recursively + (string-append (assoc-ref inputs "wfmash-x86-64-v4") + hwcaps "/x86-64-v4") + (string-append #$output hwcaps "/x86-64-v4"))))))))) + (native-inputs + (modify-inputs (package-native-inputs wfmash) + (append wfmash-x86-64-v2 + wfmash-x86-64-v3 + wfmash-x86-64-v4))) + (properties `((tunable? . #f))))) + (define-public freec (package (name "control-freec") @@ -469,7 +569,7 @@ reads.") (define-public gfaffix (package (name "gfaffix") - (version "0.1.3") + (version "0.1.5") (source (origin (method git-fetch) @@ -478,12 +578,7 @@ reads.") (commit version))) (file-name (git-file-name name version)) (sha256 - (base32 "1biss5qv6ag1dfkn1nspwd528hpzgn8i4jydvbv2z7yv7sc685rh")) - (modules '((guix build utils))) - (snippet - '(begin - (substitute* "Cargo.toml" - (("^handlegraph.*") "handlegraph = \"0.7\"\n")))))) + (base32 "181jxl8ldj39jgscyqzhz4l4k5kxj1j9hvzi8dxj59h2zzznb0kb")))) (build-system cargo-build-system) (arguments `(#:install-source? #f @@ -491,19 +586,12 @@ reads.") (("rust-clap" ,rust-clap-3) ("rust-rustc-hash" ,rust-rustc-hash-1) ("rust-regex" ,rust-regex-1) - ("rust-handlegraph" ,rust-handlegraph-0.7) + ("rust-handlegraph" ,rust-handlegraph-0.7.0-alpha.9) ("rust-gfa" ,rust-gfa-0.10) ("rust-quick-csv", rust-quick-csv-0.1) + ("rust-rayon" ,rust-rayon-1) ("rust-log" ,rust-log-0.4) - ("rust-env-logger" ,rust-env-logger-0.7)) - #:phases - (modify-phases %standard-phases - (add-after 'unpack 'adjust-dependency-version - (lambda* (#:key inputs #:allow-other-keys) - (let ((handlebar-version ,(package-version rust-handlegraph-0.7))) - (substitute* "Cargo.toml" - (("\"0.7\"") - (string-append "{ version = \"" handlebar-version "\" }"))))))))) + ("rust-env-logger" ,rust-env-logger-0.7)))) (home-page "https://github.com/marschall-lab/GFAffix") (synopsis "Identify walk-preserving shared affixes in variation graphs") (description @@ -511,6 +599,117 @@ reads.") collapses them into a non-redundant graph structure.") (license license:expat))) +(define-public vcfbub + (package + (name "vcfbub") + (version "0.1.0") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/pangenome/vcfbub") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0sk2ab22z6qa00j1w8a8f5kbb7q2xb10fhd32zy4lh351v3mqmyg")))) + (build-system cargo-build-system) + (arguments + `(#:install-source? #f + #:cargo-inputs + (("rust-clap" ,rust-clap-2) + ("rust-flate2" ,rust-flate2-1) + ("rust-vcf" ,rust-vcf-0.6)))) + (home-page "https://github.com/pangenome/vcfbub") + (synopsis "Popping bubbles in vg deconstruct VCFs") + (description + "The VCF output produced by a command like @command{vg deconstruct -e -a +-H '#' ...} includes information about the nesting of variants. With @code{-a}, +@code{--all-snarls}, we obtain not just the top level bubbles, but all nested +ones. This exposed snarl tree information can be used to filter the VCF to +obtain a set of non-overlapping sites (n.b. \"snarl\" is a generic model of +graph bubbles including tips and loops). +@code{vcfbub} lets us do two common operations on these VCFs: +@enumerate +@item We can filter sites by maximum level in the snarl tree. For instance, +@code{--max-level 0} would keep only sites with @code{LV=0}. In practice, vg's +snarl finder ensures that these are sites rooted on the main linear axis of the +pangenome graph. Those at higher levels occur within larger variants. +@item We can filter sites by maximum allele size, either for the reference +allele or any allele. In this case, @code{--max-ref-length 10000} would keep +only sites where the reference allele is less than 10kb long. Setting +@code{--max-ref-length} or @code{--max-allele-length} additionally ensures that +the output contains the bubbles nested inside of any popped bubble, even if +they are at greater than @code{--max-level}. +@end enumerate +@code{vcfbub} accomplishes a simple task: we keep sites that are the children +of those which we \"pop\" due to their size. These occur around complex large +SVs, such as multi-Mbp inversions and segmental duplications. We often need to +remove these, as they provide little information for many downstream +applications, such as haplotype panels or other imputation references.") + (license license:expat))) + +(define-public fastix + (package + (name "fastix") + (version "0.1.0") + (source (origin + (method url-fetch) + (uri (crate-uri "fastix" version)) + (file-name (string-append name "-" version ".tar.gz")) + (sha256 + (base32 "1mzk65mg8vx0hz39xis6zqdmq56abhmza656gn9pgmlsn151gpx2")))) + (build-system cargo-build-system) + (arguments + `(#:install-source? #f + #:cargo-inputs + (("rust-clap" ,rust-clap-2)) + #:cargo-development-inputs + (("rust-assert-cmd" ,rust-assert-cmd-0.12) + ("rust-predicates" ,rust-predicates-1)))) + (home-page "https://github.com/ekg/fastix") + (synopsis "Prefix-renaming FASTA records") + (description "A command line tool to add prefixes to FASTA headers. The +idea is to support pangenomic applications, following the +@url{https://github.com/pangenome/PanSN-spec, PanSN} hierarchical naming +specification.") + (license license:expat))) + +(define-public pafplot + (let ((commit "7dda24c0aeba8556b600d53d748ae3103ec85501") + (revision "1")) + (package + (name "pafplot") + (version (git-version "0.0.0" revision commit)) + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/ekg/pafplot.git") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 "04ffz0zfj4mvfxmrwgisv213fypgl02f7sim950a067pm7375g1l")))) + (build-system cargo-build-system) + (arguments + `(#:install-source? #f + #:cargo-inputs + (("rust-clap" ,rust-clap-2) + ("rust-boomphf" ,rust-boomphf-0.5) + ("rust-itertools" ,rust-itertools-0.10) + ("rust-fnv" ,rust-fnv-1) + ("rust-lodepng" ,rust-lodepng-3) + ("rust-rgb" ,rust-rgb-0.8) + ("rust-line-drawing" ,rust-line-drawing-0.8)))) + (home-page "https://github.com/ekg/pafplot.git") + (synopsis "Base-level dotplots from PAF alignments") + (description "In the process of generating alignments between whole +genomes, we often need to understand the base-level alignment between +particular sequences. @command{pafplot} allows us to do so by rasterizing the +matches alignment set. It draws a line on a raster image to represent each +match found in a set of alignments. The resulting image provides a high-level +view of the structure of the alignments, and in consequence the homology +relationships between the sequences in consideration.") + (license license:expat)))) + (define-public gafpack (let ((commit "ad31875b6914d964c6fd72d1bf334f0843538fb6") ; November 10, 2022 (revision "1")) @@ -1399,120 +1598,82 @@ runApp(launch.browser=0, port=4208)~%\n" (scRNA-seq) data analysis.") (license license:agpl3)))) -(define-public seqwish-0.1 +(define-public seqwish (package (name "seqwish") - (version "0.1") + (version "0.7.9") (source (origin (method git-fetch) (uri (git-reference (url "https://github.com/ekg/seqwish.git") - (commit (string-append "v" version)))) + (commit (string-append "v" version)) + (recursive? #t))) (file-name (git-file-name name version)) (sha256 - (base32 - "1gp72cmi13hbkmwwhgckmxkbx8w644jc5l6dvvvxdbl6sk8xsi5r")))) - (build-system gnu-build-system) + (base32 "0xnv40kjlb610bk67n4xdqz5dfsjhrqld5bxzblji57k6bb4n66x")) + (patches (search-patches "seqwish-paryfor-riscv.diff" + "seqwish-shared-library.diff")) + (snippet + #~(begin + (use-modules (guix build utils)) + (substitute* '("CMakeLists.txt" + "deps/atomic_queue/Makefile" + "deps/mmmulti/deps/DYNAMIC/CMakeLists.txt" + "deps/mmmulti/deps/atomic_queue/Makefile" + "deps/mmmulti/deps/ips4o/CMakeLists.txt") + (("-march=native") "") + (("-mcx16") "")) + (substitute* '("deps/mmmulti/deps/sdsl-lite/CMakeLists.txt" + "deps/sdsl-lite/CMakeLists.txt") + (("-msse4.2 -march=native") "")))))) + (build-system cmake-build-system) (arguments - `(#:phases + `(#:configure-flags + (cons* ,@(if (target-x86?) + ;; This is the minimum needed to compile on x86_64, and is a + ;; subset of any other optimizations which might be applied. + '("-DCMAKE_C_FLAGS=-mcx16" + "-DCMAKE_CXX_FLAGS=-mcx16") + '()) + '("-DSEQWISH_LINK_SHARED_LIBRARY=ON")) + #:phases (modify-phases %standard-phases - (delete 'configure) - (replace 'build + (add-after 'unpack 'set-version + (lambda _ + ;; This stashes the build version in the executable. + (mkdir "include") + (substitute* "CMakeLists.txt" + (("^execute_process") "#execute_process")) + (with-output-to-file "include/seqwish_git_version.hpp" + (lambda () + (format #t "#define SEQWISH_GIT_VERSION \"~a\"~%" ,version))))) + (add-after 'unpack 'link-with-some-shared-libraries (lambda* (#:key inputs #:allow-other-keys) - (let ((sdsl-lite (assoc-ref inputs "sdsl-lite")) - (sufsort (assoc-ref inputs "sufsort")) - (bsort (assoc-ref inputs "bsort")) - (mmap_allocator (assoc-ref inputs "mmap-allocator")) - (tayweeargs (assoc-ref inputs "tayweeargs-source")) - (gzipreader (assoc-ref inputs "gzipreader-source")) - (mmmultimap (assoc-ref inputs "mmmultimap-source")) - (iitii (assoc-ref inputs "iitii-source")) - (ips4o (assoc-ref inputs "ips4o-source"))) - (apply invoke "g++" "-o" "seqwish" - "-O3" "-g" "-std=c++14" "-fopenmp" - "-latomic" "-lz" - (string-append "-I" sdsl-lite "/include") - (string-append "-I" sdsl-lite "/include/sdsl") - (string-append "-I" bsort "/include") - (string-append "-I" tayweeargs) - (string-append "-I" gzipreader) - (string-append "-I" mmmultimap "/src") - (string-append "-I" iitii "/src") - (string-append "-I" mmap_allocator "/include") - (string-append "-I" ips4o) - (append - (find-files "src" ".") - (list - (string-append sdsl-lite "/lib/libsdsl.so") - (string-append sufsort "/lib/libdivsufsort.so") - (string-append sufsort "/lib/libdivsufsort64.so") - (string-append mmap_allocator "/lib/libmmap_allocator.a") - (string-append bsort "/lib/libbsort.a"))))))) + (substitute* '("CMakeLists.txt" + "deps/mmmulti/CMakeLists.txt") + (("\".*libsdsl\\.a\"") "\"-lsdsl\"") + (("\".*libdivsufsort\\.a\"") "\"-ldivsufsort\"") + (("\".*libdivsufsort64\\.a\"") "\"-ldivsufsort64\"") + (("\\$\\{sdsl-lite_INCLUDE\\}") + (search-input-directory inputs "/include/sdsl")) + (("\\$\\{sdsl-lite-divsufsort_INCLUDE\\}") + (dirname + (search-input-file inputs "/include/divsufsort.h")))))) (replace 'check - (lambda _ + (lambda* (#:key tests? #:allow-other-keys) ;; Add seqwish to the PATH for the tests. (setenv "PATH" (string-append (getcwd) ":" (getenv "PATH"))) - (with-directory-excursion "test" - (invoke "make")))) - (replace 'install - (lambda* (#:key outputs #:allow-other-keys) - (let ((out (assoc-ref outputs "out"))) - (install-file "seqwish" (string-append out "/bin"))) - #t))))) + (when tests? + (with-directory-excursion "../source/test" + (invoke "make")))))))) (inputs - `(("bsort" ,ekg-bsort) - ("mmap-allocator" ,ekg-mmap-allocator) - ("openmpi" ,openmpi) - ("sdsl-lite" ,sdsl-lite) - ("sufsort" ,libdivsufsort) - ("zlib" ,zlib))) + (list jemalloc + libdivsufsort + openmpi + sdsl-lite + zlib)) (native-inputs - `(("prove" ,perl) - ("tayweeargs-source" ,(origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/Taywee/args.git") - (commit "3de44ec671db452cc0c4ef86399b108939768abb"))) - (file-name "tayweeargs-source-for-seqwish") - (sha256 - (base32 - "1v8kq1gvl5waysrfp0s58881rx39mnf3ifdsl6pb3y3c4zaki2xh")))) - ("gzipreader-source" ,(origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/gatoravi/gzip_reader.git") - (commit "0ef26c0399e926087f9d6c4a56067a7bf1fc4f5e"))) - (file-name "gzipreader-source-for-seqwish") - (sha256 - (base32 - "1wy84ksx900840c06w0f1mgzvr7zsfsgxq1b0jdjh8qka26z1r17")))) - ("mmmultimap-source" ,(origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/ekg/mmmultimap.git") - (commit "88c734c36563048b0f3acc04dd8856f19e02b75f"))) - (file-name "mmmultimap-source-for-seqwish") - (sha256 - (base32 - "06mnf3bd32s3ngxkl573ylg2qsvlw80r1ksdwamx3fzxa1a5yls0")))) - ("iitii-source" ,(origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/ekg/iitii.git") - (commit "85209e07a3ee403fb6557387a7f897cd76be4406"))) - (file-name "iitii-source-for-seqwish") - (sha256 - (base32 - "0sszvffkswf89nkbjmjg3wjwqvy2w0d3wgy3ngy33ma4sy4s025s")))) - ("ips4o-source" ,(origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/SaschaWitt/ips4o.git") - (commit "bff3ccf0bf349497f2bb10f825d160b792236367"))) - (file-name "ips4o-source-for-seqwish") - (sha256 - (base32 - "0yjfvrkiwgmy5cn0a7b9j8jwc3zp0l8j4dl5n0jgz68pdnhlp96h")))))) + (list perl)) (home-page "https://github.com/ekg/seqwish") (synopsis "Alignment to variation graph inducer") (description "Seqwish implements a lossless conversion from pairwise @@ -1525,77 +1686,212 @@ large inputs that are commonly encountered when working with large numbers of noisy input sequences. Memory usage during construction and traversal is limited by the use of sorted disk-backed arrays and succinct rank/select dictionaries to record a queryable version of the graph.") + (properties `((tunable? . #t))) (license license:expat))) -(define ekg-bsort - (let ((commit "c3ab0d3308424030e0a000645a26d2c10a59a124") - (revision "1")) - (package - (name "bsort") - (version (git-version "0.0.0" revision commit)) - (source - (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/ekg/bsort.git") - (commit commit))) - (file-name (git-file-name name version)) - (sha256 - (base32 - "0dgpflzcp3vdhbjwbjw347czi86gyk73hxcwjdqnaqh5vg61bdb6")))) - (build-system cmake-build-system) - (arguments - '(#:tests? #f ; no test target - #:out-of-source? #f - #:phases - (modify-phases %standard-phases - (replace 'install - (lambda* (#:key outputs #:allow-other-keys) - (let ((out (assoc-ref outputs "out"))) - (install-file "bin/bsort" (string-append out "/bin")) - (install-file "src/bsort.hpp" (string-append out "/include")) - (install-file "lib/libbsort.a" (string-append out "/lib"))) - #t))))) - (home-page "") - (synopsis "") - (description "") - (license license:gpl2)))) +(define-public seqwish-x86-64-v2 + (package/inherit seqwish + (name "seqwish-x86-64-v2") + (outputs '("out" "static")) + (arguments + (substitute-keyword-arguments (package-arguments seqwish) + ((#:configure-flags flags #~'()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v2" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v2" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v2")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-extra-files + (lambda _ + (delete-file-recursively (string-append #$output "/bin")))) + (add-after 'install 'move-static-library + (lambda* (#:key outputs #:allow-other-keys) + (let ((lib "/lib/glibc-hwcaps/x86-64-v2/libseqwish.a")) + (mkdir-p (dirname (string-append #$output:static lib))) + (rename-file (string-append #$output lib) + (string-append #$output:static lib))))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t) + (tunable? . #f))))) -(define ekg-mmap-allocator - (let ((commit "ed61daf094de1c2e1adbe8306287ad52da5f0264") - (revision "1")) - (package - (name "mmap-allocator") - (version (git-version "0.10.1" revision commit)) - (source - (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/ekg/mmap_allocator.git") - (commit commit))) - (file-name (git-file-name name version)) - (sha256 - (base32 - "1f30b2kpwwzh6333s0qi5samk458ghbnvyycf6rwx6n6j7xswhbw")))) - (build-system gnu-build-system) - (arguments - '(#:phases - (modify-phases %standard-phases - (delete 'configure) ; no configure script - (add-before 'install 'pre-install - (lambda* (#:key outputs #:allow-other-keys) - (let ((out (assoc-ref outputs "out"))) +(define-public seqwish-x86-64-v3 + (package/inherit seqwish + (name "seqwish-x86-64-v3") + (outputs '("out" "static")) + (arguments + (substitute-keyword-arguments (package-arguments seqwish) + ((#:configure-flags flags #~'()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v3" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v3" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v3")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-extra-files + (lambda _ + (delete-file-recursively (string-append #$output "/bin")))) + (add-after 'install 'move-static-library + (lambda* (#:key outputs #:allow-other-keys) + (let ((lib "/lib/glibc-hwcaps/x86-64-v3/libseqwish.a")) + (mkdir-p (dirname (string-append #$output:static lib))) + (rename-file (string-append #$output lib) + (string-append #$output:static lib))))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t) + (tunable? . #f))))) + +(define-public seqwish-x86-64-v4 + (package/inherit seqwish + (name "seqwish-x86-64-v4") + (outputs '("out" "static")) + (arguments + (substitute-keyword-arguments (package-arguments seqwish) + ((#:configure-flags flags #~'()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v4" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v4" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v4")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-extra-files + (lambda _ + (delete-file-recursively (string-append #$output "/bin")))) + (add-after 'install 'move-static-library + (lambda* (#:key outputs #:allow-other-keys) + (let ((lib "/lib/glibc-hwcaps/x86-64-v4/libseqwish.a")) + (mkdir-p (dirname (string-append #$output:static lib))) + (rename-file (string-append #$output lib) + (string-append #$output:static lib))))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t) + (tunable? . #f))))) + +;; This copy of seqwish will automatically use the libraries that target the +;; x86_64 psABI which the hardware supports. +(define-public seqwish-hwcaps + (package/inherit seqwish + (name "seqwish-hwcaps") + (arguments + (substitute-keyword-arguments (package-arguments seqwish) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'install-optimized-libraries + (lambda* (#:key inputs outputs #:allow-other-keys) + (let ((hwcaps "/lib/glibc-hwcaps")) + (copy-recursively + (string-append (assoc-ref inputs "seqwish-x86-64-v2") + hwcaps "/x86-64-v2") + (string-append #$output hwcaps "/x86-64-v2")) + (copy-recursively + (string-append (assoc-ref inputs "seqwish-x86-64-v3") + hwcaps "/x86-64-v3") + (string-append #$output hwcaps "/x86-64-v3")) + (copy-recursively + (string-append (assoc-ref inputs "seqwish-x86-64-v4") + hwcaps "/x86-64-v4") + (string-append #$output hwcaps "/x86-64-v4"))))))))) + (native-inputs + (modify-inputs (package-native-inputs seqwish) + (append seqwish-x86-64-v2 + seqwish-x86-64-v3 + seqwish-x86-64-v4))) + (properties `((tunable? . #f))))) + +(define-public smoothxg + (package + (name "smoothxg") + (version "0.7.2") + (source (origin + (method url-fetch) + (uri (string-append "https://github.com/pangenome/smoothxg" + "/releases/download/v" version + "/smoothxg-v" version ".tar.gz")) + (sha256 + (base32 "1px8b5aaa23z85i7ximdamk2jj7wk5hb7bpbrgxsvkxc69zlwy38")) + (snippet + #~(begin + (use-modules (guix build utils)) + (substitute* (find-files "." "CMakeLists.txt") + (("spoa_optimize_for_native ON") + "spoa_optimize_for_native OFF") + (("-msse4\\.2") "") + (("-march=native") "")))))) + (build-system cmake-build-system) + (arguments + (list + #:make-flags + #~(list (string-append "CC = " #$(cc-for-target))) + #:phases + #~(modify-phases %standard-phases + (add-after 'unpack 'link-with-some-shared-libraries + (lambda* (#:key inputs #:allow-other-keys) + (substitute* '("CMakeLists.txt" + "deps/mmmulti/CMakeLists.txt" + "deps/odgi/deps/mmmulti/CMakeLists.txt") + (("\".*libsdsl\\.a\"") "\"-lsdsl\"") + (("\".*libdivsufsort\\.a\"") "\"-ldivsufsort\"") + (("\".*libdivsufsort64\\.a\"") "\"-ldivsufsort64\"") + (("\".*libodgi\\.a\"") "\"-lodgi\"") + (("\\$\\{sdsl-lite_INCLUDE\\}") + (search-input-directory inputs "/include/sdsl")) + (("\\$\\{sdsl-lite-divsufsort_INCLUDE\\}") + (dirname + (search-input-file inputs "/include/divsufsort.h"))) + (("\\$\\{odgi_INCLUDE\\}") + (search-input-directory inputs "/include/odgi"))))) + (add-before 'build 'build-abPOA + (lambda* (#:key make-flags #:allow-other-keys) + ;; This helps with portability to other architectures. + (with-directory-excursion + (string-append "../smoothxg-v" #$version "/deps/abPOA") (substitute* "Makefile" - (("HEADERS=") "HEADERS=mmappable_vector.h ") - (("/usr") out)) - (mkdir-p (string-append out "/lib")) - (mkdir (string-append out "/include")) - #t)))) - #:test-target "test")) - (home-page "") - (synopsis "") - (description "") - (license license:lgpl2.0+)))) ; README just says "lpgl". + (("-march=native") "")) + (apply invoke "make" "libabpoa" make-flags))))))) + (inputs + (list jemalloc + libdivsufsort + odgi + openmpi + pybind11 + python + sdsl-lite + zlib + (list zstd "lib"))) + (native-inputs + (list pkg-config)) + (home-page "https://github.com/ekg/smoothxg") + (synopsis + "Linearize and simplify variation graphs using blocked partial order alignment") + (description "Pangenome graphs built from raw sets of alignments may have +complex local structures generated by common patterns of genome variation. +These local nonlinearities can introduce difficulty in downstream analyses, +visualization, and interpretation of variation graphs. + +@command{smoothxg} finds blocks of paths that are collinear within a variation +graph. It applies partial order alignment to each block, yielding an acyclic +variation graph. Then, to yield a smoothed graph, it walks the original paths +to lace these subgraphs together. The resulting graph only contains cyclic or +inverting structures larger than the chosen block size, and is otherwise +manifold linear. In addition to providing a linear structure to the graph, +smoothxg can be used to extract the consensus pangenome graph by applying the +heaviest bundle algorithm to each chain. + +To find blocks, smoothxg applies a greedy algorithm that assumes that the graph +nodes are sorted according to their occurence in the graph's embedded paths. +The path-guided stochastic gradient descent based 1D sort implemented in +@command{odgi sort -Y} is designed to provide this kind of sort.") + (properties `((tunable? . #t))) + (license license:expat))) ;; TODO: Unbundle BBHash, parallel-hashmap, zstr (define-public graphaligner @@ -1665,19 +1961,30 @@ here}.") (define-public mummer (package (name "mummer") - (version "4.0.0beta2") + (version "4.0.0rc1") (source (origin (method url-fetch) (uri (string-append "https://github.com/mummer4/mummer/releases/" "download/v" version "/mummer-" version ".tar.gz")) (sha256 - (base32 - "14qvrmf0gkl4alnh8zgxlzmvwc027arfawl96i7jk75z33j7dknf")))) + (base32 "07bxw1vax1sai3g5xjn6sqngddlbnlabpqy373vw4fb55pdnl045")))) (build-system gnu-build-system) + (arguments + (list + #:phases + #~(modify-phases %standard-phases + (add-after 'configure 'skip-test_md5-tests + (lambda _ + ;; There seems to be a bug with how these tests are called. + (substitute* "Makefile" + (("tests/mummer.sh") "") + (("tests/nucmer.sh") "") + (("tests/genome.sh") "") + (("tests/sam.sh") ""))))))) (inputs - `(("gnuplot" ,gnuplot) - ("perl" ,perl))) + (list gnuplot + perl)) (home-page "http://mummer.sourceforge.net/") (synopsis "Efficient sequence alignment of full genomes") (description "MUMmer is a versatil alignment tool for DNA and protein sequences.") @@ -1790,79 +2097,6 @@ reads, also called read-based phasing or haplotype assembly. It is especially suitable for long reads, but works also well with short reads.") (license license:expat))) -(define-public bh20-seq-resource - (let ((commit "2ae71911cd87ce4f2eabdff21e538267b3270d45") - (revision "4")) - (package - (name "bh20-seq-resource") - (version (git-version "1.0" revision commit)) - (source (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/pubseq/bh20-seq-resource") - (commit commit))) - (file-name (git-file-name name version)) - (sha256 - (base32 "1k6cc88hrcm77jwpdk2084q0zirv2vlbz3c07nmpbhk1lhqk5x0n")) - (modules '((guix build utils))) - (snippet - '(begin - (delete-file "gittaggers.py"))))) - (build-system python-build-system) - (arguments - (list - #:tests? #f ; Tests can't find pytest - #:phases - #~(modify-phases %standard-phases - (add-after 'unpack 'patch-program-calls - (lambda* (#:key inputs #:allow-other-keys) - (substitute* "bh20sequploader/qc_fasta.py" - (("\"minimap2\"") - (string-append "\"" (search-input-file - inputs "/bin/minimap2") - "\"")))))))) - (propagated-inputs - (list python-arvados-python-client - python-schema-salad - python-magic - python-pyshex - python-pyshexc-0.7 - python-py-dateutil - - ;; for the web - python-flask - python-pyyaml - python-redis - - ;; and for the service - python - gunicorn)) - (inputs - (list minimap2)) - (native-inputs - (list python-pytest-4 ; < 6 - python-pytest-runner-4)) ; < 5 - (home-page "https://github.com/pubseq/bh20-seq-resource") - (synopsis - "Tool to upload SARS-CoV-19 sequences and service to kick off analysis") - (description "This repository provides a sequence uploader for the -COVID-19 Virtual Biohackathon's Public Sequence Resource project. You can use -it to upload the genomes of SARS-CoV-2 samples to make them publicly and freely -available to other researchers.") - (license license:asl2.0)))) - -;; This version has no profile collisions. -(define-public bh20-seq-resource-for-service - (package - ;(inherit (fix-profile-collisions-for-bh20 bh20-seq-resource)) - (inherit - ((package-input-rewriting/spec - `(("python-google-api-core" . ,(const python-google-api-core-1)) - ("python-google-auth" . ,(const python-google-auth-1)) - ("python-pyparsing" . ,(const python-pyparsing-2.4.7)))) - bh20-seq-resource)) - (properties `((hidden? . #t))))) - (define-public python-scanpy-git (let ((commit "590d42309f9ed6550d7b887039990edfc1ac7648") ; April 22, 2020 (revision "1")) @@ -1903,32 +2137,60 @@ available to other researchers.") (delete-file "scanpy/tests/test_pca.py") #t))))))))) -;; TODO: Unbundle everything +;; TODO: Unbundle everything before upstreaming (define-public odgi (package (name "odgi") - (version "0.8.1") + (version "0.8.3") + (outputs '("out" "static")) (source (origin (method url-fetch) (uri (string-append "https://github.com/pangenome/odgi/releases" "/download/v" version "/odgi-v" version ".tar.gz")) (sha256 - (base32 "175083pb9hp0vn9a00hbxlayyk5a5j8p52yq5qfmbnfvndisbmbv")) + (base32 "1gw1xdb945z25rar6pba6kq5xdx8l7fkhxjyrvc1z1brva53p9hk")) (snippet #~(begin (use-modules (guix build utils)) (substitute* "CMakeLists.txt" (("-march=native") "") - (("-msse4\\.2") "")) - (delete-file-recursively "deps/pybind11") - (delete-file-recursively "deps/sdsl-lite"))))) + (("-msse4\\.2") "")))))) (build-system cmake-build-system) + (arguments + (list + #:phases + #~(modify-phases %standard-phases + (add-after 'unpack 'use-gnuinstalldirs-macros + (lambda _ + (substitute* "CMakeLists.txt" + (("project\\(odgi\\)" all) + (string-append all "\ninclude(GNUInstallDirs)")) + ;; This is different than the default. + ;(("PUBLIC_HEADER DESTINATION include/odgi") + ; "PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}") + (("LIBRARY DESTINATION lib") + "LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}") + (("ARCHIVE DESTINATION lib") + "ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}")))) + (add-after 'unpack 'link-to-libodgi + (lambda _ + ;; This lets us provide libraries for different psABI levels. + (substitute* "CMakeLists.txt" + (("^ \\$<TARGET_OBJECTS:odgi_objs>.*") "") + (("target_link_libraries\\(odgi " all) + (string-append all "libodgi_shared "))))) + (add-after 'install 'move-static-library + (lambda* (#:key outputs #:allow-other-keys) + (mkdir-p (string-append #$output:static "/lib")) + (rename-file (string-append #$output "/lib/libodgi.a") + (string-append #$output:static "/lib/libodgi.a"))))))) (native-inputs (list pkg-config)) (inputs (list jemalloc libdivsufsort + openmpi pybind11 python sdsl-lite)) @@ -1953,189 +2215,280 @@ in-memory footprint at the cost of packing and unpacking.") (properties '((tunable? . #t))) (license license:expat))) +(define-public odgi-x86-64-v2 + (package/inherit odgi + (name "odgi-x86-64-v2") + (arguments + (substitute-keyword-arguments (package-arguments odgi) + ((#:configure-flags flags #~'()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v2" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v2" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v2")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-extra-files + (lambda _ + (delete-file-recursively (string-append #$output "/bin")) + (delete-file-recursively (string-append #$output "/include")))) + (replace 'move-static-library + (lambda* (#:key outputs #:allow-other-keys) + (let ((lib "/lib/glibc-hwcaps/x86-64-v2/libodgi.a")) + (mkdir-p (dirname (string-append #$output:static lib))) + (rename-file (string-append #$output lib) + (string-append #$output:static lib))))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t))))) + +(define-public odgi-x86-64-v3 + (package/inherit odgi + (name "odgi-x86-64-v3") + (arguments + (substitute-keyword-arguments (package-arguments odgi) + ((#:configure-flags flags #~'()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v3" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v3" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v3")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-extra-files + (lambda _ + (delete-file-recursively (string-append #$output "/bin")) + (delete-file-recursively (string-append #$output "/include")))) + (replace 'move-static-library + (lambda* (#:key outputs #:allow-other-keys) + (let ((lib "/lib/glibc-hwcaps/x86-64-v3/libodgi.a")) + (mkdir-p (dirname (string-append #$output:static lib))) + (rename-file (string-append #$output lib) + (string-append #$output:static lib))))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t))))) + +(define-public odgi-x86-64-v4 + (package/inherit odgi + (name "odgi-x86-64-v4") + (arguments + (substitute-keyword-arguments (package-arguments odgi) + ((#:configure-flags flags #~'()) + #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v4" + "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v4" + (string-append "-DCMAKE_INSTALL_RPATH=" #$output + "/lib/glibc-hwcaps/x86-64-v4")) + #$flags)) + ;; The building machine can't necessarily run the code produced. + ((#:tests? _ #t) #f) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'remove-extra-files + (lambda _ + (delete-file-recursively (string-append #$output "/bin")) + (delete-file-recursively (string-append #$output "/include")))) + (replace 'move-static-library + (lambda* (#:key outputs #:allow-other-keys) + (let ((lib "/lib/glibc-hwcaps/x86-64-v4/libodgi.a")) + (mkdir-p (dirname (string-append #$output:static lib))) + (rename-file (string-append #$output lib) + (string-append #$output:static lib))))))))) + (supported-systems '("x86_64-linux")) + (properties `((hidden? . #t))))) + +;; This copy of odgi will automatically use the libraries that target the +;; x86_64 psABI which the hardware supports. +(define-public odgi-hwcaps + (package/inherit odgi + (name "odgi-hwcaps") + (arguments + (substitute-keyword-arguments (package-arguments odgi) + ((#:phases phases #~%standard-phases) + #~(modify-phases #$phases + (add-after 'install 'install-optimized-libraries + (lambda* (#:key inputs outputs #:allow-other-keys) + (let ((hwcaps "/lib/glibc-hwcaps")) + (copy-recursively + (string-append (assoc-ref inputs "odgi-x86-64-v2") + hwcaps "/x86-64-v2") + (string-append #$output hwcaps "/x86-64-v2")) + (copy-recursively + (string-append (assoc-ref inputs "odgi-x86-64-v3") + hwcaps "/x86-64-v3") + (string-append #$output hwcaps "/x86-64-v3")) + (copy-recursively + (string-append (assoc-ref inputs "odgi-x86-64-v4") + hwcaps "/x86-64-v4") + (string-append #$output hwcaps "/x86-64-v4"))))))))) + (native-inputs + (modify-inputs (package-native-inputs odgi) + (append odgi-x86-64-v2 + odgi-x86-64-v3 + odgi-x86-64-v4))) + (properties `((tunable? . #f))))) + (define-public vg (package (name "vg") - (version "1.39.0") + (version "1.50.0") (source (origin (method url-fetch) (uri (string-append "https://github.com/vgteam/vg/releases/download/v" version "/vg-v" version ".tar.gz")) (sha256 - (base32 "0cj575qr2jkingrm6r4ki7f89s7glrf18d4pvaa69smxh2vbajv3")) - (modules '((guix build utils))) + (base32 "1n06fh6qvffhbxy7m096r8cy16wi0nm6gfgi3rsjy9zrb7g1jzhs")) (snippet - '(begin - ;; List all the options, makes it easier to try to remove them. - ;(delete-file-recursively "deps/BBHash") - ;(delete-file-recursively "deps/DYNAMIC") - ;(delete-file-recursively "deps/FlameGraph") - ;(delete-file-recursively "deps/atomic_queue") - ;(delete-file-recursively "deps/backward-cpp") - (delete-file-recursively "deps/bash-tap") - ;(delete-file-recursively "deps/dozeu") - (delete-file-recursively "deps/elfutils") - (delete-file-recursively "deps/fastahack") - ;(delete-file-recursively "deps/fermi-lite") - ;(delete-file-recursively "deps/gbwt") - ;(delete-file-recursively "deps/gbwtgraph") - ;(delete-file-recursively "deps/gcsa2") - ;(delete-file-recursively "deps/gfakluge") - ;(delete-file-recursively "deps/gssw") - (delete-file-recursively "deps/htslib") - ;(delete-file-recursively "deps/ips4o") - (delete-file-recursively "deps/jemalloc") - ;(delete-file-recursively "deps/libVCFH") - ;(delete-file-recursively "deps/libbdsg") - ;(delete-file-recursively "deps/libbdsg/bdsg/deps") - (delete-file-recursively "deps/libbdsg/bdsg/deps/BBHash") - (delete-file-recursively "deps/libbdsg/bdsg/deps/DYNAMIC") - ;(delete-file-recursively "deps/libbdsg/bdsg/deps/DYNAMIC/deps/hopscotch-map") - ;(delete-file-recursively "deps/libbdsg/bdsg/deps/hopscotch-map") - (delete-file-recursively "deps/libbdsg/bdsg/deps/libhandlegraph") - ;(delete-file-recursively "deps/libbdsg/bdsg/deps/mio") - (delete-file-recursively "deps/libbdsg/bdsg/deps/pybind11") - (delete-file-recursively "deps/libbdsg/bdsg/deps/sdsl-lite") - (delete-file-recursively "deps/libbdsg/bdsg/deps/sparsepp") - ;(delete-file-recursively "deps/libdeflate") - ;(delete-file-recursively "deps/libhandlegraph") - ;(delete-file-recursively "deps/libVCFH") - ;(delete-file-recursively "deps/libvgio") - ;(delete-file-recursively "deps/libvgio/deps") ; libhandlegraph - ;(delete-file-recursively "deps/lru_cache") - ;(delete-file-recursively "deps/mio") - ;(delete-file-recursively "deps/mmmultimap") - (delete-file-recursively "deps/mmmultimap/deps/DYNAMIC") - (delete-file-recursively "deps/mmmultimap/deps/args") - (delete-file-recursively "deps/mmmultimap/deps/atomic_queue") - ;(delete-file-recursively "deps/mmmultimap/deps/hopscotch-map") - (delete-file-recursively "deps/mmmultimap/deps/ips4o") - (delete-file-recursively "deps/mmmultimap/deps/mio") - ;(delete-file-recursively "deps/mmmultimap/deps/paryfor") - (delete-file-recursively "deps/mmmultimap/deps/sdsl-lite") - ;(delete-file-recursively "deps/pinchesAndCacti") - ;(delete-file-recursively "deps/progress_bar") - (delete-file-recursively "deps/raptor") - ;(delete-file-recursively "deps/sdsl-lite") - ;(delete-file-recursively "deps/sha1") - (delete-file-recursively "deps/snappy") - ;(delete-file-recursively "deps/sonLib") - (delete-file-recursively "deps/sparsehash") - ;(delete-file-recursively "deps/sparsepp") - ;(delete-file-recursively "deps/ssw") - ;(delete-file-recursively "deps/structures") - ;(delete-file-recursively "deps/sublinear-Li-Stephens") - (delete-file-recursively "deps/sublinear-Li-Stephens/deps") - (delete-file-recursively "deps/tabixpp") - (delete-file-recursively "deps/vcflib") - ;(delete-file-recursively "deps/xg") - (delete-file-recursively "deps/xg/deps") - ;; libvgio doesn't search the correct include directory. - (copy-recursively "deps/libhandlegraph/src/include/handlegraph" - "deps/libvgio/include/handlegraph"))))) + #~(begin + (use-modules (guix build utils)) + (substitute* (find-files "." "(CMakeLists\\.txt|Makefile)") + (("-march=native") "") + (("-mtune=native") "") + (("-msse4.2") "") + (("-mcx16") "")))))) (build-system gnu-build-system) (arguments `(#:phases (modify-phases %standard-phases (delete 'configure) ; no configure script - ,@(if (target-riscv64?) - ;; riscv64 doesn't take '-march=native. This needs to be removed - ;; for all architectures if/when vg is upstreamed. - `((add-after 'unpack 'dont-build-native - (lambda _ - (substitute* (append (find-files "." "CMakeLists\\.txt") - (find-files "." "Makefile")) - (("-march=native") ""))))) - '()) (add-after 'unpack 'patch-source (lambda* (#:key inputs #:allow-other-keys) + ;; Most of these are so that we can skip bootstrapping some of the sources. (substitute* "Makefile" ;; PKG_CONFIG_DEPS needs to be substituted to actually link to everything. - (("cairo jansson") - "cairo htslib jansson libdw libelf protobuf raptor2 sdsl-lite tabixpp vcflib") + (("cairo libzstd") + "cairo htslib libzstd libdw libelf protobuf raptor2 sdsl-lite tabixpp vcflib fastahack libdeflate") ;; Skip the part where we link static libraries special. It doesn't like the changes we make (("-Wl,-B.*") "\n") (("\\$\\(CWD\\)/\\$\\(LIB_DIR\\)/libtabixpp\\.a") "$(LIB_DIR)/libtabixpp.a") ((" \\$\\(LIB_DIR\\)/libtabixpp\\.a") - (string-append " " (assoc-ref inputs "tabixpp") "/lib/libtabixpp.so")) + (string-append " " (search-input-file inputs "/lib/libtabixpp.so"))) (("\\$\\(LIB_DIR\\)/pkgconfig/tabixpp\\.pc") - (string-append " " (assoc-ref inputs "tabixpp") "/lib/pkgconfig/tabixpp.pc")) + (string-append " " (search-input-file inputs "/lib/pkgconfig/tabixpp.pc"))) (("\\$\\(CWD\\)/\\$\\(LIB_DIR\\)/libhts\\.a") "$(LIB_DIR)/libhts.a") ((" \\$\\(LIB_DIR\\)/libhts\\.a") - (string-append " " (assoc-ref inputs "htslib") "/lib/libhts.so")) + (string-append " " (search-input-file inputs "/lib/libhts.so"))) (("\\$\\(LIB_DIR\\)/pkgconfig/htslib\\.pc") - (string-append " " (assoc-ref inputs "htslib") "/lib/pkgconfig/htslib.pc")) + (string-append " " (search-input-file inputs "/lib/pkgconfig/htslib.pc"))) + + (("\\$\\(CWD\\)/\\$\\(LIB_DIR\\)/libdeflate\\.a") "$(LIB_DIR)/libdeflate.a") + ((" \\$\\(LIB_DIR\\)/libdeflate\\.a") + (string-append " " (search-input-file inputs "/lib/libdeflate.so"))) ((" \\$\\(LIB_DIR\\)/libvcflib.a") - (string-append " " (assoc-ref inputs "vcflib") "/lib/libvcflib.so")) + (string-append " " (search-input-file inputs "/lib/libvcflib.so"))) ((" \\$\\(BIN_DIR\\)/vcf2tsv") - (string-append " " (assoc-ref inputs "vcflib") "/bin/vcf2tsv")) - ((" \\$\\(VCFLIB_DIR\\)/bin/vcf2tsv") - (string-append " " (assoc-ref inputs "vcflib") "/bin/vcf2tsv")) + (string-append " " (search-input-file inputs "/bin/vcf2tsv"))) ((" \\$\\(FASTAHACK_DIR\\)/fastahack") - (string-append " " (assoc-ref inputs "fastahack") "/bin/fastahack")) - ((" \\$\\(FASTAHACK_DIR\\)/bin/fastahack") - (string-append " " (assoc-ref inputs "fastahack") "/bin/fastahack")) + (string-append " " (search-input-file inputs "/bin/fastahack"))) (("\\+= \\$\\(OBJ_DIR\\)/Fasta\\.o") - (string-append "+= " (assoc-ref inputs "fastahack") "/lib/libfastahack.so")) + (string-append "+= " (search-input-file inputs "/lib/libfastahack.so"))) ((" \\$\\(LIB_DIR\\)/libsnappy.a") - (string-append " " (assoc-ref inputs "snappy") "/lib/libsnappy.so")) + (string-append " " (search-input-file inputs "/lib/libsnappy.so"))) ;; Only link against the libraries in the elfutils package. (("-ldwfl -ldw -ldwelf -lelf -lebl") "-ldw -lelf") ((" \\$\\(LIB_DIR\\)/libelf.a") - (string-append " " (assoc-ref inputs "elfutils") "/lib/libelf.so")) + (string-append " " (search-input-file inputs "/lib/libelf.so"))) ((" \\$\\(LIB_DIR\\)/libdw.a") - (string-append " " (assoc-ref inputs "elfutils") "/lib/libdw.so")) + (string-append " " (search-input-file inputs "/lib/libdw.so"))) ;; We need the Make.helper file in SDSL_DIR for gcsa2 ;((" \\$\\(LIB_DIR\\)/libsdsl.a") - ; (string-append " " (assoc-ref inputs "sdsl-lite") "/lib/libsdsl.so")) + ; (string-append " " (search-input-file inputs "/lib/libsdsl.so"))) + ((" \\$\\(LIB_DIR\\)/%divsufsort.a") + (string-append " " (dirname + (search-input-file inputs "/lib/libdivsufsort.so")) + "%divsufsort.so")) ((" \\$\\(LIB_DIR\\)/libdivsufsort.a") - (string-append " " (assoc-ref inputs "libdivsufsort") "/lib/libdivsufsort.so")) + (string-append " " (search-input-file inputs "/lib/libdivsufsort.so"))) + ((" \\$\\(LIB_DIR\\)/%divsufsort64.a") + (string-append " " (dirname + (search-input-file inputs "/lib/libdivsufsort64.so")) + "%divsufsort64.so")) ((" \\$\\(LIB_DIR\\)/libdivsufsort64.a") - (string-append " " (assoc-ref inputs "libdivsufsort") "/lib/libdivsufsort64.so")) + (string-append " " (search-input-file inputs "/lib/libdivsufsort64.so"))) ((" \\$\\(LIB_DIR\\)/libjemalloc.a") - (string-append " " (assoc-ref inputs "jemalloc") "/lib/libjemalloc.a")) + (string-append " " (search-input-file inputs "/lib/libjemalloc.a"))) ((" \\$\\(INC_DIR\\)/sparsehash") - (string-append " " (assoc-ref inputs "sparsehash") "/include/sparsehash")) + (string-append " " (search-input-directory inputs "/include/sparsehash"))) ((" \\$\\(INC_DIR\\)/raptor2") - (string-append " " (assoc-ref inputs "raptor2") "/include/raptor2")) + (string-append " " (search-input-directory inputs "/include/raptor2"))) ((" \\$\\(LIB_DIR\\)/libraptor2.a") - (string-append " " (assoc-ref inputs "raptor2") "/lib/libraptor2.so")) + (string-append " " (search-input-file inputs "/lib/libraptor2.so"))) ((" \\$\\(BIN_DIR\\)/rapper") - (string-append " " (assoc-ref inputs "raptor2") "/bin/rapper"))) - ;; vcf2tsv shows up in a couple of other places - (substitute* "test/t/02_vg_construct.t" - (("../deps/vcflib/bin/vcf2tsv") (which "vcf2tsv"))))) - (add-after 'unpack 'fix-fastahack-dependency + (string-append " " (search-input-file inputs "/bin/rapper")))))) + (add-after 'unpack 'link-with-some-shared-libraries + (lambda* (#:key inputs #:allow-other-keys) + (substitute* '("deps/mmmultimap/CMakeLists.txt" + "deps/xg/CMakeLists.txt" + "deps/xg/deps/mmmulti/CMakeLists.txt") + (("\".*libsdsl\\.a\"") "\"-lsdsl\"") + (("\".*libdivsufsort\\.a\"") "\"-ldivsufsort\"") + (("\".*libdivsufsort64\\.a\"") "\"-ldivsufsort64\"") + (("\\$\\{sdsl-lite_INCLUDE\\}") + (search-input-directory inputs "/include/sdsl")) + (("\\$\\{sdsl-lite-divsufsort_INCLUDE\\}") + (dirname + (search-input-file inputs "/include/divsufsort.h")))))) + #; + (add-before 'patch-source 'use-shared-libvg + (lambda* (#:key inputs outputs #:allow-other-keys) + (substitute* "Makefile" + (("libvg\\.a") "libvg.so") + ;; Have the linker find the shared library. + (("\\$\\(LIB_DIR\\)/libvg.\\$\\(SHARED_SUFFIX\\) \\$\\(LDFLAGS\\)") + "-lvg $(LDFLAGS)") + (("\\$\\(LDFLAGS\\) \\$\\(LIB_DIR\\)/libvg.so") + "$(LDFLAGS) -lvg")) + (setenv "LDFLAGS" (string-append "-Wl,-rpath=" + (assoc-ref outputs "out") "/lib")) + + ;; We need to tell a number of dependencies to build with -fPIC. + (substitute* "Makefile" + (("^CXXFLAGS := -O3") + (string-append "CFLAGS := -fPIC\n" + "CXXFLAGS := -O3 -fPIC")) + (("^export CXXFLAGS") + (string-append "export CFLAGS\n" + "$(info CFLAGS are $(CFLAGS))\n" + "export CXXFLAGS")) + ((" \\$\\(LIB_DIR\\)/libjemalloc.a") + (string-append " " (assoc-ref inputs "jemalloc") + "/lib/libjemalloc_pic.a"))) + ;; We don't want to pull in all the global CXXFLAGS here. + (substitute* "deps/sublinear-Li-Stephens/makefile" + (("^CXXFLAGS:=") "CXXFLAGS:= -fPIC ")) + ;; CMAKE_CXX_FLAGS aren't set globally. + (substitute* "deps/kff-cpp-api/CMakeLists.txt" + (("CMAKE_CXX_FLAGS \"") "CMAKE_CXX_FLAGS \" -fPIC ")))) + (add-after 'unpack 'dont-build-shared-vgio (lambda _ - (substitute* "src/aligner.hpp" - (("Fasta.h") "fastahack/Fasta.h")))) - (add-after 'unpack 'fix-hopscotch-dependency + ;; vg will link with libvgio and fail the 'validate-runpath phase. + (substitute* "deps/libvgio/CMakeLists.txt" + (("TARGETS vgio vgio_static") "TARGETS vgio_static")))) + (add-after 'unpack 'fix-fastahack-dependency (lambda _ - (substitute* "Makefile" - ;; The build directory for hopscotch_map-prefix. - (("rm -Rf build && ") "")) - ;; Don't try to download hopscotch_map from the internet. - (substitute* "deps/DYNAMIC/CMakeLists.txt" - ((".*GIT_REPOSITORY.*") - "SOURCE_DIR \"../../libbdsg/bdsg/deps/hopscotch-map\"\n") - ((".*BUILD_IN_SOURCE.*") "")) - ;; We still need to copy it to the expected location. - (copy-recursively - "deps/libbdsg/bdsg/deps/hopscotch-map" - "deps/DYNAMIC/build/hopscotch_map-prefix/src/hopscotch_map"))) + (substitute* (append (list "src/aligner.hpp" + "src/vg.hpp") + (find-files "deps/vcflib/src" "\\.cpp$")) + (("Fasta.h") "fastahack/Fasta.h")) + (substitute* '("deps/vcflib/src/Variant.h" + "src/constructor.hpp" + "src/index_registry.cpp") + (("<Fasta.h>") "\"fastahack/Fasta.h\"")))) (add-after 'unpack 'adjust-tests (lambda* (#:key inputs #:allow-other-keys) (let ((bash-tap (assoc-ref inputs "bash-tap"))) @@ -2144,13 +2497,25 @@ in-memory footprint at the cost of packing and unpacking.") (string-append "BASH_TAP_ROOT=" bash-tap "/bin\n")) ((".*bash-tap-bootstrap") (string-append ". " bash-tap "/bin/bash-tap-bootstrap"))) - ;; Lets skip the 4 failing tests for now. They fail with our + (substitute* "test/t/02_vg_construct.t" + (("../deps/fastahack/fastahack") (which "fastahack")) + (("../bin/vcf2tsv") (which "vcf2tsv"))) + ;; Lets skip the 9 failing tests for now. They fail with our ;; bash-tap and the bundled one. (substitute* "test/t/02_vg_construct.t" - ((".*the graph contains.*") "is $(true) \"\" \"\"\n")) + ((".*self-inconsistent.*") "is $(true) \"\" \"\"\n")) + (substitute* "test/t/07_vg_map.t" + ;; Change in fasta's output + (("identity\\) 1 \"") "identity) 1.0 \"")) (substitute* '("test/t/07_vg_map.t" "test/t/33_vg_mpmap.t") ((".*node id.*") "is $(true) \"\" \"\"\n")) + (substitute* "test/t/48_vg_convert.t" + (("true \"vg.*") "true \"true\"\n")) + (substitute* "test/t/50_vg_giraffe.t" + ((".*A long read can.*") "is $(true) \"\" \"\"\n") + ((".*A long read has.*") "is $(true) \"\" \"\"\n") + ((".*Long read minimizer.*") "is $(true) \"\" \"\"\n")) ;; Don't test the docs, we're not providing npm (substitute* "Makefile" ((".*test-docs.*") ""))))) @@ -2162,47 +2527,50 @@ in-memory footprint at the cost of packing and unpacking.") (lambda* (#:key outputs #:allow-other-keys) (let ((out (assoc-ref outputs "out"))) (install-file "bin/vg" (string-append out "/bin")) - (install-file "lib/libvg.a" (string-append out "/lib")) + ;(install-file "lib/libvg.so" (string-append out "/lib")) (for-each (lambda (file) (install-file file (string-append out "/share/man/man1"))) (find-files "doc/man" "\\.1$")))))) #:test-target "test")) (native-inputs - `(,@(if (member (%current-system) - (package-transitive-supported-systems ruby-asciidoctor)) - `(("asciidoctor" ,ruby-asciidoctor)) - '()) - ("bash-tap" ,bash-tap) - ("bc" ,bc) - ("cmake" ,cmake-minimal) - ("jq" ,jq) - ("perl" ,perl) - ("pkg-config" ,pkg-config) - ("samtools" ,samtools) - ("util-linux" ,util-linux) - ("which" ,which) - ("xxd" ,xxd))) + (append + (if (supported-package? ruby-asciidoctor) + (list ruby-asciidoctor) + '()) + (list bash-tap + bc + cmake-minimal + jq + perl + pkg-config + samtools + util-linux + which + xxd))) (inputs - `(("boost" ,boost) - ("cairo" ,cairo) - ("curl" ,curl) - ("elfutils" ,elfutils) - ("fastahack" ,fastahack) - ("htslib" ,htslib) - ("jansson" ,jansson) - ("jemalloc" ,jemalloc) - ("libdivsufsort" ,libdivsufsort) - ("ncurses" ,ncurses) - ("protobuf" ,protobuf) - ("raptor2" ,raptor2) - ("sdsl-lite" ,sdsl-lite) - ("smithwaterman" ,smithwaterman) - ("snappy" ,snappy) - ("sparsehash" ,sparsehash) - ("tabixpp" ,tabixpp) - ("vcflib" ,vcflib) - ("zlib" ,zlib))) + (list boost + cairo + curl + elfutils + fastahack + htslib + jansson + jemalloc + libdeflate + libdivsufsort + ncurses + openmpi + protobuf + raptor2 + sdsl-lite + smithwaterman + snappy + sparsehash + tabixpp + vcflib + zlib + (list zstd "lib"))) (home-page "https://www.biostars.org/t/vg/") (synopsis "Tools for working with genome variation graphs") (description "Variation graphs provide a succinct encoding of the sequences @@ -2216,7 +2584,8 @@ gene models and transcripts) as walks through nodes connected by edges @end enumerate This model is similar to sequence graphs that have been used in assembly and multiple sequence alignment.") - (properties `((release-monitoring-url . "https://github.com/vgteam/vg/releases"))) + (properties `((release-monitoring-url . "https://github.com/vgteam/vg/releases") + (tunable? . #t))) (license (list license:expat ; main program @@ -2227,6 +2596,162 @@ multiple sequence alignment.") license:zlib ; deps/sonLib/externalTools/cutest license:boost1.0)))) ; catch.hpp +(define-public pggb + (let ((commit "9ebff27320382e470ed38a85b4448402e1e7c353") + (revision "1")) + (package + (name "pggb") + (version (git-version "0.5.1" revision commit)) + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/pangenome/pggb") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0rgpj52q3ai7f1saqbilgx5gz4f403x3427wq649qwv84ivmi1sf")))) + (build-system copy-build-system) + (arguments + (list + #:install-plan + #~'(("pggb" "bin/") + ("partition-before-pggb" "bin/") + ("scripts/" "bin/") + ("scripts" "bin/scripts")) + #:phases + #~(modify-phases %standard-phases + (add-after 'unpack 'force-python3 + (lambda _ + (substitute* (find-files "scripts" "\\.py$") + (("/usr/bin/python") "/usr/bin/python3")))) + (add-before 'install 'patch-and-wrap-scripts + (lambda* (#:key inputs #:allow-other-keys) + (substitute* "scripts/vcf_preprocess.sh" + (("bcftools ") + (string-append (search-input-file inputs "/bin/bcftools") " "))) + (wrap-script "scripts/net2communities.py" + `("GUIX_PYTHONPATH" ":" prefix + (,(getenv "GUIX_PYTHONPATH")))))) + (add-after 'install 'wrap-scripts + (lambda* (#:key inputs outputs #:allow-other-keys) + (let ((out (assoc-ref outputs "out"))) + (for-each + (lambda (file) + (wrap-script file + `("R_LIBS_SITE" ":" prefix + (,(getenv "R_LIBS_SITE"))) + `("PATH" ":" prefix + ,(map (lambda (input) (string-append input "/bin")) + '#$(map (lambda (label) + (or (this-package-input (string-append label "-hwcaps")) + (this-package-input label))) + (list "bc" + "bcftools" + "bedtools" + "gfaffix" + "htslib" + "fastix" + "multiqc" + "mummer" + "odgi" + "pafplot" + "parallel" + "pigz" + "python" + "r-data-table" + "r-minimal" + "rtg-tools" + "samtools" + "seqwish" + "smoothxg" + "time" + "vcfbub" + "vcflib" + "vg" + "wfmash")))))) + (list (string-append out "/bin/pggb") + (string-append out "/bin/partition-before-pggb") + (string-append out "/bin/gfa2evaluation.sh") + (string-append out "/bin/scripts/gfa2evaluation.sh")))))) + (add-after 'install 'substitute-file-paths + (lambda* (#:key outputs #:allow-other-keys) + (let ((out (assoc-ref outputs "out"))) + (substitute* (string-append out "/bin/gfa2evaluation.sh") + (("/usr/local/bin/vcf_preprocess.sh") + (string-append out "/bin/vcf_preprocess.sh")) + (("/usr/local/bin/nucmer2vcf.R") + (string-append out "/bin/nucmer2vcf.R"))))))))) + (inputs + (list bc + bcftools + bedtools + gfaffix + guile-3.0 ; for wrap-script + htslib ; tabix + fastix + multiqc + mummer + odgi + pafplot + parallel + pigz + python + python-igraph + r-data-table + r-minimal + rtg-tools + samtools + seqwish + smoothxg + time + vcfbub + vcflib + vg + wfmash)) + (home-page "https://doi.org/10.1101/2023.04.05.535718") + (synopsis "PanGenome Graph Builder") + (description "@command{pggb} builds +@url{https://doi.org/10.1146%2Fannurev-genom-120219-080406, pangenome} +@url{https://doi.org/10.1038/nbt.4227, variation graphs} from a set of input +sequences. +A pangenome variation graph is a kind of generic multiple sequence alignment. +It lets us understand any kind of sequence variation between a collection of +genomes. It shows us similarity where genomes walk through the same parts of +the graph, and differences where they do not. +@command{pggb} generates this kind of graph using an all-to-all alignment of +input sequences (@url{https://github.com/waveygang/wfmash, wfmash}), graph +induction (@url{https://doi.org/10.1101/2022.02.14.480413, seqwish}), and +progressive normalization (@url{https://github.com/pangenome/smoothxg, +smoothxg}, @url{https://github.com/marschall-lab/GFAffix, gfaffix}). After +construction, @command{pggb} generates diagnostic visualizations of the graph +(@url{https://doi.org/10.1093/bioinformatics/btac308, odgi}). A variant call +report (in VCF) representing both small and large variants can be generated +based on any reference genome included in the graph +(@url{https://github.com/vgteam/vg, vg}). @command{pggb} writes its output in +@url{https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md, GFAv1} format, +which can be used as input by numerous \"genome graph\" and pangenome tools, +such as the @url{https://github.com/vgteam/vg, vg} and +@url{https://doi.org/10.1093/bioinformatics/btac308, odgi} toolkits. +@command{pggb} has been tested at scale in the @acronym{Human Pangenome +Reference Consortium, HPRC} as a method to build a graph from the +@url{https://doi.org/10.1101/2022.07.09.499321, draft human pangenome}.") + (license license:expat)))) + +(define use-glibc-hwcaps + (package-input-rewriting/spec + ;; Replace some packages with ones built targeting custom packages build + ;; with glibc-hwcaps support. + `(;("gsl" . ,(const gsl-hwcaps)) ; Causes too many rebuilds through multiqc + ("sdsl-lite" . ,(const sdsl-lite-hwcaps)) + ("seqwish" . ,(const seqwish-hwcaps)) + ("odgi" . ,(const odgi-hwcaps)) + ("wfmash" . ,(const wfmash-hwcaps))))) + +(define-public pggb-with-hwcaps + (package + (inherit (use-glibc-hwcaps pggb)) + (name "pggb-with-hwcaps"))) + (define-public ucsc-genome-browser (package (name "ucsc-genome-browser") @@ -3936,3 +4461,200 @@ automatically vectorize for different architectures without adapting the code.") (substitute-keyword-arguments (package-arguments wfa2-lib) ((#:make-flags flags ''()) #~(cons "CC_FLAGS+=-static" #$flags)))))) + +(define-public r-rrbgen + (package + (name "r-stitch") + (version "0.0.6") + (source + (origin + (method url-fetch) + (uri (string-append "https://github.com/rwdavies/rrbgen/releases/download/" + version "/rrbgen_" version ".tar.gz")) + (sha256 + (base32 + "1vhqy8licl2pkzar4aag0q5fhnb3fdch8acyjh9445ia42z01z9c")))) + (build-system r-build-system) + (propagated-inputs + (list r-rcpp + r-rcpparmadillo)) + (home-page "https://github.com/rwdavies/rrbgen") + (synopsis "Lightweight limited functionality R bgen read/write library") + (description "@code{r-rrbgen} supports v1.3 of the bgen format. It supports reading +and writing using 8, 16, 24 or 32 bits per probability, using Layout = +2 and CompressedSNPBlocks = 1, for bi-allelic SNPs with samples of +ploidy 2. Any other format specification may crash unexpectedly +without a properly defined error.") + (license license:gpl3))) + +(define-public seqlib + (package + (name "seqlib") + (version "0.1.4") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/Zilong-Li/SeqLib") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1hczg1swghnxm6af74l09crdgf7l282jabmyck9mi5bk6vg9s1pn")))) + (build-system gnu-build-system) + (arguments + (list #:phases + #~(modify-phases %standard-phases + ;; Patch build scripts to unbundle htslib and build a + ;; seqlib shared library using libtool. + (add-after 'unpack 'patch-build-scripts + (lambda _ + ;; Initialize libtool. + (substitute* "configure.ac" + (("AM_INIT_AUTOMAKE\\(foreign\\)\n" all) + (string-append all "LT_INIT\n"))) + (substitute* "Makefile.am" + ;; Install headers + (("^SUBDIRS" all) + (string-append "nobase_include_HEADERS = " + (string-join (find-files "SeqLib")) + "\n" all)) + ;; Do not recurse into htslib submodule. + (("htslib") "") + ;; Remove install target override. + (("^install:") "") + (("^\tmkdir -p lib && cp src/libseqlib.a /libhts.a lib") "")) + (substitute* "src/Makefile.am" + ;; Build libtool library. + (("noinst_LIBRARIES = libseqlib\\.a") + "lib_LTLIBRARIES = libseqlib.la\nlibseqlib_la_LIBADD = -ljsoncpp") + (("libseqlib\\.a") "libseqlib.la") + (("libseqlib_a") "libseqlib_la")) + (substitute* (list "SeqLib/BamHeader.h" + "SeqLib/BamRecord.h" + "SeqLib/RefGenome.h" + "src/ReadFilter.cpp") + ;; Patch path to htslib headers. + (("\"htslib/htslib/([^\"]*)\"" all header) + (string-append "<htslib/" header ">")))))))) + (inputs + (list zlib)) + (native-inputs + (list autoconf automake libtool)) + ;; seqlib headers include headers from htslib and jsoncpp. So, + ;; they are propagated inputs. + (propagated-inputs + (list htslib jsoncpp)) + (home-page "https://github.com/Zilong-Li/SeqLib") + (synopsis "C++ htslib interface for manipulating sequence data and VCF") + (description "@code{seqlib} is a C++ htslib interface for manipulating sequence data +and VCF files.") + (license (list license:expat ; SeqLib/IntervalTree.h, SeqLib/aho_corasick.hpp, + ; json/json-forwards.h, json/json.h, src/jsoncpp.cpp, src/ssw.c, + license:asl2.0)))) ; main license + +(define-public vcfpp + (package + (name "vcfpp") + (version "0.3.3") + (source (origin + (method url-fetch) + (uri (string-append "https://github.com/Zilong-Li/vcfpp/releases/download/v" + version "/vcfpp.h")) + (sha256 + (base32 + "1wq76wz81y09ic37z30vljqnczhwx2qijav0nfvg6xi8wd2c75n3")))) + (build-system copy-build-system) + (arguments + (list #:install-plan #~'(("vcfpp.h" "include/vcfpp/vcfpp.h")))) + (home-page "https://github.com/Zilong-Li/vcfpp") + (synopsis "C++ API of htslib") + (description "@code{vcfpp} is a single C++ file as +interface to the basic htslib. It can be easily included in a C++ +program for scripting high-performance genomic analyses.") + (license license:asl2.0))) + +(define-public r-stitch + (package + (name "r-stitch") + (version "1.6.10") + (source + ;; The release tarball bundles dependencies. So, use git-fetch. + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/rwdavies/STITCH") + (commit version))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "0iy5fq2l5a35xdxqaf9ypj56da57qmwppwqmh9nflbvmbc7kgbkf")))) + (build-system r-build-system) + (arguments + (list + #:phases + #~(modify-phases %standard-phases + (add-after 'unpack 'chdir + (lambda _ + (chdir "STITCH"))) + (add-after 'chdir 'patch-build-system + (lambda _ + (substitute* "src/Makevars" + (("\\$\\(SEQLIB_ROOT\\)/src/libseqlib.a") "-lseqlib") + (("\\$\\(SEQLIB_ROOT\\)/htslib/libhts.a") "-lhts") + ((": SeqLib") ":"))))))) + (inputs + (list curl htslib seqlib zlib)) + (native-inputs + (list autoconf automake vcfpp)) + (propagated-inputs + (list r-data-table r-rrbgen + ;; FIXME: These should be inputs that are substituted into + ;; the source. But, for some reason, the reference scanner + ;; does not pick them up that way. + coreutils findutils htslib rsync)) + (home-page "https://github.com/rwdavies/STITCH") + (synopsis "Sequencing to imputation through constructing haplotypes") + (description "@code{r-stitch} is an R program for reference panel free, +read aware, low coverage sequencing genotype imputation. STITCH runs +on a set of samples with sequencing reads in BAM format, as well as a +list of positions to genotype, and outputs imputed genotypes in VCF +format.") + (license license:gpl3))) + +(define-public hifiasm + (package + (name "hifiasm") + (version "0.19.8") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/chhylp123/hifiasm") + (commit version))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1g6m2qdc0224vjaic87669g7y9ky1yps07qbjkmbh1vakz4zmgvr")))) + (build-system gnu-build-system) + (arguments + (list #:tests? #f + #:phases + #~(modify-phases %standard-phases + (delete 'configure) + (replace 'install + (lambda _ + (install-file "hifiasm" (string-append #$output "/bin")) + (install-file "hifiasm.1" (string-append #$output "/share/man/man1"))))))) + (inputs + (list zlib)) + (home-page "https://github.com/chhylp123/hifiasm") + (synopsis "haplotype-resolved assembler for accurate Hifi reads") + (description "Hifiasm is a fast haplotype-resolved de-novo assembler originally +designed for PacBio HiFi reads. Its latest release supports the +telomere-to-telomere assembly by utilizing ultralong Oxford Nanopore +reads. Hifiasm produces arguably the best single-sample +telomere-to-telomere assemblies combing HiFi, ultralong and Hi-C +reads, and it is one of the best haplotype-resolved assemblers for the +trio-binning assembly given parental short reads. For a human genome, +hifiasm can produce the telomere-to-telomere assembly in one day.") + (license license:expat))) |