From c6ec09ebb994e6b281a513be8465d81ed0dfc003 Mon Sep 17 00:00:00 2001
From: Efraim Flashner
Date: Thu, 10 Nov 2022 17:56:36 +0200
Subject: gn: Add pgr-tk

---
 gn/packages/bioinformatics.scm | 111 +++++++++++++++++++++++++
 gn/packages/crates-io.scm      | 182 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 293 insertions(+)

(limited to 'gn')

Review notes (patch commentary; not part of any hunk):

  What the patch adds
  * bioinformatics.scm: imports (gnu packages llvm) and defines pgr-tk 0.3.6,
    fetched recursively from git.  The origin snippet rewrites every
    Cargo.toml so dependency versions keep only major.minor, then restores
    the package's own version line.  Build phases copy the "wfa-src" input
    into rs-wfa/WFA, patch the three build.rs files, switch the
    "zlib-ng-compat" feature to "zlib", and replace 'install so that
    libpgrtk.so goes to lib/ and five pgr-* binaries go to bin/.
  * crates-io.scm: defines rust-bgzip-0.2, rust-cuckoofilter-0.5,
    rust-pyo3-0.14, rust-pyo3-build-config-0.14, rust-pyo3-macros-0.14,
    rust-pyo3-macros-backend-0.14 and rust-quick-csv-0.1.

  Changes made in this revision of the patch
  * Line structure restored; the text had been collapsed onto a few long
    lines, which breaks both the hunk format and Scheme ";" comments.
    Indentation inside lines was reconstructed following Guix conventions.
    NOTE(review): verify context lines against the tree before applying.
  * No line was added to or removed from any hunk, so the hunk headers and
    the diffstat above are unchanged.  The "index" blob ids predate the
    string fixes below.
  * rust-quick-csv: synopsis and description now start with a capital.
  * rust-bgzip, rust-cuckoofilter, rust-pyo3-build-config, rust-pyo3-macros,
    rust-pyo3-macros-backend, rust-quick-csv: the description repeated the
    synopsis; each is now a full sentence.
  * pgr-tk description: "data structure" -> "data structures",
    "performance.)" -> "performance).".

  Open questions
  * NOTE(review): pgr-tk is labelled "CC-BY-NC-SA 4.0" through
    license:non-copyleft.  Confirm this is the intended way to record a
    non-commercial licence in this channel.
  * NOTE(review): rust-quick-csv-0.1 is not referenced by any definition
    visible in this patch.  Confirm it is needed.
  * NOTE(review): 'adjust-source replaces "git" with "ls" in build.rs,
    presumably because git is unavailable in the build environment.
    Confirm against the upstream build.rs files.
  * NOTE(review): both custom phases are added after 'unpack, so
    'adjust-source runs before 'insert-wfa-source.  They touch different
    paths, so this looks harmless.

diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm
index 2d1b55f..4e3c92b 100644
--- a/gn/packages/bioinformatics.scm
+++ b/gn/packages/bioinformatics.scm
@@ -54,6 +54,7 @@
   #:use-module (gnu packages java)
   #:use-module (gnu packages jemalloc)
   #:use-module (gnu packages linux)
+  #:use-module (gnu packages llvm)
   #:use-module (gnu packages machine-learning)
   #:use-module (gnu packages maths)
   #:use-module (gnu packages mpi)
@@ -510,6 +511,116 @@ reads.")
 collapses them into a non-redundant graph structure.")
     (license license:expat)))
 
+(define-public pgr-tk
+  (package
+    (name "pgr-tk")
+    (version "0.3.6")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/Sema4-Research/pgr-tk")
+             (commit (string-append "v" version))
+             (recursive? #t)))          ; agc, WFA
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "160ngqbi8cbgaafq8crhfqv039mxr9jgl8hxxpwz0fh8mrh4003y"))
+       (snippet
+        #~(begin
+            (use-modules (guix build utils))
+            (substitute* (find-files "." "Cargo.toml")
+              ;; Only use the major+minor version to decrease the number of
+              ;; special version crates.
+              (("(.*= \")([[:digit:]]+\\.[[:digit:]]+)\\.[[:digit:]]+(\".*)"
+                _ name version tail)
+               (string-append name version tail))
+              ;; Then fix the version string for the actual package.
+              (("^version = \".*")
+               (string-append "version = \"" #$version "\"\n")))))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:install-source? #f
+       #:cargo-test-flags
+       (list "--release" "--"
+             "--skip=get_aln_segements"
+             "--skip=get_shmmr_dots"
+             "--skip=AGCFile"
+             "--skip=SeqIndexDB")
+       #:cargo-inputs
+       (("rust-bindgen" ,rust-bindgen-0.58)
+        ("rust-bgzip" ,rust-bgzip-0.2)
+        ("rust-byteorder" ,rust-byteorder-1)
+        ("rust-clap" ,rust-clap-3.1)
+        ("rust-cuckoofilter" ,rust-cuckoofilter-0.5)
+        ("rust-flate2" ,rust-flate2-1)
+        ("rust-libc" ,rust-libc-0.2)
+        ("rust-log" ,rust-log-0.4)
+        ("rust-petgraph" ,rust-petgraph-0.6)
+        ("rust-pyo3" ,rust-pyo3-0.14)
+        ("rust-rayon" ,rust-rayon-1)
+        ("rust-regex" ,rust-regex-1)
+        ("rust-rustc-hash" ,rust-rustc-hash-1)
+        ("rust-serde" ,rust-serde-1)
+        ("rust-serde-json" ,rust-serde-json-1)
+        ("rust-simple-logger" ,rust-simple-logger-1))
+       #:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'insert-wfa-source
+           (lambda* (#:key inputs #:allow-other-keys)
+             (copy-recursively (assoc-ref inputs "wfa-src")
+                               "rs-wfa/WFA")))
+         (add-after 'unpack 'adjust-source
+           (lambda _
+             (substitute* '("pgr-bin/build.rs"
+                            "pgr-db/build.rs"
+                            "pgr-tk/build.rs")
+               (("git") "ls")
+               (("bioconda") "Guix"))
+             ;; Build with zlib, not zlib-ng
+             (substitute* '("pgr-bin/Cargo.toml"
+                            "pgr-db/Cargo.toml")
+               (("zlib-ng-compat") "zlib"))))
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let ((out (assoc-ref outputs "out")))
+               (with-directory-excursion "target/release"
+                 (install-file "libpgrtk.so" (string-append out "/lib"))
+                 (for-each
+                   (lambda (file)
+                     (install-file file (string-append out "/bin")))
+                   (list "pgr-filter"
+                         "pgr-mdb"
+                         "pgr-multifilter"
+                         "pgr-probe-match"
+                         "pgr-shmmr-pair-count")))))))))
+    (inputs (list clang python zlib))
+    (native-inputs
+     `(("pkg-config" ,pkg-config)
+       ("wfa-src"
+        ,(origin
+           (method git-fetch)
+           (uri (git-reference
+                 ;; forPYO3 branch, 14-03-2021
+                 (url "https://github.com/cschin/WFA")
+                 (commit "1f8c8d2905ed482cd2d306a1676d60c2a45cb098")))
+           (file-name "wfa-for-pgr-tk")
+           (sha256
+            (base32 "19h1cjp2bdlcfq5c6rsbk8bc0f8zn64b471dhj4xlfxd1prv2dpk"))))))
+    (home-page "https://github.com/Sema4-Research/pgr-tk")
+    (synopsis "Pangenome Research Tool Kit")
+    (description
+     "PGR-TK provides pangenome assembly management, query and
+@acronym{Minimizer Anchored Pangenome, MAP} Graph Generation.  It is a project
+to provide Python and Rust libraries to facilitate pangenomics analysis.
+Several algorithms and data structures used for the Peregrine Genome Assembler
+are useful for Pangenomics analysis as well.  This repo takes those algorithms
+and data structures, combining other handy 3rd party tools to expose them as a
+library in Python (with Rust code for those computing parts that need
+performance).")
+    (license (license:non-copyleft
+               "file:///LICENSE"
+               "CC-BY-NC-SA 4.0"))))
+
 (define-public pangenie
   (let ((commit "e779076827022d1416ab9fabf99a03d8f4725956") ; September 2, 2021 from phasing-tests branch
         (revision "2"))
diff --git a/gn/packages/crates-io.scm b/gn/packages/crates-io.scm
index 8ea1765..847b2ef 100644
--- a/gn/packages/crates-io.scm
+++ b/gn/packages/crates-io.scm
@@ -9,6 +9,31 @@
   #:use-module (gnu packages crates-io)
   #:use-module (gnu packages maths))
 
+(define-public rust-bgzip-0.2
+  (package
+    (name "rust-bgzip")
+    (version "0.2.1")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "bgzip" version))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "1kssq4hp8csg27rhggabpfiyn9xp5rh5b8al63dghk11vqs7hk5j"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:cargo-inputs
+       (("rust-flate2" ,rust-flate2-1)
+        ("rust-thiserror" ,rust-thiserror-1))
+       #:cargo-development-inputs
+       (("rust-clap" ,rust-clap-2)
+        ("rust-csv" ,rust-csv-1)
+        ("rust-tempfile" ,rust-tempfile-3))))
+    (home-page "https://github.com/informationsea/bgzip-rs")
+    (synopsis "Rust implementation of bgzip")
+    (description "This package provides a Rust implementation of bgzip.")
+    (license license:expat)))
+
 (define-public rust-boomphf-0.5
   (package
     (name "rust-boomphf")
@@ -40,6 +65,36 @@ for massive key sets}.  It generates an @acronym{MPHF, minimal perfect hash
 functions} for a collection of hashable objects.")
     (license license:expat)))
 
+(define-public rust-cuckoofilter-0.5
+  (package
+    (name "rust-cuckoofilter")
+    (version "0.5.0")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "cuckoofilter" version))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "067fkr9dc118rqddr72xdldq05d31yyipvvyrmj9yrrik52ah45q"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:skip-build? #t     ; Not packaging dependencies
+       #:cargo-inputs
+       (("rust-byteorder" ,rust-byteorder-1)
+        ;("rust-clippy" ,rust-clippy-0.0.302)
+        ;("rust-farmhash" ,rust-farmhash-1)
+        ("rust-fnv" ,rust-fnv-1)
+        ("rust-rand" ,rust-rand-0.7)
+        ("rust-serde" ,rust-serde-1)
+        ("rust-serde-bytes" ,rust-serde-bytes-0.11)
+        ("rust-serde-derive" ,rust-serde-derive-1))
+       #:cargo-development-inputs
+       (("rust-serde-json" ,rust-serde-json-1))))
+    (home-page "http://axiom.co")
+    (synopsis "Cuckoo Filter: Practically Better Than Bloom")
+    (description "This package provides a cuckoo filter implementation.")
+    (license license:expat)))
+
 (define-public rust-gfa-0.6
   (package
     (name "rust-gfa")
@@ -146,6 +201,133 @@ functions} for a collection of hashable objects.")
     (description "Key String provides a Rust package optimized for map keys.")
     (license (list license:expat license:asl2.0))))
 
+(define-public rust-pyo3-0.14
+  (package
+    (name "rust-pyo3")
+    (version "0.14.5")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "pyo3" version))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "1j70b9bkncgpgnfxjxyvp4mk40rp55lk6qmacxm5c2k78y9hy41m"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:cargo-inputs
+       (("rust-cfg-if" ,rust-cfg-if-1)
+        ("rust-hashbrown" ,rust-hashbrown-0.11)
+        ("rust-indexmap" ,rust-indexmap-1)
+        ("rust-indoc" ,rust-indoc-0.3)
+        ("rust-inventory" ,rust-inventory-0.1)
+        ("rust-libc" ,rust-libc-0.2)
+        ("rust-num-bigint" ,rust-num-bigint-0.4)
+        ("rust-num-complex" ,rust-num-complex-0.4)
+        ("rust-parking-lot" ,rust-parking-lot-0.11)
+        ("rust-paste" ,rust-paste-0.1)
+        ("rust-pyo3-build-config" ,rust-pyo3-build-config-0.14)
+        ("rust-pyo3-macros" ,rust-pyo3-macros-0.14)
+        ("rust-serde" ,rust-serde-1)
+        ("rust-unindent" ,rust-unindent-0.1))
+       #:cargo-development-inputs
+       (("rust-assert-approx-eq" ,rust-assert-approx-eq-1)
+        ("rust-criterion" ,rust-criterion-0.3)
+        ("rust-proptest" ,rust-proptest-0.10)
+        ("rust-rustversion" ,rust-rustversion-1)
+        ("rust-serde-json" ,rust-serde-json-1)
+        ("rust-trybuild" ,rust-trybuild-1))))
+    (inputs (list python))
+    (home-page "https://github.com/pyo3/pyo3")
+    (synopsis "Rust bindings for the Python interpreter")
+    (description
+     "This package provides Rust bindings for Python, including tools for
+creating native Python extension modules.  Running and interacting with
+Python code from a Rust binary is also supported.")
+    (license license:asl2.0)))
+
+(define-public rust-pyo3-build-config-0.14
+  (package
+    (name "rust-pyo3-build-config")
+    (version "0.14.5")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "pyo3-build-config" version))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "13vkcl49b2x81azb613ss256k8pazrfc4fy4ny8pzgdciirn2afi"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:cargo-inputs (("rust-once-cell" ,rust-once-cell-1))))
+    (home-page "https://github.com/pyo3/pyo3")
+    (synopsis "Build configuration for the PyO3 ecosystem")
+    (description "This package provides build configuration for the PyO3 ecosystem.")
+    (license license:asl2.0)))
+
+(define-public rust-pyo3-macros-0.14
+  (package
+    (name "rust-pyo3-macros")
+    (version "0.14.5")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "pyo3-macros" version))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "1a4fh24c5q85f31n2rwbqrai2bjprf9kzh6xvpgj8j3hblhwa2zw"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:cargo-inputs
+       (("rust-pyo3-macros-backend" ,rust-pyo3-macros-backend-0.14)
+        ("rust-quote" ,rust-quote-1)
+        ("rust-syn" ,rust-syn-1))))
+    (home-page "https://github.com/pyo3/pyo3")
+    (synopsis "Proc macros for PyO3 package")
+    (description "This package provides proc macros for the PyO3 package.")
+    (license license:asl2.0)))
+
+(define-public rust-pyo3-macros-backend-0.14
+  (package
+    (name "rust-pyo3-macros-backend")
+    (version "0.14.5")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "pyo3-macros-backend" version))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "1ps068jqyq1275zxxbzn6hyz9lkfz35az8waj6mzlji2jg2kyqki"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:cargo-inputs
+       (("rust-proc-macro2" ,rust-proc-macro2-1)
+        ("rust-pyo3-build-config" ,rust-pyo3-build-config-0.14)
+        ("rust-quote" ,rust-quote-1)
+        ("rust-syn" ,rust-syn-1))))
+    (home-page "https://github.com/pyo3/pyo3")
+    (synopsis "Code generation for PyO3 package")
+    (description "This package provides code generation for the PyO3 package.")
+    (license license:asl2.0)))
+
+(define-public rust-quick-csv-0.1
+  (package
+    (name "rust-quick-csv")
+    (version "0.1.6")
+    (source (origin
+              (method url-fetch)
+              (uri (crate-uri "quick-csv" version))
+              (file-name (string-append name "-" version ".tar.gz"))
+              (sha256
+               (base32
+                "131k8zzlplk2h62wz813jbvm0sk7v3mixwhhq34y9lmp3mqbgx7d"))))
+    (build-system cargo-build-system)
+    (arguments
+     `(#:cargo-inputs (("rust-rustc-serialize" ,rust-rustc-serialize-0.3))))
+    (home-page "https://github.com/tafia/quick-csv")
+    (synopsis "Quick CSV reader and decoder")
+    (description "This package provides a quick CSV reader and decoder.")
+    (license license:expat)))
+
 (define-public rust-succinct-0.5
   (package
     (name "rust-succinct")
-- 
cgit v1.2.3