;; Bioinformatics module (define-module (gn packages bioinformatics) #:use-module ((guix licenses) #:prefix license:) #:use-module (guix packages) #:use-module (guix gexp) #:use-module (guix utils) #:use-module (guix download) #:use-module (guix git-download) #:use-module (guix hg-download) #:use-module (guix build-system ant) #:use-module (guix build-system cargo) #:use-module (guix build-system cmake) #:use-module (guix build-system copy) #:use-module (guix build-system gnu) #:use-module (guix build-system meson) #:use-module (guix build-system ocaml) #:use-module (guix build-system python) #:use-module (guix build-system r) #:use-module (guix build-system trivial) #:use-module (guix build-system waf) #:use-module (gnu packages) #:use-module (gn packages crates-io) #:use-module (gn packages datastructures) #:use-module (gn packages java) #:use-module (gn packages maths) #:use-module (gn packages ocaml) #:use-module (gn packages python) #:use-module (gnu packages algebra) #:use-module (gnu packages assembly) #:use-module (gnu packages autotools) #:use-module (gnu packages base) #:use-module (gnu packages bash) #:use-module (gnu packages bioconductor) #:use-module (gnu packages bioinformatics) #:use-module (gnu packages boost) #:use-module (gnu packages bootstrap) #:use-module (gnu packages c) #:use-module (gnu packages check) #:use-module (gnu packages cmake) #:use-module (gnu packages compression) #:use-module (gnu packages cpp) #:use-module (gnu packages cran) #:use-module (gnu packages crates-io) #:use-module (gnu packages crates-graphics) #:use-module (gnu packages curl) #:use-module (gnu packages databases) #:use-module (gnu packages datastructures) #:use-module (gnu packages digest) #:use-module (gnu packages elf) #:use-module (gnu packages fontutils) #:use-module (gnu packages gcc) #:use-module (gnu packages ghostscript) #:use-module (gnu packages graph) #:use-module (gnu packages gtk) #:use-module (gnu packages guile) #:use-module (gnu packages image) 
#:use-module (gnu packages imagemagick)
  #:use-module (gnu packages java)
  #:use-module (gnu packages jemalloc)
  #:use-module (gnu packages linux)
  #:use-module (gnu packages llvm)
  #:use-module (gnu packages machine-learning)
  #:use-module (gnu packages maths)
  #:use-module (gnu packages mpi)
  #:use-module (gnu packages ncurses)
  #:use-module (gnu packages ocaml)
  #:use-module (gnu packages parallel)
  #:use-module (gnu packages perl)
  #:use-module (gnu packages pkg-config)
  #:use-module (gnu packages protobuf)
  #:use-module (gnu packages python)
  #:use-module (gnu packages python-build)
  #:use-module (gnu packages python-science)
  #:use-module (gnu packages python-web)
  #:use-module ((gnu packages python-xyz) #:hide (python2-six))
  #:use-module (gnu packages rdf)
  #:use-module (gnu packages readline)
  #:use-module (gnu packages rsync)
  #:use-module (gnu packages ruby)
  #:use-module (gnu packages rust)
  #:use-module (gnu packages serialization)
  #:use-module (gnu packages shells)
  #:use-module (gnu packages statistics)
  #:use-module (gnu packages sqlite)
  #:use-module (gnu packages tcl)
  #:use-module (gnu packages time)
  #:use-module (gnu packages tls)
  #:use-module (gnu packages vim)
  #:use-module (gnu packages web)
  #:use-module (past packages python27))

;; CONTRA, fetched from SourceForge.  The origin snippet deletes the bundled
;; BEDTools tarball; bedtools is supplied as a propagated input instead.
;; Nothing is configured or compiled: the install phase only copies the
;; scripts and the pre-built user guide into the output.
(define-public contra
  (package
    (name "contra")
    (version "2.0.6")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/contra-cnv/CONTRA.V"
                           (version-major+minor version)
                           "/CONTRA.v" version ".tar.gz"))
       (sha256
        (base32 "0agpcm2xh5f0i9n9sx1kvln6mzdksddmh11bvzj6bh76yw5pnw91"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (delete-file "BEDTools.v2.11.2.tar.gz")
           #t))))
    (build-system gnu-build-system)
    (propagated-inputs
     `(("python" ,python-2)
       ("r" ,r)
       ;; ("r-dnacopy" ,r-dnacopy) <-- missing in Pjotr's tree
       ("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (arguments
     `(#:tests? #f ; There are no tests.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build) ; We can use Guix's BEDtools instead.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix  (assoc-ref outputs "out"))
                    (bin-dir (string-append prefix "/bin"))
                    (doc-dir (string-append prefix "/share/doc/contra")))
               ;; Helper scripts go to bin/scripts, the entry points to bin.
               (copy-recursively "scripts"
                                 (string-append bin-dir "/scripts"))
               (for-each (lambda (script)
                           (install-file script bin-dir))
                         '("contra.py" "baseline.py"))
               ;; There's only a pre-built PDF available.
               (install-file "CONTRA_User_Guide.2.0.pdf" doc-dir))
             #t)))))
    (home-page "http://contra-cnv.sourceforge.net/")
    (synopsis "Tool for copy number variation (CNV) detection for targeted resequencing data")
    (description "CONTRA is a tool for copy number variation (CNV) detection for targeted resequencing data such as those from whole-exome capture data. CONTRA calls copy number gains and losses for each target region with key strategies including the use of base-level log-ratios to remove GC-content bias, correction for an imbalanced library size effect on log-ratios, and the estimation of log-ratio variations via binning and interpolation. It takes standard alignment formats (BAM/SAM) and outputs in variant call format (VCF 4.0) for easy integration with other next generation sequencing analysis package.")
    (license license:gpl3+)))

;; Private Boost 1.57.0 variant used as an input of delly.  The tarball name
;; is derived from the version by replacing every dot with an underscore.
(define boost-delly
  (package (inherit boost)
    (name "boost-delly")
    (version "1.57.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/boost/boost_"
                           (string-map (lambda (chr)
                                         (if (eq? chr #\.) #\_ chr))
                                       version)
                           ".tar.bz2"))
       (sha256
        (base32 "0rs94vdmg34bwwj23fllva6mhrml2i7mvmlb11zyrk1k5818q34i"))))))

;; Delly, built from the GitHub release tarball with a patch that makes it
;; use system libraries.
(define-public delly
  (package
    (name "delly")
    (version "0.7.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/tobiasrausch/delly/archive/v"
                           version ".tar.gz"))
       (sha256
        (base32 "173mmg43dbxqkyq0kiffz63xbmggr2kzd55mwxci9yfh5md1zprn"))
       (patches (list (search-patch "delly-use-system-libraries.patch")))))
    (build-system gnu-build-system)
    (native-inputs
     `(("python" ,python-2)))
    (inputs
     `(("boost" ,boost-delly) ; Use version 1.57.0 instead.
("htslib" ,htslib)
       ("zlib" ,zlib)
       ("bzip2" ,bzip2)))
    (arguments
     `(#:tests? #f ; There are no tests to run.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure) ; There is no configure phase.
         (replace 'install
           (lambda _
             (let ((bin (string-append (assoc-ref %outputs "out") "/bin")))
               (install-file "src/cov" bin)
               (install-file "src/delly" bin)
               (install-file "src/extract" bin)
               (install-file "src/iover" bin)
               (install-file "src/stats" bin)))))))
    (home-page "https://github.com/tobiasrausch/delly")
    (synopsis "Integrated structural variant prediction method")
    (description "Delly is an integrated structural variant prediction method that can discover and genotype deletions, tandem duplications, inversions and translocations at single-nucleotide resolution in short-read massively parallel sequencing data. It uses paired-ends and split-reads to sensitively and accurately delineate genomic rearrangements throughout the genome. Structural variants can be visualized using Delly-maze and Delly-suave.")
    (license license:gpl3)))

;; Return a hidden variant of wfmash compiled for the x86_64 psABI level
;; PSABI, a string such as "x86-64-v2".  The variant
;;   - is named "wfmash-PSABI";
;;   - passes -march=PSABI through EXTRA_FLAGS;
;;   - installs its libraries under lib/glibc-hwcaps/PSABI and sets the
;;     install RPATH to that directory in its own output;
;;   - has its tests disabled and its bin/ directory removed after install.
;; The flag strings are assembled on the host side, so the build-side code is
;; the same as if each variant had been written out by hand.
(define (wfmash-for-psabi psabi)
  (let ((libdir (string-append "lib/glibc-hwcaps/" psabi)))
    (package/inherit wfmash
      (name (string-append "wfmash-" psabi))
      (arguments
       (substitute-keyword-arguments (package-arguments wfmash)
         ((#:configure-flags flags #~'())
          #~(append (list #$(string-append "-DEXTRA_FLAGS=-march=" psabi)
                          #$(string-append "-DCMAKE_INSTALL_LIBDIR=" libdir)
                          (string-append "-DCMAKE_INSTALL_RPATH=" #$output
                                         #$(string-append "/" libdir)))
                    #$flags))
         ;; The building machine can't necessarily run the code produced.
         ((#:tests? _ #t) #f)
         ((#:phases phases #~%standard-phases)
          #~(modify-phases #$phases
              (add-after 'install 'remove-binary
                (lambda _
                  (delete-file-recursively
                   (string-append #$output "/bin"))))))))
      (supported-systems '("x86_64-linux"))
      (properties `((hidden? . #t))))))

(define-public wfmash-x86-64-v2
  (wfmash-for-psabi "x86-64-v2"))

(define-public wfmash-x86-64-v3
  (wfmash-for-psabi "x86-64-v3"))

(define-public wfmash-x86-64-v4
  (wfmash-for-psabi "x86-64-v4"))

;; This copy of wfmash will automatically use the libraries that target the
;; x86_64 psABI which the hardware supports.
(define-public wfmash-hwcaps
  (package/inherit wfmash
    (name "wfmash-hwcaps")
    (arguments
     (substitute-keyword-arguments (package-arguments wfmash)
       ((#:phases phases #~%standard-phases)
        #~(modify-phases #$phases
            (add-after 'install 'install-optimized-libraries
              (lambda* (#:key inputs #:allow-other-keys)
                ;; Copy the glibc-hwcaps directory of each psABI-specific
                ;; variant (looked up by its input label) into this output.
                (let ((hwcaps "/lib/glibc-hwcaps"))
                  (for-each
                   (lambda (psabi)
                     (copy-recursively
                      (string-append
                       (assoc-ref inputs (string-append "wfmash-" psabi))
                       hwcaps "/" psabi)
                      (string-append #$output hwcaps "/" psabi)))
                   '("x86-64-v2" "x86-64-v3" "x86-64-v4")))))))))
    (native-inputs
     (modify-inputs (package-native-inputs wfmash)
       (append wfmash-x86-64-v2
               wfmash-x86-64-v3
               wfmash-x86-64-v4)))
    (properties `((tunable? . #f)))))

;; Control-FREEC.  The tarball is unpacked into a "source" directory created
;; by the unpack phase, built there with a plain "make", and the resulting
;; "freec" binary is installed into bin.
(define-public freec
  (package
    (name "control-freec")
    (version "8.7")
    (source
     (origin
       (method url-fetch)
       (uri "http://bioinfo-out.curie.fr/projects/freec/src/FREEC_Linux64.tar.gz")
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "12sl7gxbklhvv0687qjhml1z4lwpcn159zcyxvawvclsrzqjmv0h"))))
    (build-system gnu-build-system)
    ;; The source code's filename indicates only a 64-bit Linux build.
    ;; We need to investigate whether this is true.
    (supported-systems '("x86_64-linux"))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; There's no configure phase because there are no external
         ;; dependencies.
         (delete 'configure)
         ;; There are no tests.
         (delete 'check)
         ;; Phase return values are ignored by the build system, so failures
         ;; must be signalled by raising an error: 'mkdir' and 'invoke' do,
         ;; whereas (zero? (system* ...)) would let a failure go unnoticed.
         (replace 'unpack
           (lambda* (#:key source #:allow-other-keys)
             (mkdir "source")
             (with-directory-excursion "source"
               (invoke "tar" "xvf" source))))
         (replace 'build
           (lambda _
             (with-directory-excursion "source"
               (invoke "make"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "source/freec" bin)))))))
    (home-page "http://bioinfo-out.curie.fr/projects/freec/")
    (synopsis "Tool for detection of copy-number changes and allelic imbalances (including LOH) using deep-sequencing data")
    (description "Control-FREEC automatically computes, normalizes, segments copy number and beta allele frequency (BAF) profiles, then calls copy number alterations and LOH. The control (matched normal) sample is optional for whole genome sequencing data but mandatory for whole exome or targeted sequencing data. For whole genome sequencing data analysis, the program can also use mappability data (files created by GEM). ")
    (license license:gpl2+)))

;; PLINK 1.90b3 from the upstream plink-ng release tarball, patched with
;; plink-ng-Makefile-zlib.patch and built with Makefile.std.
(define-public plink2
  (package
    (name "plink2")
    (version "1.90b3")
    (source
     (origin
       (method url-fetch)
       ;; https://github.com/chrchang/plink-ng/archive/v1.90b3.tar.gz
       (uri (string-append "https://github.com/chrchang/plink-ng/archive/v"
                           version ".tar.gz"))
       (sha256
        (base32 "03fzib1al5qkr9vxv63wxmv6y2pfb1rmir0h8jpi72r87hczqjig"))
       (patches (list (search-patch "plink-ng-Makefile-zlib.patch")))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'build
           (lambda _
             ;; 'invoke' raises an error when make fails; the phase's
             ;; return value alone would be ignored.
             (invoke "make" "-f" "Makefile.std")))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "plink2" bin)
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("openblas" ,openblas)
       ;; ("atlas" ,atlas)
       ;; ("lapack" ,lapack)
       ("gfortran" ,gfortran)
       ))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "https://www.cog-genomics.org/plink2")
    (synopsis "Whole genome association analysis toolset")
    (description "PLINK is a whole genome association analysis toolset, designed to perform a range of basic, large-scale analyses in a computationally efficient manner. The focus of PLINK is purely on analysis of genotype/phenotype data, so there is no support for steps prior to this (e.g. study design and planning, generating genotype or CNV calls from raw data). Through integration with gPLINK and Haploview, there is some support for the subsequent visualization, annotation and storage of results.")
    ;; Code is released under GPLv2, except for fisher.h, which is under
    ;; LGPLv2.1+
    (license (list license:gpl2 license:lgpl2.1+))))

;; PLINK built from a pinned commit of the genenetwork fork of plink-ng,
;; using the fork's Makefile.guix.
(define-public plink-ng-gn
  (let ((commit "5d1db4313ba0cc976562da233db4aced78975d10"))
    (package
      (name "plink-ng-gn")
      (version (string-append "1.90b3-" commit )) ; Aug 11, 2016
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/genenetwork/plink-ng.git")
               (commit commit)))
         (file-name (string-append name "-" commit))
         (sha256
          (base32 "1366li3ks9076bblvd1rpzkjq4j8f8f08lhga4c1ckrkil3xww4m"))))
      ;; no longer (patches (list (search-patch "plink-ng-Makefile-zlib-git.patch")))))
      (inputs
       `(("zlib" ,zlib)
         ("openblas" ,openblas)
         ;; ("atlas" ,atlas) ; openblas replaces atlas
         ("lapack" ,lapack) ; lapack is disabled in GUIX openblas
         ;; ("gfortran" ,gfortran)
         ;; ("python" ,python-2) ;; for tests - currently disabled
         ))
      (native-inputs
       `(("unzip" ,unzip)))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f ;no "check" target. Some of the python-based tests fail
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (replace 'build
             (lambda _
               ;; 'invoke' raises an error when make fails; the phase's
               ;; return value alone would be ignored.
               (invoke "make" "-f" "Makefile.guix")))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
                 (install-file "plink2" bin)
                 #t))))))
      (home-page "https://www.cog-genomics.org/plink2")
      (synopsis "Whole genome association analysis toolset")
      (description "PLINK is a whole genome association analysis toolset, designed to perform a range of basic, large-scale analyses in a computationally efficient manner. The focus of PLINK is purely on analysis of genotype/phenotype data, so there is no support for steps prior to this (e.g. study design and planning, generating genotype or CNV calls from raw data). Through integration with gPLINK and Haploview, there is some support for the subsequent visualization, annotation and storage of results.")
      (license license:gpl3+))))

(define-public pindel
  (package
    (name "pindel")
    (version "0.2.5b8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/genome/pindel.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "16a32fbgv1n58nfcxa1nyphrdrad80sgpinfa9p028n6plwycpww"))))
    (build-system gnu-build-system)
    (inputs
     `(("samtools" ,samtools)
       ("htslib" ,htslib)
       ("zlib" ,zlib)))
    (native-inputs
     `(("cppcheck" ,cppcheck)
       ("python" ,python-2)
       ("perl" ,perl)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure) ; There is no configure phase.
         ;; The build phase needs to run 'make' twice for the reasons described
         ;; below.
         (replace 'build
           (lambda* (#:key inputs #:allow-other-keys)
             ;; The first run creates a Makefile.local file. Make will report
             ;; the failure to find Makefile.local, but we can ignore this error.
             (system* "make" (string-append "SAMTOOLS=" (assoc-ref inputs "samtools")))
             ;; The second run actually compiles the program. Now Makefile.local
             ;; is available, and we should treat an exiting make with an error as
             ;; a true error.
             (invoke "make")))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "src/pindel" bin)
               (install-file "src/pindel2vcf" bin)
               (install-file "src/pindel2vcf4tcga" bin)
               (install-file "src/sam2pindel" bin))))
         ;; There are multiple test targets, so in order to run all
         ;; tests, we must run the separate make targets.
(replace 'check
           (lambda _
             (for-each (lambda (make-target)
                         (invoke "make" make-target))
                       '("acceptance-tests"
                         "coverage-tests"
                         "cppcheck"
                         "functional-tests"
                         "regression-tests"))))))
    (home-page "https://github.com/genome/pindel")
    (synopsis "Structural variants detector for next-gen sequencing data")
    (description "Pindel can detect breakpoints of large deletions, medium sized insertions, inversions, tandem duplications and other structural variants at single-based resolution from next-gen sequence data. It uses a pattern growth approach to identify the breakpoints of these variants from paired-end short reads.")
    (license license:gpl3+)))

;; VarScan, built from the ".source.jar" published with the GitHub release.
;; The archive is unpacked by hand, each matching file under net/sf/varscan
;; is compiled with javac, and a new jar is assembled with the shipped
;; manifest and installed under share/java/varscan.
(define-public varscan
  (package
    (name "varscan")
    (version "2.4.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/dkoboldt/varscan/releases/download/v"
             version "/VarScan.v" version ".source.jar"))
       (sha256
        (base32 "0y45ympkza7qwcbcisg006286pwjbr5978n03hx5nvl09f0mapk8"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f ; build.xml does not exist
       #:phases
       (modify-phases %standard-phases
         (replace 'unpack
           (lambda _
             (mkdir "source")
             (chdir "source")
             ;; Unpack the Java archive containing the source files.
             (invoke "jar" "xf" (assoc-ref %build-inputs "source"))
             ;; Remove existing compiled output.
             ;; NOTE(review): the pattern "^.java$" only matches a file name
             ;; made of one character followed by "java"; confirm that this
             ;; deletes what is intended.
             (with-directory-excursion "net/sf/varscan/"
               (for-each delete-file
                         (find-files "." "^.java$" #:directories? #f)))
             #t))
         (replace 'build
           (lambda _
             ;; Compile the source files.
             (with-directory-excursion "net/sf/varscan/"
               (for-each (lambda (java-file)
                           (invoke "javac" java-file))
                         (find-files "." ".java$" #:directories? #f)))
             ;; Construct the new Java archive.
             ;; NOTE(review): the archive members are selected with ".java$";
             ;; verify that this is the intended set of files.
             (let ((jar-name (string-append "varscan-" ,version ".jar")))
               (apply invoke "jar" "cfm" jar-name
                      "META-INF/MANIFEST.MF"
                      (find-files "net/sf/varscan" ".java$")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((jar-dir (string-append (assoc-ref outputs "out")
                                           "/share/java/varscan/")))
               (install-file (string-append "varscan-" ,version ".jar")
                             jar-dir))
             #t)))))
    (home-page "https://dkoboldt.github.io/varscan/")
    (synopsis "Variant detection in massively parallel sequencing data")
    (description "Variant detection in massively parallel sequencing data.")
    ;; Free for non-commercial use by academic, government, and
    ;; non-profit/not-for-profit institutions
    (license (license:non-copyleft "file:///LICENSE"))))

;; Deprecated alias: "edirect-gn" now resolves to edirect.
(define-public edirect-gn
  (deprecated-package "edirect-gn" edirect))

;; GFAffix, fetched from the Git tag equal to the version string.
(define-public gfaffix
  (package
    (name "gfaffix")
    (version "0.1.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/marschall-lab/GFAffix")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "181jxl8ldj39jgscyqzhz4l4k5kxj1j9hvzi8dxj59h2zzznb0kb"))))
    (build-system cargo-build-system)
    (arguments
     `(#:install-source?
#f
       ;; Rust crates the build depends on, as (label package) pairs.
       #:cargo-inputs
       (("rust-clap" ,rust-clap-3)
        ("rust-rustc-hash" ,rust-rustc-hash-1)
        ("rust-regex" ,rust-regex-1)
        ("rust-handlegraph" ,rust-handlegraph-0.7.0-alpha.9)
        ("rust-gfa" ,rust-gfa-0.10)
        ("rust-quick-csv", rust-quick-csv-0.1)
        ("rust-rayon" ,rust-rayon-1)
        ("rust-log" ,rust-log-0.4)
        ("rust-env-logger" ,rust-env-logger-0.7))))
    (home-page "https://github.com/marschall-lab/GFAffix")
    (synopsis "Identify walk-preserving shared affixes in variation graphs")
    (description "GFAffix identifies walk-preserving shared affixes in variation graphs and collapses them into a non-redundant graph structure.")
    (license license:expat)))

;; vcfbub, fetched from the Git tag "v" followed by the version and built
;; with the cargo build system; the crate sources are not installed.
(define-public vcfbub
  (package
    (name "vcfbub")
    (version "0.1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pangenome/vcfbub")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0sk2ab22z6qa00j1w8a8f5kbb7q2xb10fhd32zy4lh351v3mqmyg"))))
    (build-system cargo-build-system)
    (arguments
     `(#:install-source? #f
       #:cargo-inputs
       (("rust-clap" ,rust-clap-2)
        ("rust-flate2" ,rust-flate2-1)
        ("rust-vcf" ,rust-vcf-0.6))))
    (home-page "https://github.com/pangenome/vcfbub")
    (synopsis "Popping bubbles in vg deconstruct VCFs")
    (description "The VCF output produced by a command like @command{vg deconstruct -e -a -H '#' ...} includes information about the nesting of variants. With @code{-a}, @code{--all-snarls}, we obtain not just the top level bubbles, but all nested ones. This exposed snarl tree information can be used to filter the VCF to obtain a set of non-overlapping sites (n.b. \"snarl\" is a generic model of graph bubbles including tips and loops). @code{vcfbub} lets us do two common operations on these VCFs: @enumerate @item We can filter sites by maximum level in the snarl tree. For instance, @code{--max-level 0} would keep only sites with @code{LV=0}. In practice, vg's snarl finder ensures that these are sites rooted on the main linear axis of the pangenome graph. Those at higher levels occur within larger variants. @item We can filter sites by maximum allele size, either for the reference allele or any allele. In this case, @code{--max-ref-length 10000} would keep only sites where the reference allele is less than 10kb long. Setting @code{--max-ref-length} or @code{--max-allele-length} additionally ensures that the output contains the bubbles nested inside of any popped bubble, even if they are at greater than @code{--max-level}. @end enumerate @code{vcfbub} accomplishes a simple task: we keep sites that are the children of those which we \"pop\" due to their size. These occur around complex large SVs, such as multi-Mbp inversions and segmental duplications. We often need to remove these, as they provide little information for many downstream applications, such as haplotype panels or other imputation references.")
    (license license:expat)))

;; fastix, fetched as a crate via crate-uri.  The development inputs are
;; listed separately from the regular cargo inputs.
(define-public fastix
  (package
    (name "fastix")
    (version "0.1.0")
    (source
     (origin
       (method url-fetch)
       (uri (crate-uri "fastix" version))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1mzk65mg8vx0hz39xis6zqdmq56abhmza656gn9pgmlsn151gpx2"))))
    (build-system cargo-build-system)
    (arguments
     `(#:install-source? #f
       #:cargo-inputs
       (("rust-clap" ,rust-clap-2))
       #:cargo-development-inputs
       (("rust-assert-cmd" ,rust-assert-cmd-0.12)
        ("rust-predicates" ,rust-predicates-1))))
    (home-page "https://github.com/ekg/fastix")
    (synopsis "Prefix-renaming FASTA records")
    (description "A command line tool to add prefixes to FASTA headers. The idea is to support pangenomic applications, following the @url{https://github.com/pangenome/PanSN-spec, PanSN} hierarchical naming specification.")
    (license license:expat)))

;; pafplot, pinned to a Git commit; the version string is produced by
;; git-version from the base version "0.0.0", the revision and the commit.
(define-public pafplot
  (let ((commit "7dda24c0aeba8556b600d53d748ae3103ec85501")
        (revision "1"))
    (package
      (name "pafplot")
      (version (git-version "0.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/pafplot.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "04ffz0zfj4mvfxmrwgisv213fypgl02f7sim950a067pm7375g1l"))))
      (build-system cargo-build-system)
      (arguments
       `(#:install-source? #f
         #:cargo-inputs
         (("rust-clap" ,rust-clap-2)
          ("rust-boomphf" ,rust-boomphf-0.5)
          ("rust-itertools" ,rust-itertools-0.10)
          ("rust-fnv" ,rust-fnv-1)
          ("rust-lodepng" ,rust-lodepng-3)
          ("rust-rgb" ,rust-rgb-0.8)
          ("rust-line-drawing" ,rust-line-drawing-0.8))))
      (home-page "https://github.com/ekg/pafplot.git")
      (synopsis "Base-level dotplots from PAF alignments")
      (description "In the process of generating alignments between whole genomes, we often need to understand the base-level alignment between particular sequences. @command{pafplot} allows us to do so by rasterizing the matches alignment set. It draws a line on a raster image to represent each match found in a set of alignments.
The resulting image provides a high-level view of the structure of the alignments, and in consequence the homology relationships between the sequences in consideration.")
      (license license:expat))))

;; gafpack, pinned to a Git commit; the version string is produced by
;; git-version from the base version "0.0.0", the revision and the commit.
(define-public gafpack
  (let ((commit "ad31875b6914d964c6fd72d1bf334f0843538fb6")   ; November 10, 2022
        (revision "1"))
    (package
      (name "gafpack")
      (version (git-version "0.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/gafpack")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0di2psh0ls7jlbnqs7k71p55f73pn23a09k1h3ril7gwjcrzr3rk"))))
      (build-system cargo-build-system)
      (arguments
       `(#:install-source? #f
         #:cargo-inputs
         (("rust-clap" ,rust-clap-4)
          ("rust-gfa" ,rust-gfa-0.10))))
      (home-page "https://github.com/ekg/gafpack")
      (synopsis "Convert variation graph alignments to coverage maps over nodes")
      (description "Gafpack converts alignments to pangenome variation graphs to coverage maps useful in haplotype-based genotyping.")
      (license license:expat))))

;; The cschin fork of AGC, packaged under the name "agc" and used as an input
;; of pgr-tk.  The source is prepared in two steps:
;;   1. the origin snippet removes the bundled libraries (except the two
;;      raduls files) and the bundled pybind11, and rewrites the makefiles to
;;      refer to placeholder /usr paths;
;;   2. the 'adjust-sources phase replaces those placeholders with the actual
;;      locations found among the inputs.
;; The install phase installs the "agc" binary, libagc.so and all headers
;; found under src/.
(define-public agc-for-pgr-tk
  (let ((commit "453c0afdc54b4aa00fa8e97a63f196931fdb81c4")   ; April 26, 2022
        (revision "1"))
    (package
      (name "agc")
      (version (git-version "2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/cschin/agc")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1v5s79rl38dcyy5h1lykbp6clcbqq9winn533j54y49q1jp8chix"))
         (snippet
          #~(begin
              (use-modules (guix build utils))
              ;; Copy the two radul files we can't find a replacement for:
              ;; https://github.com/refresh-bio/RADULS
              (mkdir "keep-libs")
              (rename-file "libs/raduls.h" "keep-libs/raduls.h")
              (rename-file "libs/libraduls.a" "keep-libs/libraduls.a")
              (delete-file-recursively "libs")
              (rename-file "keep-libs" "libs")
              (delete-file-recursively "py_agc_api/pybind11-2.8.1")
              (substitute* '("makefile" "makefile.release")
                (("-mavx") "")
                (("-m64") "")
                (("\\$\\(AGC_LIBS_DIR)\\/mimalloc/\\$\\(LIB_ALLOC\\)")
                 "$(pkg-config --cflags --libs mimalloc) /usr/lib/libmimalloc.so")
                (("\\$\\(AGC_LIBS_DIR)\\/\\$\\(LIB_ZLIB\\)")
                 "$(pkg-config --cflags --libs zlib) /usr/lib/libz.so")
                (("\\$\\(AGC_LIBS_DIR)\\/\\$\\(LIB_ZSTD\\)")
                 "$(pkg-config --cflags --libs libzstd) /usr/lib/libzstd.so")
                (("^PYBIND11_LIB = .*")
                 "PYBIND11_LIB = /usr/include/pybind11")
                (("\\$\\(PYBIND11_LIB\\)/include")
                 "$(PYBIND11_LIB)"))
              (substitute* (find-files "src" "\\.(h|cpp)$")
                (("../../libs/ketopt.h") "ketopt.h")
                (("../../libs/zlib.h") "zlib.h")
                (("../../libs/zstd.h") "zstd.h"))))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; No tests.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure) ; No configure script.
           (add-after 'unpack 'adjust-sources
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Every location is looked up with search-input-file or
               ;; search-input-directory, so no per-input binding is needed.
               (substitute* '("makefile" "makefile.release")
                 (("/usr/include/pybind11")
                  (search-input-directory inputs "/include/pybind11"))
                 (("/usr/lib/libmimalloc.so")
                  (search-input-file inputs "/lib/libmimalloc.so"))
                 (("/usr/lib/libz.so")
                  (search-input-file inputs "/lib/libz.so"))
                 (("/usr/lib/libzstd.so")
                  (search-input-file inputs "/lib/libzstd.so"))
                 (("pkg-config") ,(pkg-config-for-target)))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (include (string-append out "/include/")))
                 (install-file "agc" (string-append out "/bin"))
                 (install-file "libagc.so" (string-append out "/lib"))
                 ;; copy-file does not create directories, so the header
                 ;; sub-directories are created first.
                 (mkdir-p (string-append include "app"))
                 (mkdir-p (string-append include "core"))
                 (mkdir-p (string-append include "lib-cxx"))
                 (with-directory-excursion "src"
                   (for-each
                    (lambda (file)
                      (copy-file file (string-append include file)))
                    (find-files "." "\\.h$")))))))))
      (native-inputs
       (list minimap2 ; for ketopt.h
             pkg-config))
      (inputs
       (list mimalloc
             python
             pybind11
             zlib
             (list zstd "lib")))
      (home-page "https://github.com/cschin/agc")
      (synopsis "Assembled Genomes Compressor")
      (description "@acronym{Assembled Genomes Compressor, AGC} is a tool designed to compress collections of de-novo assembled genomes. It can be used for various types of datasets: short genomes (viruses) as well as long (humans).")
      (license license:expat))))

(define-public pgr-tk
  (package
    (name "pgr-tk")
    (version "0.3.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/Sema4-Research/pgr-tk")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0vm1k63v91zd0pfbg2zmwskajylz8xg83m63qxwaiwny5f4y6f1j"))
       (snippet
        #~(begin
            (use-modules (guix build utils))
            (substitute* (find-files "." "Cargo.toml")
              ;; Only use the major+minor version to decrease the number of
              ;; special version crates.
              (("(.*= \")([[:digit:]]+\\.[[:digit:]]+)\\.[[:digit:]]+(\".*)" _ name version tail)
               (string-append name version tail))
              ;; Then fix the version string for the actual package.
              (("^version = \".*")
               (string-append "version = \"" #$version "\"\n")))))))
    (build-system cargo-build-system)
    (arguments
     `(#:install-source?
#f #:cargo-test-flags (list "--release" "--" "--skip=get_aln_segements" "--skip=get_shmmr_dots" "--skip=AGCFile" "--skip=SeqIndexDB") #:cargo-inputs (("rust-bindgen" ,rust-bindgen-0.58) ("rust-bgzip" ,rust-bgzip-0.2) ("rust-byteorder" ,rust-byteorder-1) ("rust-clap" ,rust-clap-3) ("rust-cuckoofilter" ,rust-cuckoofilter-0.5) ("rust-flate2" ,rust-flate2-1) ("rust-libc" ,rust-libc-0.2) ("rust-log" ,rust-log-0.4) ("rust-petgraph" ,rust-petgraph-0.6) ("rust-pyo3" ,rust-pyo3-0.14) ("rust-rayon" ,rust-rayon-1) ("rust-regex" ,rust-regex-1) ("rust-rustc-hash" ,rust-rustc-hash-1) ("rust-serde" ,rust-serde-1) ("rust-serde-json" ,rust-serde-json-1) ("rust-simple-logger" ,rust-simple-logger-1)) #:phases (modify-phases %standard-phases (add-after 'unpack 'insert-wfa-source (lambda* (#:key inputs #:allow-other-keys) (copy-recursively (assoc-ref inputs "wfa-src") "rs-wfa/WFA"))) (add-after 'unpack 'adjust-source (lambda* (#:key inputs #:allow-other-keys) (substitute* '("pgr-bin/build.rs" "pgr-db/build.rs" "pgr-tk/build.rs") (("git") "ls") (("bioconda") "Guix")) ;; Build with zlib, not zlib-ng (substitute* '("pgr-bin/Cargo.toml" "pgr-db/Cargo.toml") (("zlib-ng-compat") "zlib")) ;; Don't look for agc to be bundled. 
(substitute* "pgr-db/wrapper.h" (("../agc/src/lib-cxx/agc-api.h") "lib-cxx/agc-api.h")) (substitute* "pgr-db/build.rs" ((".*panic!\\(\"Error.*") "")) (mkdir-p "target/release") (symlink (search-input-file inputs "/bin/agc") "target/release/agc") (symlink (search-input-file inputs "/lib/libagc.so") "target/release/libagc"))) (replace 'install (lambda* (#:key outputs #:allow-other-keys) (let ((out (assoc-ref outputs "out"))) (with-directory-excursion "target/release" (install-file "libpgrtk.so" (string-append out "/lib")) (for-each (lambda (file) (install-file file (string-append out "/bin"))) (list "pgr-filter" "pgr-mdb" "pgr-multifilter" "pgr-probe-match" "pgr-shmmr-pair-count"))))))))) (inputs (list agc-for-pgr-tk clang python zlib (list zstd "lib"))) (native-inputs `(("pkg-config" ,pkg-config) ("wfa-src" ,(origin (method git-fetch) (uri (git-reference ;; forPYO3 branch, 14-03-2021 (url "https://github.com/cschin/WFA") (commit "1f8c8d2905ed482cd2d306a1676d60c2a45cb098"))) (file-name "wfa-for-pgr-tk") (sha256 (base32 "19h1cjp2bdlcfq5c6rsbk8bc0f8zn64b471dhj4xlfxd1prv2dpk")))))) (home-page "https://github.com/Sema4-Research/pgr-tk") (synopsis "Pangenome Research Tool Kit") (description "PGR-TK provides pangenome assembly management, query and @acronym{Minimizer Anchored Pangenome, MAP} Graph Generation. It is a project to provide Python and Rust libraries to facilitate pangenomics analysis. Several algorithms and data structures used for the Peregrine Genome Assembler are useful for Pangenomics analysis as well. 
This repo takes those algorithms and data structure, combining other handy 3rd party tools to expose them as a library in Python (with Rust code for those computing parts that need performance.)") (license (license:non-copyleft "file:///LICENSE" "CC-BY-NC-SA 4.0")))) (define-public graph-genotyper (let ((commit "e7cc6b43a5b1f389d76bf9aac7f2ee02f92caeaf") ; October 17, 2022 (revision "13")) (package (name "graph-genotyper") (version (git-version "0.0.0" revision commit)) (source (origin (method git-fetch) (uri (git-reference (url "https://github.com/davidebolo1993/graph_genotyper") (commit commit))) (file-name (git-file-name name version)) (sha256 (base32 "1l8yjpkqamiqr1q5i7vr5z04aba7skpbcwyc9dx5fiklvljjfhcx")))) (build-system copy-build-system) (arguments `(#:install-plan '(("genotype.py" "bin/") ("genotype.sh" "bin/")) #:phases (modify-phases %standard-phases (add-after 'install 'wrap-genotype (lambda* (#:key inputs outputs #:allow-other-keys) (let ((out (assoc-ref outputs "out"))) (wrap-script (string-append out "/bin/genotype.sh") `("GUIX_PYTHONPATH" ":" prefix (,(getenv "GUIX_PYTHONPATH"))) `("PATH" ":" prefix ,(map (lambda (file-name) (string-append (assoc-ref inputs file-name) "/bin")) (list "gafpack" "odgi" "python" "samtools" "vg")))))))))) (inputs (list gafpack guile-3.0 odgi python python-numpy python-pandas python-scipy samtools vg)) (home-page "https://bitbucket.org/jana_ebler") (synopsis "Genotyping based on k-mers and pangenome graphs") (description "This package provides a genotyper for various types of genetic variants (such as SNPs, indels and structural variants). Genotypes are computed based on read k-mer counts and a panel of known haplotypes. 
A description of the method can be found @url{https://www.biorxiv.org/content/10.1101/2020.11.11.378133v1, here}.")
      (license (license:non-copyleft "No license listed")))))

;; PanGenie is built with CMake; its test suite is run with a separate
;; "make -C tests", and the binaries and shared library are installed by hand
;; from the "src" directory of the build tree.
(define-public pangenie
  (let ((commit "e779076827022d1416ab9fabf99a03d8f4725956") ; September 2, 2021 from phasing-tests branch
        (revision "2"))
    (package
      (name "pangenie")
      (version (git-version "0.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://bitbucket.org/jana_ebler/pangenie.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1mphrvidaz328kcwrjgz8q5i4iwnz6ygl6279lm2acv4zgqhmp5i"))))
      (build-system cmake-build-system)
      (arguments
       `(#:configure-flags
         ;; libPanGenieLib.so ends up in <out>/lib (see the install phase),
         ;; so that is the directory put in the RPATH.
         (list (string-append "-DCMAKE_BUILD_RPATH="
                              (assoc-ref %outputs "out") "/lib"))
         #:phases
         (modify-phases %standard-phases
           (replace 'check
             (lambda* (#:key tests? #:allow-other-keys)
               (when tests?
                 (invoke "make" "-C" "tests"))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (lib (string-append out "/lib")))
                 (with-directory-excursion "src"
                   (install-file "PanGenie" bin)
                   (install-file "PanGenie-graph" bin)
                   (install-file "libPanGenieLib.so" lib))))))))
      (native-inputs
       `(("pkg-config" ,pkg-config)))
      (inputs
       `(("jellyfish" ,jellyfish)))
      ;; Link to the project itself rather than to the author's account page.
      (home-page "https://bitbucket.org/jana_ebler/pangenie")
      (synopsis "Genotyping based on k-mers and pangenome graphs")
      (description "This package provides a genotyper for various types of
genetic variants (such as SNPs, indels and structural variants). Genotypes are
computed based on read k-mer counts and a panel of known haplotypes.
A description of the method can be found @url{https://www.biorxiv.org/content/10.1101/2020.11.11.378133v1, here}.")
      (license license:expat))))

;; Built with the default gnu-build-system phases; no arguments are overridden.
(define-public pbsim2
  (let ((commit "e71f7892aea0bd3c963b4f1f5628db4f830ee475") ; Dec 2, 2020
        (revision "1"))
    (package
      (name "pbsim2")
      (version (git-version "0.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/yukiteruono/pbsim2")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "13d3mhdp3rs58w14j9a5sbda4q8k9vzic7rgfa8223m7cm5ih6y7"))))
      (build-system gnu-build-system)
      (home-page "https://github.com/yukiteruono/pbsim2")
      (synopsis "Simulator for long read sequencers")
      (description "PBSIM simulates @acronym{Continuous Long Reads, CLRs} of
PacBio, and Nanopore reads. In it sampling-based and model-based simulations
are implemented.")
      (license license:gpl2))))

;; The autotools project lives in the "src" subdirectory; the extra
;; gc_coverage_bias tool in stator/gcContCvgBias is built, tested and
;; installed by dedicated phases.
(define-public pirs
  (let ((commit "bee9b594f4d0e10580aae77ec411cecec4a77219") ; Sept 7, 2017
        (revision "1"))
    (package
      (name "pirs")
      (version (git-version "2.0.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/galaxy001/pirs")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0pn74h98cqcr5qayp4riss982n4272p35y5dp472cmqpwjjil9cd"))))
      (build-system gnu-build-system)
      (arguments
       `(#:configure-flags
         (list "--enable-pirs-diploid"
               ;; TODO: Enable after core-updates merge, late 2021.
               ;,@(if (not (or (target-x86-64?)
;               (target-x86-32?)))
               ;    `("--disable-sse2")
               ;    '())
               )
         #:phases
         (modify-phases %standard-phases
           ;; Everything from 'bootstrap onwards runs inside "src".
           (add-before 'bootstrap 'chdir
             (lambda _
               (chdir "src")))
           ;; Runs right after 'unpack, i.e. before the chdir above, hence
           ;; the "src/" prefixes.
           (add-after 'unpack 'patch-source
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((out (assoc-ref outputs "out")))
                 (substitute* "src/configure.ac"
                   (("ssse2") "sse2"))
                 (substitute* "src/stator/gcContCvgBias/Makefile"
                   (("gzstream.o ") "")
                   (("-lz") "-lgzstream -lz")
                   (("-static") "")
                   (("-mtune=generic") ""))
                 (substitute* "src/pirs/gccMakefile"
                   (("/usr/local") out)))))
           (replace 'check
             (lambda* (#:key tests? test-target #:allow-other-keys #:rest args)
               (when tests?
                 (apply (assoc-ref %standard-phases 'check) args)
                 (with-directory-excursion "stator/gcContCvgBias"
                   ;((assoc-ref %standard-phases 'check)
                   ; #:test-target "test" args))
                   (invoke "make" "test")))))
           (add-after 'build 'build-more
             (lambda* (#:key #:allow-other-keys #:rest args)
               (with-directory-excursion "stator/gcContCvgBias"
                 (apply (assoc-ref %standard-phases 'build) args))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys #:rest args)
               (let ((out (assoc-ref outputs "out")))
                 (apply (assoc-ref %standard-phases 'install) args)
                 (with-directory-excursion "stator/gcContCvgBias"
                   ;(apply (assoc-ref %standard-phases 'install) args)
                   (install-file "gc_coverage_bias"
                                 (string-append out "/bin")))))))))
      (inputs
       `(("gnuplot" ,gnuplot)
         ("perl" ,perl)
         ("zlib" ,zlib)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("boost" ,boost)
         ("gzstream" ,gzstream)
         ("libtool" ,libtool)))
      (home-page "https://github.com/galaxy001/pirs")
      (synopsis "Profile based Illumina pair-end Reads Simulator")
      (description "@code{pIRS} is a program for simulating paired-end reads
from a reference genome.
It is optimized for simulating reads similar to those generated from the
Illumina platform.")
      (license license:gpl2))))

;; TODO: Unbundle zlib, bamtools, tclap
;; The executable is taken from bin/sniffles-core-VERSION in the source tree
;; ("../source" relative to the build directory), both for installation and
;; for the smoke test in test_set.
(define-public sniffles
  (package
    (name "sniffles")
    (version "1.0.11")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/fritzsedlazeck/Sniffles.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0rkwqn1ycckfzrg2wdid4cqahq8q2jmmgi7vvl8qxgpsihqfbq0j"))))
    (build-system cmake-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (install-file (string-append "../source/bin/sniffles-core-"
                                            ,version "/sniffles")
                             (string-append out "/bin")))))
         (replace 'check
           ;; Honour #:tests? so that the test run can be switched off (for
           ;; instance with --without-tests or when cross-compiling).
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (with-directory-excursion "../source/test_set"
                 ;; Files from the store checkout are read-only.
                 (for-each make-file-writable (find-files "."))
                 (invoke (string-append "../bin/sniffles-core-"
                                        ,version "/sniffles")
                         "-m" "reads_region.bam"
                         "-v" "test.vcf"))))))))
    (native-inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/fritzsedlazeck/Sniffles")
    (synopsis "Structural variation caller using third generation sequencing")
    (description "Sniffles is a structural variation caller using third
generation sequencing (PacBio or Oxford Nanopore).
It detects all types of SVs (10bp+) using evidence from split-read alignments,
high-mismatch regions, and coverage analysis.")
    (license license:expat)))

;; TODO: Unbundle Complete-Striped-Smith-Waterman-Library
;; The test script calls bedtools and samtools by bare name; they are rewritten
;; to the absolute paths of the corresponding inputs before the tests run.
(define-public ngmlr
  (package
    (name "ngmlr")
    (version "0.2.7")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/philres/ngmlr.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0lmsy8w0kxbyfnrln7lxgmnx3d82sv2b20n2yw5742rvfhq1v31n"))))
    (build-system cmake-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'patch-source-shebangs 'patch-more-tools
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bed (assoc-ref inputs "bedtools"))
                   (sam (assoc-ref inputs "samtools")))
               (substitute* (find-files "test" "\\.sh$")
                 (("bedtools") (string-append bed "/bin/bedtools"))
                 (("samtools") (string-append sam "/bin/samtools")))
               #t)))
         (replace 'check
           ;; Honour #:tests? so that the test run can be switched off.
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; The script is run from the source tree, not the build
               ;; directory.
               (with-directory-excursion "../source"
                 (invoke "sh" "test/test_travis.sh"))))))))
    (native-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/philres/ngmlr")
    (synopsis "Long-read mapper designed to align PacBio or Oxford Nanopore")
    (description "NGMLR is a long-read mapper designed to align PacBio or
Oxford Nanopore (standard and ultra-long) to a reference genome with a focus on
reads that span structural variations.")
    (license license:expat)))

;; The test suite is run through Python's unittest discovery on "src/".
(define-public svim
  (package
    (name "svim")
    (version "1.2.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/eldariont/svim.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "08j02in9jbq41b67dna1apnc3y30i37v44d1khml1xlx0iga720s"))))
    (build-system python-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (replace 'check
           ;; Honour #:tests? so that the test run can be switched off.
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "python3" "-m" "unittest" "discover" "-s" "src/")))))))
    (propagated-inputs
     `(("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pysam" ,python-pysam)
       ("python-scipy" ,python-scipy)
       ("minimap2" ,minimap2)
       ("ngmlr" ,ngmlr)
       ("samtools" ,samtools)))
    (home-page "https://github.com/eldariont/svim")
    (synopsis "Structural Variant Identification Method using Long Reads")
    (description "SVIM (pronounced SWIM) is a structural variant caller for
long reads. It is able to detect, classify and genotype five different classes
of structural variants. Unlike existing methods, SVIM integrates information
from across the genome to precisely distinguish similar events, such as tandem
and interspersed duplications and novel element insertions.")
    (license license:gpl3)))

;; bamaddrg is a single C++ file; instead of using its Makefile it is compiled
;; directly against the bamtools input.
(define-public bamaddrg
  (let ((commit "3fccbf057eef21f6304fade6c306c5bb64158865") ; May 26, 2012
        (revision "1"))
    (package
      (name "bamaddrg")
      (version (git-version "0.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/bamaddrg.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "14hq66cc7f4cssagb6079fmd2i6hfr9vmpcw5vi5kzsqr3ifc5yk"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; no tests
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; The Makefile wants to vendor bamtools' source so we mimic it.
(replace 'build
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((bam (assoc-ref inputs "bamtools")))
                 ;; -L names a directory for the linker to search; the
                 ;; library itself is selected with -lbamtools.
                 (invoke "g++" "-O3"
                         (string-append "-I" bam "/include/bamtools")
                         (string-append "-L" bam "/lib")
                         "bamaddrg.cpp"
                         "-o" "bamaddrg"
                         "-lbamtools" "-lz"))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "bamaddrg" bin)))))))
      (native-inputs
       `(("bamtools" ,bamtools)))
      (inputs
       `(("zlib" ,zlib)))
      (home-page "https://github.com/ekg/bamaddrg")
      (synopsis
       "Adds read groups to input BAM files, streams BAM output on stdout")
      (description "This is intended for use \"fixing up\" RG tags on the fly
so that they reflect the source file from which the alignment originated.
This allows the \"safe\" merging of many files from many individuals into one
stream, suitable for input into downstream processing systems such as freebayes
(population variant detector).")
      (license #f)))) ; no license listed

;; QCTOOL is fetched from Mercurial and built with the waf script shipped in
;; the checkout, driven by Python 2.
(define-public qctool
  (let ((changeset "73662f5f6e1e6efe75796bc64e342fb5d5d35e54") ; May 30, 2019
        (revision "1"))
    (package
      (name "qctool")
      (version (string-append "2.0.5-" revision "."
                              (string-take changeset 7)))
      (source
       (origin
         (method hg-fetch)
         (uri (hg-reference
               (url "https://bitbucket.org/gavinband/qctool")
               (changeset changeset)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "0lcir6jdw1gsi1l0yrsyqgrb8dryxxw3gyncfx5bx34qbhd6f5dv"))))
      (build-system waf-build-system)
      (arguments
       `(#:python ,python-2
         #:tests? #f ; no check command
         #:phases
         (modify-phases %standard-phases
           ;; The checkout ships the script as "waf-1.5.18"; presumably the
           ;; build system looks for plain "waf".
           (add-after 'unpack 'rename-waf
             (lambda _
               (rename-file "waf-1.5.18" "waf")
               #t)))))
      (native-inputs
       `(("readline" ,readline)
         ("zlib" ,zlib)))
      (inputs
       `(("lapack" ,lapack)
         ("openblas" ,openblas)))
      (home-page "https://www.well.ox.ac.uk/~gav/qctool_v2/")
      (synopsis "Quality control and analysis of gwas datasets")
      (description "QCTOOL is a command-line utility program for manipulation
and quality control of gwas datasets and other genome-wide data. QCTOOL can be
used
@enumerate
@item To compute per-variant and per-sample QC metrics.
@item To filter out samples or variants.
@item To merge datasets in various ways.
@item To convert dataset between file formats. (In particular QCTOOL can read
and write BGEN files, including full support for the BGEN v1.2 format that has
been used for the UK Biobank imputed data full release).
@item To manipulate datasets in various ways - e.g. by updating data fields or
aligning alleles to a reference sequence based on information in a strand file.
@item To annotate variants with information from BED files, sequence from FASTA
files, or with genetic map positions.
@item To compute LD metrics between variants.
@item To compare genotypes for individuals typed or imputed or phased in
different datasets.
@item To compute between-sample relatedness and principal components.
@item To compute 'genetic risk predictor' scores.
@end enumerate")
      (license (license:x11-style "https://www.boost.org/LICENSE_1_0.txt")))))

;; Shiny application: the checkout is copied to share/NAME, server.r is
;; patched to use absolute tool paths, and bin/NAME is an Rscript launcher.
(define-public rn6-assembly-error-app
  (package
    (name "rn6-assembly-error-app")
    (version "0.12")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/chen42/rn6_assembly_error_app.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0ilmn6w0l17041dlizf4dy4pqn26k7956k7fjx4fvssb525g4gi6"))))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let* ((out       (assoc-ref %outputs "out"))
                (targetdir (string-append out "/share/" ,name))
                (app       (string-append out "/bin/" ,name))
                (Rbin      (string-append (assoc-ref %build-inputs "r-min")
                                          "/bin/Rscript"))
                (convert   (string-append (assoc-ref %build-inputs "imagemagick")
                                          "/bin/convert"))
                (cp        (string-append (assoc-ref %build-inputs "coreutils")
                                          "/bin/cp"))
                (source    (assoc-ref %build-inputs "source")))
           (copy-recursively source targetdir)
           (substitute* (string-append targetdir "/server.r")
             ;; This version is ideal for deploying with the included PNGs.
             ;; But we want all of them, so we use a local copy in shepherd's $HOME.
;;(("./pngs") (string-append targetdir "/pngs"))
             (("./pngs") "/home/shepherd/rn6app/pngs")
             (("cp") cp)
             (("convert") convert))
           (mkdir-p (string-append out "/bin"))
           ;; Launcher script; the "#!" line has to be the first line of
           ;; the file.
           (call-with-output-file app
             (lambda (port)
               (format port
"#!~a
library(shiny)
setwd(\"~a\")
runApp(launch.browser=0, port=4202)~%\n"
               Rbin targetdir)))
           (chmod app #o555)
           #t))))
    (native-inputs
     `(("source" ,source)))
    (inputs
     `(("coreutils" ,coreutils-minimal)
       ("imagemagick" ,imagemagick)
       ("r-min" ,r-minimal)))
    (propagated-inputs
     `(("freetype" ,freetype)
       ("r" ,r)
       ("r-ggplot2" ,r-ggplot2)
       ("r-shiny" ,r-shiny)))
    (home-page "http://rn6err.opar.io/")
    (synopsis "Display potential assembly errors in rn6")
    (description "Display potential assembly errors in rn6.")
    (license license:expat)))

;; Shiny application: the checkout is copied to share/NAME, bin/NAME is an
;; Rscript launcher, and the read.csv call in server.R is redirected to the
;; "datasets" input in the store.
(define-public bxd-power-calculator-app
  (let ((commit "7cdd73daa9a7aa79af1de04dc314c325f9706fb8")
        (revision "1"))
    (package
      (name "bxd-power-calculator-app")
      (version (git-version "0.7" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/Dashbrook/BXD_power_calculator_app/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0vdfilzy78jalkh9w9xxvarnzgnlaz943crmhlds8bcrvwbmf6yh"))))
      (build-system trivial-build-system)
      (arguments
       `(#:modules ((guix build utils))
         #:builder
         (begin
           (use-modules (guix build utils))
           (let* ((out       (assoc-ref %outputs "out"))
                  (targetdir (string-append out "/share/" ,name))
                  (app       (string-append out "/bin/" ,name))
                  (Rbin      (string-append (assoc-ref %build-inputs "r-min")
                                            "/bin/Rscript"))
                  (datasets  (assoc-ref %build-inputs "datasets"))
                  (source    (assoc-ref %build-inputs "source")))
             (copy-recursively source targetdir)
             (mkdir-p (string-append out "/bin"))
             ;; Launcher script; the "#!" line has to be the first line of
             ;; the file.
             (call-with-output-file app
               (lambda (port)
                 (format port
"#!~a
library(shiny)
setwd(\"~a\")
runApp(launch.browser=0, port=3978)~%\n"
                 Rbin targetdir)))
             (chmod app #o555)
             (substitute* (string-append targetdir "/server.R")
               (("read.csv.*")
                (string-append "read.csv(url(\"file://" datasets
                               "\"), header = TRUE)\n")))
             #t))))
      (native-inputs
       `(("source" ,source)))
      (propagated-inputs
       `(("r" ,r)
         ("r-data-table" ,r-data-table)
         ("r-dt" ,r-dt)
         ("r-dplyr" ,r-dplyr)
         ("r-ggplot2" ,r-ggplot2)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-shiny" ,r-shiny)))
      (inputs
       `(("r-min" ,r-minimal)
         ;; Also available from ipfs
         ;; ipfs get Qma3LWJBoks77btTmp6rn6jGSBcuBoPgcPCmofY2RRKEKf
         ("datasets"
          ,(origin
             (method url-fetch)
             (uri "https://web.archive.org/web/20191016132922/http://individual.utoronto.ca/D_Ashbrook/Effect_size_analysis_heritability_28th_Nov_2018_recalc.csv")
             (sha256
              (base32 "1ldr9infavd0vak8n8ry9smcnrir3xgs1bahmmx7n2csx4n6qx2x"))))))
      (home-page "https://dashbrook1.shinyapps.io/bxd_power_calculator_app/")
      (synopsis "Visualize probability (beta) of detecting a QTL")
      (description "The BXD power app seeks to provide a quick and easy
graphical interface for users to calculate the theoretical power to detect an
effect in a two-parent recombinant inbred population. A power calculator such
as this is needed as all grants require a calculation of the application's
power to detect the effect of interest, and this app can provide values and
figures for applicants to use.")
      (license license:gpl3))))

;; Shiny application: the checkout is copied to share/NAME, the CSV file names
;; referenced in global.R are replaced by the store paths of the matching
;; inputs, and bin/NAME is an Rscript launcher.
(define-public singlecellrshiny
  (let ((commit "bdca74f4819d11e8fe7b15d9ab91b853f6542f7a")
        (revision "3"))
    (package
      (name "singlecellrshiny")
      (version (git-version "0.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/genenetwork/singleCellRshiny")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1rxj933s9p9r7358vnp15f7ag6c0j65r4hgr8kyirfhmp1i8xdlw"))))
      (build-system trivial-build-system)
      (arguments
       `(#:modules ((guix build utils))
         #:builder
         (begin
           (use-modules (guix build utils))
           (let* ((out       (assoc-ref %outputs "out"))
                  (targetdir (string-append out "/share/" ,name))
                  (app       (string-append out "/bin/" ,name))
                  (Rbin      (string-append (assoc-ref %build-inputs "r-min")
                                            "/bin/Rscript"))
                  (top1001   (assoc-ref %build-inputs "RobTop1001.csv"))
                  (celltypes (assoc-ref %build-inputs
                                        "CellTypes_RGC_Master_08Dec2018.csv"))
                  (800-H1    (assoc-ref %build-inputs "800-H1-H20-RNA-Seq.csv"))
                  (source    (assoc-ref %build-inputs "source")))
             (copy-recursively source targetdir)
             (substitute* (string-append targetdir "/app.R")
               ;; As seen in https://github.com/genenetwork/singleCellRshiny/commit/6b2a344dd0d02f65228ad8c350bac0ced5850d05.patch
               (("library\\(DT\\)") "library(DT)\nlibrary(multtest)"))
             (substitute* (string-append targetdir "/global.R")
               (("800-H1-H20-RNA-Seq-SingleCell-Retina-OMRF-03-29-19_FPKM_v2_SiamakPlay.csv")
                800-H1)
               (("CellTypes_RGC_Master_08Dec2018.csv") celltypes)
               (("RobTop1001.csv") top1001)
               ;; As seen in https://github.com/genenetwork/singleCellRshiny/commit/6b2a344dd0d02f65228ad8c350bac0ced5850d05.patch
               (("dim\\(sc.object.1") "dim(sc.object"))
             (mkdir-p (string-append out "/bin"))
             ;; Launcher script; the "#!" line has to be the first line of
             ;; the file.
             (call-with-output-file app
               (lambda (port)
                 (format port
"#!~a
library(shiny)
setwd(\"~a\")
runApp(launch.browser=0, port=4208)~%\n"
                 Rbin targetdir)))
             (chmod app #o555)
             #t))))
      (inputs
       `(("r-min" ,r-minimal)
         ("RobTop1001.csv"
          ,(origin
             (method url-fetch)
             (uri "https://archive.org/download/celltypesrgcmaster08dec2018/RobTop1001.csv")
             (file-name "RobTop1001.csv")
             (sha256
              (base32 "0pa73kc1p8417sjvvvhp9xsbh2h8g7h85pnmm16mnv4wjalhq0gn"))))
         ("CellTypes_RGC_Master_08Dec2018.csv"
          ,(origin
             (method url-fetch)
             (uri "https://archive.org/download/celltypesrgcmaster08dec2018/CellTypes_RGC_Master_08Dec2018.csv")
             (file-name "CellTypes_RGC_Master_08Dec2018.csv")
             (sha256
              (base32 "0y411968np1f5g21iym9xc9yj5c1jsn94rpkwkxh9pw2z43gvghn"))))
         ("800-H1-H20-RNA-Seq.csv"
          ,(origin
             (method url-fetch)
             (uri "https://archive.org/download/celltypesrgcmaster08dec2018/800-H1-H20-RNA-Seq-SingleCell-Retina-OMRF-03-29-19_FPKM_v2_SiamakPlay.csv")
             (file-name "800-H1-H20-RNA-Seq-SingleCell-Retina-OMRF-03-29-19_FPKM_v2_SiamakPlay.csv")
             (sha256
              (base32 "1b1y4lfs8drypm04i1rypbmk67rdqgs27nfh05pwnv3sja2nanam"))))))
      (propagated-inputs
       `(("r" ,r)
         ("r-dt" ,r-dt)
         ("r-multtest" ,r-multtest)
         ("r-seurat" ,r-seurat)
         ("r-shiny" ,r-shiny)))
      (home-page "http://singlecell.opar.io/")
      (synopsis "RNA sequencing data analysis")
      (description "This package provides the R-Shiny programs to run some
basic single cell RNA sequencing (scRNA-seq) data analysis.")
      (license license:agpl3))))

;; The source snippet strips -march=native, -mcx16 and -msse4.2 from the
;; bundled build files; -mcx16 is then passed back explicitly on x86 through
;; the configure flags.
(define-public seqwish
  (package
    (name "seqwish")
    (version "0.7.11")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ekg/seqwish.git")
             (commit (string-append "v" version))
             (recursive? #t)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "18wsrvqf0nsfk29v3ggdq2r4q15d4n4sq8v228qq1jsybbjlkgsa"))
       (patches (search-patches "seqwish-paryfor-riscv.diff"
                                "seqwish-shared-library.diff"))
       (snippet
        #~(begin
            (use-modules (guix build utils))
            (substitute* '("CMakeLists.txt"
                           "deps/atomic_queue/Makefile"
                           "deps/mmmulti/deps/DYNAMIC/CMakeLists.txt"
                           "deps/mmmulti/deps/atomic_queue/Makefile"
                           "deps/mmmulti/deps/ips4o/CMakeLists.txt")
              (("-march=native") "")
              (("-mcx16") ""))
            (substitute* '("deps/mmmulti/deps/sdsl-lite/CMakeLists.txt"
                           "deps/sdsl-lite/CMakeLists.txt")
              (("-msse4.2 -march=native") ""))))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (cons* ,@(if (target-x86?)
                    ;; This is the minimum needed to compile on x86_64, and is a
                    ;; subset of any other optimizations which might be applied.
                    '("-DCMAKE_C_FLAGS=-mcx16"
                      "-DCMAKE_CXX_FLAGS=-mcx16")
                    '())
              '("-DSEQWISH_LINK_SHARED_LIBRARY=ON"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-version
           (lambda _
             ;; This stashes the build version in the executable.
(mkdir "include")
             ;; The version header is written by hand below, so the
             ;; execute_process call in CMakeLists.txt is commented out.
             (substitute* "CMakeLists.txt"
               (("^execute_process") "#execute_process"))
             (with-output-to-file "include/seqwish_git_version.hpp"
               (lambda ()
                 (format #t "#define SEQWISH_GIT_VERSION \"~a\"~%" ,version)))))
         (add-after 'unpack 'link-with-some-shared-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Replace references to the static sdsl/divsufsort archives with
             ;; plain -l flags and take the headers from the inputs.
             (substitute* '("CMakeLists.txt"
                            "deps/mmmulti/CMakeLists.txt")
               (("\".*libsdsl\\.a\"") "\"-lsdsl\"")
               (("\".*libdivsufsort\\.a\"") "\"-ldivsufsort\"")
               (("\".*libdivsufsort64\\.a\"") "\"-ldivsufsort64\"")
               (("\\$\\{sdsl-lite_INCLUDE\\}")
                (search-input-directory inputs "/include/sdsl"))
               (("\\$\\{sdsl-lite-divsufsort_INCLUDE\\}")
                (dirname
                 (search-input-file inputs "/include/divsufsort.h"))))))
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             ;; Add seqwish to the PATH for the tests.
             (setenv "PATH" (string-append (getcwd) ":" (getenv "PATH")))
             (when tests?
               (with-directory-excursion "../source/test"
                 (invoke "make"))))))))
    (inputs
     (list jemalloc
           libdivsufsort
           openmpi
           sdsl-lite
           zlib))
    (native-inputs
     (list perl))
    (home-page "https://github.com/ekg/seqwish")
    (synopsis "Alignment to variation graph inducer")
    (description "Seqwish implements a lossless conversion from pairwise
alignments between sequences to a variation graph encoding the sequences and
their alignments. As input we typically take all-versus-all alignments, but the
exact structure of the alignment set may be defined in an application specific
way. This algorithm uses a series of disk-backed sorts and passes over the
alignment and sequence inputs to allow the graph to be constructed from very
large inputs that are commonly encountered when working with large numbers of
noisy input sequences. Memory usage during construction and traversal is
limited by the use of sorted disk-backed arrays and succinct rank/select
dictionaries to record a queryable version of the graph.")
    (properties `((tunable? . #t)))
    (license license:expat)))

;; Return a hidden, non-tunable variant of seqwish compiled with
;; -march=PSABI, where PSABI is a string such as "x86-64-v2".  Libraries are
;; installed under lib/glibc-hwcaps/PSABI, the bin directory is removed from
;; the "out" output, and libseqwish.a is moved to the "static" output.  Tests
;; are disabled.  The PSABI-dependent strings are computed on the host side so
;; that the build-side code only contains string literals.
(define (seqwish-for-psabi psabi)
  (let ((libdir (string-append "/lib/glibc-hwcaps/" psabi)))
    (package/inherit seqwish
      (name (string-append "seqwish-" psabi))
      (outputs '("out" "static"))
      (arguments
       (substitute-keyword-arguments (package-arguments seqwish)
         ((#:configure-flags flags #~'())
          #~(append (list #$(string-append "-DEXTRA_FLAGS=-march=" psabi)
                          #$(string-append
                             "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/" psabi)
                          (string-append "-DCMAKE_INSTALL_RPATH=" #$output
                                         #$libdir))
                    #$flags))
         ;; The building machine can't necessarily run the code produced.
         ((#:tests? _ #t) #f)
         ((#:phases phases #~%standard-phases)
          #~(modify-phases #$phases
              (add-after 'install 'remove-extra-files
                (lambda _
                  (delete-file-recursively (string-append #$output "/bin"))))
              (add-after 'install 'move-static-library
                (lambda* (#:key outputs #:allow-other-keys)
                  (let ((lib #$(string-append libdir "/libseqwish.a")))
                    (mkdir-p (dirname (string-append #$output:static lib)))
                    (rename-file (string-append #$output lib)
                                 (string-append #$output:static lib)))))))))
      (supported-systems '("x86_64-linux"))
      (properties `((hidden? . #t)
                    (tunable? . #f))))))

(define-public seqwish-x86-64-v2
  (seqwish-for-psabi "x86-64-v2"))

(define-public seqwish-x86-64-v3
  (seqwish-for-psabi "x86-64-v3"))

(define-public seqwish-x86-64-v4
  (seqwish-for-psabi "x86-64-v4"))

;; This copy of seqwish will automatically use the libraries that target the
;; x86_64 psABI which the hardware supports.
(define-public seqwish-hwcaps
  (package/inherit seqwish
    (name "seqwish-hwcaps")
    (arguments
     (substitute-keyword-arguments (package-arguments seqwish)
       ((#:phases phases #~%standard-phases)
        #~(modify-phases #$phases
            (add-after 'install 'install-optimized-libraries
              (lambda* (#:key inputs #:allow-other-keys)
                ;; Copy the glibc-hwcaps directory of every psABI-level
                ;; variant into this package's output.
                (let ((hwcaps "/lib/glibc-hwcaps"))
                  (for-each
                   (lambda (level)
                     (copy-recursively
                      (string-append
                       (assoc-ref inputs (string-append "seqwish-" level))
                       hwcaps "/" level)
                      (string-append #$output hwcaps "/" level)))
                   '("x86-64-v2" "x86-64-v3" "x86-64-v4")))))))))
    (native-inputs
     (modify-inputs (package-native-inputs seqwish)
       (append seqwish-x86-64-v2 seqwish-x86-64-v3 seqwish-x86-64-v4)))
    (properties `((tunable? . #f)))))

;; smoothxg, built with CMake from the release tarball.  The snippet turns off
;; the native-CPU optimisations in every CMakeLists.txt; the bundled abPOA is
;; built by hand before the main build.  The package is marked tunable.
(define-public smoothxg
  (package
    (name "smoothxg")
    (version "0.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/pangenome/smoothxg"
                           "/releases/download/v" version
                           "/smoothxg-v" version ".tar.gz"))
       (sha256
        (base32 "1gl4dskj81hlma7wsjrwwg77hgzk7dc9iaqqnf2wzrf2f4ll1cdj"))
       (snippet
        #~(begin
            (use-modules (guix build utils))
            (substitute* (find-files "." "CMakeLists.txt")
              (("spoa_optimize_for_native ON")
               "spoa_optimize_for_native OFF")
              (("-msse4\\.2") "")
              (("-march=native") ""))))))
    (build-system cmake-build-system)
    (arguments
     (list
      #:make-flags
      #~(list (string-append "CC = " #$(cc-for-target)))
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack 'link-with-some-shared-libraries
            (lambda* (#:key inputs #:allow-other-keys)
              ;; Replace references to the bundled static archives and include
              ;; directories with the libraries found in the inputs.
              (substitute* '("CMakeLists.txt"
                             "deps/mmmulti/CMakeLists.txt"
                             "deps/odgi/deps/mmmulti/CMakeLists.txt")
                (("\".*libsdsl\\.a\"") "\"-lsdsl\"")
                (("\".*libdivsufsort\\.a\"") "\"-ldivsufsort\"")
                (("\".*libdivsufsort64\\.a\"") "\"-ldivsufsort64\"")
                (("\".*libodgi\\.a\"") "\"-lodgi\"")
                (("\\$\\{sdsl-lite_INCLUDE\\}")
                 (search-input-directory inputs "/include/sdsl"))
                (("\\$\\{sdsl-lite-divsufsort_INCLUDE\\}")
                 (dirname
                  (search-input-file inputs "/include/divsufsort.h")))
                (("\\$\\{odgi_INCLUDE\\}")
                 (search-input-directory inputs "/include/odgi")))))
          (add-before 'build 'build-abPOA
            (lambda* (#:key make-flags #:allow-other-keys)
              ;; This helps with portability to other architectures.
              (with-directory-excursion
                  (string-append "../smoothxg-v" #$version "/deps/abPOA")
                (substitute* "Makefile"
                  (("-march=native") "")
                  (("-march=armv8-a\\+simd") ""))
                (apply invoke "make" "libabpoa" make-flags)))))))
    (inputs
     (list jemalloc
           libdivsufsort
           odgi
           openmpi
           pybind11
           python
           sdsl-lite
           zlib
           (list zstd "lib")))
    (native-inputs
     (list pkg-config))
    ;; Same GitHub organisation as the source tarball.
    (home-page "https://github.com/pangenome/smoothxg")
    (synopsis "Linearize and simplify variation graphs using blocked partial order alignment")
    (description "Pangenome graphs built from raw sets of alignments may have complex local structures generated by common patterns of genome variation. These local nonlinearities can introduce difficulty in downstream analyses, visualization, and interpretation of variation graphs. @command{smoothxg} finds blocks of paths that are collinear within a variation graph. It applies partial order alignment to each block, yielding an acyclic variation graph. Then, to yield a smoothed graph, it walks the original paths to lace these subgraphs together. The resulting graph only contains cyclic or inverting structures larger than the chosen block size, and is otherwise manifold linear. In addition to providing a linear structure to the graph, smoothxg can be used to extract the consensus pangenome graph by applying the heaviest bundle algorithm to each chain. To find blocks, smoothxg applies a greedy algorithm that assumes that the graph nodes are sorted according to their occurrence in the graph's embedded paths. The path-guided stochastic gradient descent based 1D sort implemented in @command{odgi sort -Y} is designed to provide this kind of sort.")
    (properties `((tunable? . #t)))
    (license license:expat)))

;; TODO: Unbundle BBHash, parallel-hashmap, zstr
;; GraphAligner, built with plain make (there is no configure script and no
;; test suite).  The bundled concurrentqueue is deleted in favour of the input
;; and the static-linking flags are removed from the makefile.
(define-public graphaligner
  (package
    (name "graphaligner")
    (version "1.0.19")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/maickrau/GraphAligner/files/"
                           "14037134/GraphAligner.tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1z1rxvl2pmiqbh670phkx7vma36w90ylp27wyadlbzf32pa2cpdn"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:tests? #f ; no tests
      #:make-flags
      #~(list (string-append "VERSION=" #$version))
      #:phases
      #~(modify-phases %standard-phases
          (delete 'configure) ; no configure phase
          (add-after 'unpack 'patch-source
            (lambda* (#:key inputs #:allow-other-keys)
              (let ((concurrentqueue (assoc-ref inputs "concurrentqueue")))
                (delete-file-recursively "concurrentqueue")
                (substitute* "makefile"
                  (("-Iconcurrentqueue")
                   (string-append "-I" concurrentqueue
                                  "/include/concurrentqueue"))
                  (("^JEMALLOCFLAGS.*")
                   "JEMALLOCFLAGS= `pkg-config --libs jemalloc`\n")
                  ;; No need to build statically.
                  (("-Wl,-Bstatic") "")
                  (("-static-libstdc\\+\\+") "")))))
          (replace 'install
            (lambda* (#:key outputs #:allow-other-keys)
              ;; Install everything under bin/ as programs and every .h/.hpp
              ;; file under src/ as a header.
              (let ((out (assoc-ref outputs "out")))
                (for-each
                 (lambda (program)
                   (install-file program (string-append out "/bin")))
                 (find-files "bin"))
                (for-each
                 (lambda (header)
                   (install-file header (string-append out "/include")))
                 (find-files "src" "\\.h(pp)?$"))))))))
    (native-inputs
     (list (list jemalloc "bin")
           pkg-config
           sparsehash))
    (inputs
     (list boost
           concurrentqueue
           jemalloc
           libdivsufsort
           mummer
           protobuf
           sdsl-lite
           zlib))
    (home-page "https://github.com/maickrau/GraphAligner")
    (synopsis "Seed-and-extend program for aligning genome graphs")
    (description "Seed-and-extend program for aligning long error-prone reads to genome graphs. For a description of the bitvector alignment extension algorithm, see @url{https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/btz162/5372677 here}.")
    (license license:expat)))

;; MUMmer release tarball built with the GNU build system; four test scripts
;; are removed from the generated Makefile after the configure phase.
(define-public mummer
  (package
    (name "mummer")
    (version "4.0.0rc1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/mummer4/mummer/releases/"
                           "download/v" version "/mummer-" version ".tar.gz"))
       (sha256
        (base32 "07bxw1vax1sai3g5xjn6sqngddlbnlabpqy373vw4fb55pdnl045"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'configure 'skip-test_md5-tests
            (lambda _
              ;; There seems to be a bug with how these tests are called.
              (substitute* "Makefile"
                (("tests/mummer.sh") "")
                (("tests/nucmer.sh") "")
                (("tests/genome.sh") "")
                (("tests/sam.sh") "")))))))
    (inputs
     (list gnuplot perl))
    (home-page "http://mummer.sourceforge.net/")
    (synopsis "Efficient sequence alignment of full genomes")
    (description "MUMmer is a versatile alignment tool for DNA and protein sequences.")
    (license license:artistic2.0)))

;; diagnostic-slider: the git checkout is copied verbatim to
;; share/diagnostic-slider and a small Rscript launcher that runs the Shiny
;; application from that directory is written to bin/diagnostic-slider.
;; FIXME: synopsis, description and license are still empty.
(define-public diagnostic-slider
  (let ((commit "514d65d4982133e4869e578c5553fced4c6d506c")
        (revision "1"))
    (package
      (name "diagnostic-slider")
      (version (git-version "0.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/sens/diagnostic-slider")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "04g8if32g8syg6v0bd3jjn05i3d394nx8i3ccl0883p8mlmdvlmx"))))
      (build-system trivial-build-system)
      (arguments
       `(#:modules ((guix build utils))
         #:builder
         (begin
           (use-modules (guix build utils))
           (let* ((out (assoc-ref %outputs "out"))
                  (targetdir (string-append out "/share/" ,name))
                  (app (string-append out "/bin/" ,name))
                  (Rbin (string-append (assoc-ref %build-inputs "r-min")
                                       "/bin/Rscript"))
                  (source (assoc-ref %build-inputs "source")))
             (copy-recursively source targetdir)
             (mkdir-p (string-append out "/bin"))
             (call-with-output-file app
               (lambda (port)
                 ;; Every statement needs its own line: the interpreter path
                 ;; must be alone on the "#!" line.  "~%" makes the line
                 ;; breaks explicit instead of relying on newlines embedded
                 ;; in the string literal.
                 (format port
                         "#!~a~%library(shiny)~%setwd(\"~a\")~%runApp(launch.browser=0, port=4206)~%\n"
                         Rbin targetdir)))
             (chmod app #o555)
             #t))))
      (native-inputs `(("source" ,source)))
      (inputs `(("r-min" ,r-minimal)))
      (propagated-inputs
       `(("r" ,r)
         ("r-shiny" ,r-shiny)))
      (home-page "https://github.com/sens/diagnostic-slider")
      (synopsis "")
      (description "")
      (license #f))))

;; ClustalW; the installed clustalw2 binary is renamed to clustalw.
;; FIXME: the license field is still #f and needs to be filled in.
(define-public clustalw
  (package
    (name "clustalw")
    (version "2.1")
    (source
     (origin
       (method url-fetch)
       ;; Derive the tarball name from the version field so that the two
       ;; cannot get out of sync.
       (uri (string-append "http://www.clustal.org/download/current/clustalw-"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "11llyj08liq0bg6vqan8728qjrbam3xhna2wd6g8rzdbhydhalp0"))))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'post-install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (rename-file (string-append out "/bin/clustalw2")
                            (string-append out "/bin/clustalw"))))))))
    (build-system gnu-build-system)
    (home-page "http://www.clustal.org/")
    (synopsis "Multiple sequence alignment program")
    (description "ClustalW is a general purpose multiple sequence alignment program for DNA or proteins. This package installs the @command{clustalw2} executable under the name @command{clustalw}.")
    (license #f)))

;; WhatsHap from PyPI, built with the Python build system.
(define-public python-whatshap
  (package
    (name "python-whatshap")
    (version "1.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "whatshap" version))
       (sha256
        (base32 "0vxv6y8sg25yii106j6k55vc5z7n1l1y1nax49dgbarbrvk8cr2f"))))
    (build-system python-build-system)
    (inputs
     (list python-biopython
           python-networkx
           python-pyfaidx
           python-pysam
           python-scipy
           python-xopen))
    (native-inputs
     (list python-cython
           python-setuptools-scm))
    (home-page "https://github.com/whatshap/whatshap/")
    (synopsis "Read-based phasing of genomic variants")
    (description "WhatsHap is a software for phasing genomic variants using DNA sequencing reads, also called read-based phasing or haplotype assembly. It is especially suitable for long reads, but works also well with short reads.")
    (license license:expat)))

;; python-scanpy built from a pinned git commit.  Because there is no SCM
;; metadata in the checkout, setuptools-scm versioning is switched off and the
;; version string is written into scanpy/__init__.py instead.
(define-public python-scanpy-git
  (let ((commit "590d42309f9ed6550d7b887039990edfc1ac7648") ; April 22, 2020
        (revision "1"))
    (package
      (inherit python-scanpy)
      (name "python-scanpy-git")
      (version (git-version "1.4.6" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/theislab/scanpy")
               (commit commit)))
         (file-name (git-file-name "python-scanpy" version))
         (sha256
          (base32 "0z3pk9vh4b7fqq7fs262i6z0pm1dnn6bf49a4r7r73k6gjj6namd"))))
      (arguments
       (substitute-keyword-arguments (package-arguments python-scanpy)
         ((#:phases phases)
          `(modify-phases ,phases
             (add-before 'build 'fix-build
               (lambda* (#:key inputs outputs #:allow-other-keys)
                 (let ((out (assoc-ref outputs "out"))
                       (pyv (python-version (assoc-ref inputs "python"))))
                   (substitute* "setup.py"
                     (("use_scm_version=True") "use_scm_version=False"))
                   (substitute* "scanpy/__init__.py"
                     (("__version__.*")
                      (string-append "__version__ = '" ,version "'\n")))
                   (mkdir-p (string-append out "/lib/python" pyv
                                           "/site-packages"))
                   ;; getenv returns #f when PYTHONPATH is unset, which would
                   ;; make string-append fail; fall back to the empty string.
                   (setenv "PYTHONPATH"
                           (string-append out "/lib/python" pyv
                                          "/site-packages/:"
                                          (or (getenv "PYTHONPATH") ""))))
                 ;; These tests fail on this git revision
                 (delete-file "scanpy/tests/test_neighbors_key_added.py")
                 (delete-file "scanpy/tests/test_pca.py")
                 #t)))))))))

;; TODO: Unbundle everything before upstreaming
;; odgi, built with CMake from the release tarball, with a separate "static"
;; output that receives libodgi.a.  The executable is linked against the
;; shared libodgi so that psABI-specific builds of the library can be swapped
;; in.  The package is marked tunable.
(define-public odgi
  (package
    (name "odgi")
    (version "0.9.0")
    (outputs '("out" "static"))
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/pangenome/odgi/releases"
                           "/download/v" version
                           "/odgi-v" version ".tar.gz"))
       (sha256
        (base32 "0brg0sz45v1wv4ld3p4jwiab10nyp2f691zfwpiva6g6f71q3cbk"))
       (snippet
        #~(begin
            (use-modules (guix build utils))
            (substitute* "CMakeLists.txt"
              (("-march=native") "")
              (("-msse4\\.2") ""))))))
    (build-system cmake-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack 'use-gnuinstalldirs-macros
            (lambda _
              (substitute* "CMakeLists.txt"
                (("project\\(odgi\\)" all)
                 (string-append all "\ninclude(GNUInstallDirs)"))
                ;; This is different than the default.
                ;(("PUBLIC_HEADER DESTINATION include/odgi")
                ; "PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}")
                (("LIBRARY DESTINATION lib")
                 "LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}")
                (("ARCHIVE DESTINATION lib")
                 "ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}"))))
          (add-after 'unpack 'link-to-libodgi
            (lambda _
              ;; This lets us provide libraries for different psABI levels.
              (substitute* "CMakeLists.txt"
                ;; Drop the object-file lines so the objects are not compiled
                ;; into the executable as well.  Only indented
                ;; "$<TARGET_OBJECTS:...>" generator expressions are matched,
                ;; whatever the indentation width, so "${...}" variable
                ;; references are left alone.
                ;; NOTE(review): assumes upstream lists the objects as
                ;; "$<TARGET_OBJECTS:...>" lines -- confirm against the
                ;; CMakeLists.txt of this release.
                (("^ +\\$<TARGET_OBJECTS:.*") "")
                (("target_link_libraries\\(odgi " all)
                 (string-append all "libodgi_shared ")))))
          (add-after 'install 'move-static-library
            (lambda _
              (mkdir-p (string-append #$output:static "/lib"))
              (rename-file (string-append #$output "/lib/libodgi.a")
                           (string-append #$output:static
                                          "/lib/libodgi.a")))))))
    (native-inputs
     (list pkg-config))
    (inputs
     (list jemalloc
           libdivsufsort
           openmpi
           pybind11
           python
           sdsl-lite))
    ;; Same GitHub organisation as the source tarball.
    (home-page "https://github.com/pangenome/odgi")
    (synopsis "Optimized Dynamic Genome/Graph Implementation")
    (description "@acronym{odgi, Optimized Dynamic Genome/Graph Implementation} provides an efficient and succinct dynamic DNA sequence graph model, as well as a host of algorithms that allow the use of such graphs in bioinformatic analyses. Careful encoding of graph entities allows odgi to efficiently compute and transform pangenomes with minimal overheads. @command{odgi} implements a dynamic data structure that leverages multi-core CPUs and can be updated on the fly. The edges and path steps are recorded as deltas between the current node id and the target node id, where the node id corresponds to the rank in the global array of nodes. Graphs built from biological data sets tend to have local partial order and, when sorted, the deltas will be small. This allows them to be compressed with a variable length integer representation, resulting in a small in-memory footprint at the cost of packing and unpacking.")
    (properties '((tunable? . #t)))
    (license license:expat)))

;; Hidden variant of odgi that passes -march=x86-64-v2 through EXTRA_FLAGS and
;; installs its libraries under lib/glibc-hwcaps/x86-64-v2.  Tests are
;; disabled, bin/ and include/ are deleted after installation and libodgi.a is
;; moved to the "static" output.
(define-public odgi-x86-64-v2
  (package/inherit odgi
    (name "odgi-x86-64-v2")
    (arguments
     (substitute-keyword-arguments (package-arguments odgi)
       ((#:configure-flags flags #~'())
        #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v2"
                        "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v2"
                        (string-append "-DCMAKE_INSTALL_RPATH=" #$output
                                       "/lib/glibc-hwcaps/x86-64-v2"))
                  #$flags))
       ;; The building machine can't necessarily run the code produced.
       ((#:tests? _ #t) #f)
       ((#:phases phases #~%standard-phases)
        #~(modify-phases #$phases
            (add-after 'install 'remove-extra-files
              (lambda _
                (delete-file-recursively (string-append #$output "/bin"))
                (delete-file-recursively (string-append #$output "/include"))))
            (replace 'move-static-library
              (lambda _
                (let ((lib "/lib/glibc-hwcaps/x86-64-v2/libodgi.a"))
                  (mkdir-p (dirname (string-append #$output:static lib)))
                  (rename-file (string-append #$output lib)
                               (string-append #$output:static lib)))))))))
    (supported-systems '("x86_64-linux"))
    (properties `((hidden? . #t)))))

;; Same as odgi-x86-64-v2, but for the x86-64-v3 level.
(define-public odgi-x86-64-v3
  (package/inherit odgi
    (name "odgi-x86-64-v3")
    (arguments
     (substitute-keyword-arguments (package-arguments odgi)
       ((#:configure-flags flags #~'())
        #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v3"
                        "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v3"
                        (string-append "-DCMAKE_INSTALL_RPATH=" #$output
                                       "/lib/glibc-hwcaps/x86-64-v3"))
                  #$flags))
       ;; The building machine can't necessarily run the code produced.
       ((#:tests? _ #t) #f)
       ((#:phases phases #~%standard-phases)
        #~(modify-phases #$phases
            (add-after 'install 'remove-extra-files
              (lambda _
                (delete-file-recursively (string-append #$output "/bin"))
                (delete-file-recursively (string-append #$output "/include"))))
            (replace 'move-static-library
              (lambda _
                (let ((lib "/lib/glibc-hwcaps/x86-64-v3/libodgi.a"))
                  (mkdir-p (dirname (string-append #$output:static lib)))
                  (rename-file (string-append #$output lib)
                               (string-append #$output:static lib)))))))))
    (supported-systems '("x86_64-linux"))
    (properties `((hidden? . #t)))))

;; Same as odgi-x86-64-v2, but for the x86-64-v4 level.
(define-public odgi-x86-64-v4
  (package/inherit odgi
    (name "odgi-x86-64-v4")
    (arguments
     (substitute-keyword-arguments (package-arguments odgi)
       ((#:configure-flags flags #~'())
        #~(append (list "-DEXTRA_FLAGS=-march=x86-64-v4"
                        "-DCMAKE_INSTALL_LIBDIR=lib/glibc-hwcaps/x86-64-v4"
                        (string-append "-DCMAKE_INSTALL_RPATH=" #$output
                                       "/lib/glibc-hwcaps/x86-64-v4"))
                  #$flags))
       ;; The building machine can't necessarily run the code produced.
       ((#:tests? _ #t) #f)
       ((#:phases phases #~%standard-phases)
        #~(modify-phases #$phases
            (add-after 'install 'remove-extra-files
              (lambda _
                (delete-file-recursively (string-append #$output "/bin"))
                (delete-file-recursively (string-append #$output "/include"))))
            (replace 'move-static-library
              (lambda _
                (let ((lib "/lib/glibc-hwcaps/x86-64-v4/libodgi.a"))
                  (mkdir-p (dirname (string-append #$output:static lib)))
                  (rename-file (string-append #$output lib)
                               (string-append #$output:static lib)))))))))
    (supported-systems '("x86_64-linux"))
    (properties `((hidden? . #t)))))

;; This copy of odgi will automatically use the libraries that target the
;; x86_64 psABI which the hardware supports.
(define-public odgi-hwcaps (package/inherit odgi (name "odgi-hwcaps") (arguments (substitute-keyword-arguments (package-arguments odgi) ((#:phases phases #~%standard-phases) #~(modify-phases #$phases (add-after 'install 'install-optimized-libraries (lambda* (#:key inputs outputs #:allow-other-keys) (let ((hwcaps "/lib/glibc-hwcaps")) (copy-recursively (string-append (assoc-ref inputs "odgi-x86-64-v2") hwcaps "/x86-64-v2") (string-append #$output hwcaps "/x86-64-v2")) (copy-recursively (string-append (assoc-ref inputs "odgi-x86-64-v3") hwcaps "/x86-64-v3") (string-append #$output hwcaps "/x86-64-v3")) (copy-recursively (string-append (assoc-ref inputs "odgi-x86-64-v4") hwcaps "/x86-64-v4") (string-append #$output hwcaps "/x86-64-v4"))))))))) (native-inputs (modify-inputs (package-native-inputs odgi) (append odgi-x86-64-v2 odgi-x86-64-v3 odgi-x86-64-v4))) (properties `((tunable? . #f))))) (define-public vg (package (name "vg") (version "1.50.0") (source (origin (method url-fetch) (uri (string-append "https://github.com/vgteam/vg/releases/download/v" version "/vg-v" version ".tar.gz")) (sha256 (base32 "1n06fh6qvffhbxy7m096r8cy16wi0nm6gfgi3rsjy9zrb7g1jzhs")) (snippet #~(begin (use-modules (guix build utils)) (substitute* (find-files "." "(CMakeLists\\.txt|Makefile)") (("-march=native") "") (("-mtune=native") "") (("-msse4.2") "") (("-mcx16") "")))))) (build-system gnu-build-system) (arguments `(#:phases (modify-phases %standard-phases (delete 'configure) ; no configure script (add-after 'unpack 'patch-source (lambda* (#:key inputs #:allow-other-keys) ;; Most of these are so that we can skip bootstrapping some of the sources. (substitute* "Makefile" ;; PKG_CONFIG_DEPS needs to be substituted to actually link to everything. (("cairo libzstd") "cairo htslib libzstd libdw libelf protobuf raptor2 sdsl-lite tabixpp vcflib fastahack libdeflate") ;; Skip the part where we link static libraries special. 
It doesn't like the changes we make (("-Wl,-B.*") "\n") (("\\$\\(CWD\\)/\\$\\(LIB_DIR\\)/libtabixpp\\.a") "$(LIB_DIR)/libtabixpp.a") ((" \\$\\(LIB_DIR\\)/libtabixpp\\.a") (string-append " " (search-input-file inputs "/lib/libtabixpp.so"))) (("\\$\\(LIB_DIR\\)/pkgconfig/tabixpp\\.pc") (string-append " " (search-input-file inputs "/lib/pkgconfig/tabixpp.pc"))) (("\\$\\(CWD\\)/\\$\\(LIB_DIR\\)/libhts\\.a") "$(LIB_DIR)/libhts.a") ((" \\$\\(LIB_DIR\\)/libhts\\.a") (string-append " " (search-input-file inputs "/lib/libhts.so"))) (("\\$\\(LIB_DIR\\)/pkgconfig/htslib\\.pc") (string-append " " (search-input-file inputs "/lib/pkgconfig/htslib.pc"))) (("\\$\\(CWD\\)/\\$\\(LIB_DIR\\)/libdeflate\\.a") "$(LIB_DIR)/libdeflate.a") ((" \\$\\(LIB_DIR\\)/libdeflate\\.a") (string-append " " (search-input-file inputs "/lib/libdeflate.so"))) ((" \\$\\(LIB_DIR\\)/libvcflib.a") (string-append " " (search-input-file inputs "/lib/libvcflib.so"))) ((" \\$\\(BIN_DIR\\)/vcf2tsv") (string-append " " (search-input-file inputs "/bin/vcf2tsv"))) ((" \\$\\(FASTAHACK_DIR\\)/fastahack") (string-append " " (search-input-file inputs "/bin/fastahack"))) (("\\+= \\$\\(OBJ_DIR\\)/Fasta\\.o") (string-append "+= " (search-input-file inputs "/lib/libfastahack.so"))) ((" \\$\\(LIB_DIR\\)/libsnappy.a") (string-append " " (search-input-file inputs "/lib/libsnappy.so"))) ;; Only link against the libraries in the elfutils package. 
(("-ldwfl -ldw -ldwelf -lelf -lebl") "-ldw -lelf") ((" \\$\\(LIB_DIR\\)/libelf.a") (string-append " " (search-input-file inputs "/lib/libelf.so"))) ((" \\$\\(LIB_DIR\\)/libdw.a") (string-append " " (search-input-file inputs "/lib/libdw.so"))) ((" \\$\\(LIB_DIR\\)/%divsufsort.a") (string-append " " (dirname (search-input-file inputs "/lib/libdivsufsort.so")) "%divsufsort.so")) ((" \\$\\(LIB_DIR\\)/libdivsufsort.a") (string-append " " (search-input-file inputs "/lib/libdivsufsort.so"))) ((" \\$\\(LIB_DIR\\)/%divsufsort64.a") (string-append " " (dirname (search-input-file inputs "/lib/libdivsufsort64.so")) "%divsufsort64.so")) ((" \\$\\(LIB_DIR\\)/libdivsufsort64.a") (string-append " " (search-input-file inputs "/lib/libdivsufsort64.so"))) ((" \\$\\(LIB_DIR\\)/libjemalloc.a") (string-append " " (search-input-file inputs "/lib/libjemalloc.a"))) ((" \\$\\(INC_DIR\\)/sparsehash") (string-append " " (search-input-directory inputs "/include/sparsehash"))) ((" \\$\\(INC_DIR\\)/raptor2") (string-append " " (search-input-directory inputs "/include/raptor2"))) ((" \\$\\(LIB_DIR\\)/libraptor2.a") (string-append " " (search-input-file inputs "/lib/libraptor2.so"))) ((" \\$\\(BIN_DIR\\)/rapper") (string-append " " (search-input-file inputs "/bin/rapper")))))) (add-after 'unpack 'link-with-some-shared-libraries (lambda* (#:key inputs #:allow-other-keys) (substitute* '("deps/mmmultimap/CMakeLists.txt" "deps/xg/CMakeLists.txt" "deps/xg/deps/mmmulti/CMakeLists.txt") (("\".*libsdsl\\.a\"") "\"-lsdsl\"") (("\".*libdivsufsort\\.a\"") "\"-ldivsufsort\"") (("\".*libdivsufsort64\\.a\"") "\"-ldivsufsort64\"") (("\\$\\{sdsl-lite_INCLUDE\\}") (search-input-directory inputs "/include/sdsl")) (("\\$\\{sdsl-lite-divsufsort_INCLUDE\\}") (dirname (search-input-file inputs "/include/divsufsort.h")))))) (add-after 'unpack 'dont-build-shared-vgio (lambda _ ;; vg will link with libvgio and fail the 'validate-runpath phase. 
(substitute* "deps/libvgio/CMakeLists.txt" (("TARGETS vgio vgio_static") "TARGETS vgio_static")))) (add-after 'unpack 'fix-fastahack-dependency (lambda _ (substitute* (append (list "src/aligner.hpp" "src/vg.hpp") (find-files "deps/vcflib/src" "\\.cpp$")) (("Fasta.h") "fastahack/Fasta.h")) (substitute* '("deps/vcflib/src/Variant.h" "src/constructor.hpp" "src/index_registry.cpp") (("") "\"fastahack/Fasta.h\"")))) (add-after 'unpack 'adjust-tests (lambda* (#:key inputs #:allow-other-keys) (let ((bash-tap (assoc-ref inputs "bash-tap"))) (substitute* (find-files "test/t") (("BASH_TAP_ROOT.*") (string-append "BASH_TAP_ROOT=" bash-tap "/bin\n")) ((".*bash-tap-bootstrap") (string-append ". " bash-tap "/bin/bash-tap-bootstrap"))) (substitute* "test/t/02_vg_construct.t" (("../deps/fastahack/fastahack") (which "fastahack")) (("../bin/vcf2tsv") (which "vcf2tsv"))) ;; Lets skip the 9 failing tests for now. They fail with our ;; bash-tap and the bundled one. (substitute* "test/t/02_vg_construct.t" ((".*self-inconsistent.*") "is $(true) \"\" \"\"\n")) (substitute* "test/t/07_vg_map.t" ;; Change in fasta's output (("identity\\) 1 \"") "identity) 1.0 \"")) (substitute* '("test/t/07_vg_map.t" "test/t/33_vg_mpmap.t") ((".*node id.*") "is $(true) \"\" \"\"\n")) (substitute* "test/t/48_vg_convert.t" (("true \"vg.*") "true \"true\"\n")) (substitute* "test/t/50_vg_giraffe.t" ((".*A long read can.*") "is $(true) \"\" \"\"\n") ((".*A long read has.*") "is $(true) \"\" \"\"\n") ((".*Long read minimizer.*") "is $(true) \"\" \"\"\n")) ;; Don't test the docs, we're not providing npm (substitute* "Makefile" ((".*test-docs.*") ""))))) (add-after 'build 'build-manpages (lambda* (#:key inputs #:allow-other-keys) (when (assoc-ref inputs "asciidoctor") (invoke "make" "man")))) (replace 'install (lambda* (#:key outputs #:allow-other-keys) (let ((out (assoc-ref outputs "out"))) (install-file "bin/vg" (string-append out "/bin")) (for-each (lambda (file) (install-file file (string-append out 
"/share/man/man1"))) (find-files "doc/man" "\\.1$")))))) #:test-target "test")) (native-inputs (append (if (supported-package? ruby-asciidoctor) (list ruby-asciidoctor) '()) (list bash-tap bc cmake-minimal jq perl pkg-config samtools util-linux which xxd))) (inputs (list boost cairo curl elfutils fastahack htslib jansson jemalloc libdeflate libdivsufsort ncurses openmpi protobuf raptor2 sdsl-lite smithwaterman snappy sparsehash tabixpp vcflib zlib (list zstd "lib"))) (home-page "https://www.biostars.org/t/vg/") (synopsis "Tools for working with genome variation graphs") (description "Variation graphs provide a succinct encoding of the sequences of many genomes. A variation graph (in particular as implemented in vg) is composed of: @enumerate @item nodes, which are labeled by sequences and ids @item edges, which connect two nodes via either of their respective ends @item paths, describe genomes, sequence alignments, and annotations (such as gene models and transcripts) as walks through nodes connected by edges @end enumerate This model is similar to sequence graphs that have been used in assembly and multiple sequence alignment.") (properties `((release-monitoring-url . "https://github.com/vgteam/vg/releases") (tunable? . 
#t)))
    (license
     (list license:expat                ; main program
           license:bsd-2                ; deps/xg/deps/ips4o
           license:bsd-3                ; deps/sparsepp, deps/sonLib/C/{impl,inc}
           license:asl2.0               ; deps/sonLib/externalTools/quicktree_1.1, deps/structures
           license:gpl3+                ; all sdsl-lite copies
           license:zlib                 ; deps/sonLib/externalTools/cutest
           license:boost1.0))))         ; catch.hpp

;; pggb is installed with the copy-build-system: the 'pggb' and
;; 'partition-before-pggb' entry points and the contents of "scripts" are
;; copied into bin/.  The phases patch hard-coded interpreter and tool paths
;; and wrap the entry points so that they find their run-time dependencies.
(define-public pggb
  (let ((commit "9ebff27320382e470ed38a85b4448402e1e7c353")
        (revision "1"))
    (package
      (name "pggb")
      (version (git-version "0.5.1" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/pangenome/pggb")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32 "0rgpj52q3ai7f1saqbilgx5gz4f403x3427wq649qwv84ivmi1sf"))))
      (build-system copy-build-system)
      (arguments
       (list
        #:install-plan
        ;; "scripts" is installed twice: flattened into bin/ and as
        ;; bin/scripts.
        #~'(("pggb" "bin/")
            ("partition-before-pggb" "bin/")
            ("scripts/" "bin/")
            ("scripts" "bin/scripts"))
        #:phases
        #~(modify-phases %standard-phases
            (add-after 'unpack 'force-python3
              (lambda _
                (substitute* (find-files "scripts" "\\.py$")
                  ;; The optional "3" keeps the rewrite idempotent: a script
                  ;; that already names /usr/bin/python3 must not end up
                  ;; with "/usr/bin/python33".
                  (("/usr/bin/python3?") "/usr/bin/python3"))))
            (add-before 'install 'patch-and-wrap-scripts
              (lambda* (#:key inputs #:allow-other-keys)
                ;; Call bcftools by its absolute store file name.
                (substitute* "scripts/vcf_preprocess.sh"
                  (("bcftools ")
                   (string-append (search-input-file inputs "/bin/bcftools") " ")))
                ;; Wrapped before installation, so the wrapped file is what
                ;; the install plan copies.
                (wrap-script "scripts/net2communities.py"
                  `("GUIX_PYTHONPATH" ":" prefix (,(getenv "GUIX_PYTHONPATH"))))))
            (add-after 'install 'wrap-scripts
              (lambda* (#:key inputs outputs #:allow-other-keys)
                (let ((out (assoc-ref outputs "out")))
                  (for-each
                   (lambda (file)
                     (wrap-script file
                       `("R_LIBS_SITE" ":" prefix (,(getenv "R_LIBS_SITE")))
                       ;; For every label, prefer the input named
                       ;; LABEL-hwcaps when this package has one, and fall
                       ;; back to the plain input otherwise.
                       `("PATH" ":" prefix
                         ,(map (lambda (input) (string-append input "/bin"))
                               '#$(map (lambda (label)
                                         (or (this-package-input (string-append label "-hwcaps"))
                                             (this-package-input label)))
                                       (list "bc" "bcftools" "bedtools" "gfaffix" "htslib" "fastix" "multiqc" "mummer" "odgi" "pafplot" "parallel" "pigz" "python" "r-data-table" "r-minimal" "rtg-tools" "samtools" "seqwish" "smoothxg" "time" "vcfbub" "vcflib" "vg" "wfmash"))))))
                   (list (string-append out "/bin/pggb")
                         (string-append out "/bin/partition-before-pggb")
                         (string-append out "/bin/gfa2evaluation.sh")
                         (string-append out "/bin/scripts/gfa2evaluation.sh"))))))
            (add-after 'install 'substitute-file-paths
              (lambda* (#:key outputs #:allow-other-keys)
                ;; Point the /usr/local/bin references at the installed
                ;; copies of the helper scripts.
                (let ((out (assoc-ref outputs "out")))
                  (substitute* (string-append out "/bin/gfa2evaluation.sh")
                    (("/usr/local/bin/vcf_preprocess.sh")
                     (string-append out "/bin/vcf_preprocess.sh"))
                    (("/usr/local/bin/nucmer2vcf.R")
                     (string-append out "/bin/nucmer2vcf.R"))))))))
      (inputs
       (list bc
             bcftools
             bedtools
             gfaffix
             guile-3.0                  ; for wrap-script
             htslib                     ; tabix
             fastix
             multiqc
             mummer
             odgi
             pafplot
             parallel
             pigz
             python
             python-igraph
             r-data-table
             r-minimal
             rtg-tools
             samtools
             seqwish
             smoothxg
             time
             vcfbub
             vcflib
             vg
             wfmash))
      (home-page "https://doi.org/10.1101/2023.04.05.535718")
      (synopsis "PanGenome Graph Builder")
      (description "@command{pggb} builds @url{https://doi.org/10.1146%2Fannurev-genom-120219-080406, pangenome} @url{https://doi.org/10.1038/nbt.4227, variation graphs} from a set of input sequences. A pangenome variation graph is a kind of generic multiple sequence alignment. It lets us understand any kind of sequence variation between a collection of genomes. It shows us similarity where genomes walk through the same parts of the graph, and differences where they do not. @command{pggb} generates this kind of graph using an all-to-all alignment of input sequences (@url{https://github.com/waveygang/wfmash, wfmash}), graph induction (@url{https://doi.org/10.1101/2022.02.14.480413, seqwish}), and progressive normalization (@url{https://github.com/pangenome/smoothxg, smoothxg}, @url{https://github.com/marschall-lab/GFAffix, gfaffix}). After construction, @command{pggb} generates diagnostic visualizations of the graph (@url{https://doi.org/10.1093/bioinformatics/btac308, odgi}). 
A variant call report (in VCF) representing both small and large variants can be generated based on any reference genome included in the graph (@url{https://github.com/vgteam/vg, vg}). @command{pggb} writes its output in @url{https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md, GFAv1} format, which can be used as input by numerous \"genome graph\" and pangenome tools, such as the @url{https://github.com/vgteam/vg, vg} and @url{https://doi.org/10.1093/bioinformatics/btac308, odgi} toolkits. @command{pggb} has been tested at scale in the @acronym{Human Pangenome Reference Consortium, HPRC} as a method to build a graph from the @url{https://doi.org/10.1101/2022.07.09.499321, draft human pangenome}.")
      (license license:expat))))

(define use-glibc-hwcaps
  (package-input-rewriting/spec
   ;; Replace some packages with their variants built with glibc-hwcaps
   ;; support.
   `(;("gsl" . ,(const gsl-hwcaps)) ; Causes too many rebuilds through multiqc
     ("sdsl-lite" . ,(const sdsl-lite-hwcaps))
     ("seqwish" . ,(const seqwish-hwcaps))
     ("odgi" . ,(const odgi-hwcaps))
     ("wfmash" . ,(const wfmash-hwcaps)))))

;; pggb with its dependency graph rewritten by 'use-glibc-hwcaps'.
(define-public pggb-with-hwcaps
  (package
    (inherit (use-glibc-hwcaps pggb))
    (name "pggb-with-hwcaps")))

(define-public ucsc-genome-browser
  (package
    (name "ucsc-genome-browser")
    (version "413")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://genome-source.gi.ucsc.edu/kent.git/")
                    (commit (string-append "v" version "_base"))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "1qcjhd4wcajik71z5347fw2sfhfkv0p6y7yldrrkmycw2qhqmpzn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:parallel-tests? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (add-before 'build 'pre-build
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Start by setting some variables.
(chdir "src")
               (setenv "CC" ,(cc-for-target))
               (setenv "HOME" (getcwd))
               ;; And here we set the output directories
               (setenv "CGI_BIN" (string-append out "/cgi-bin"))
               (setenv "CGI_BIN_USER" (string-append out "/cgi-bin"))
               (setenv "DOCUMENTROOT" (string-append out "/html"))
               (setenv "DOCUMENTROOT_USER" (string-append out "/html"))
               (setenv "BINDIR" (string-append out "/bin"))
               ;; Now let's fix some errors:
               (mkdir-p (string-append out "/cgi-bin"))
               (substitute* "inc/cgi_build_rules.mk"
                 (("rm -f.*") ""))
               (substitute* (cons* "inc/cgi_build_rules.mk"
                                   (find-files "." "makefile"))
                 (("CGI_BIN\\}-\\$\\{USER") "CGI_BIN_USER"))
               ;; Force linking with freetype.
               (substitute* "inc/common.mk"
                 (("libpng-config --ldflags") "pkg-config --libs libpng freetype2")
                 (("libpng-config --I_opts") "pkg-config --cflags-only-I libpng freetype2")
                 (("\\$\\{HG_INC\\}" hg_inc)
                  (string-append hg_inc " -DUSE_FREETYPE")))
               ;; Force the trash location.
               (substitute* (cons* "utils/qa/showTracks"
                                   "webBlat/webBlat.cfg"
                                   "hg/js/hgTracks.js"
                                   (find-files "." "\\.c$"))
                 ;; This line is specifically needed as-is.
                 (("\\.\\./trash") "/var/cache/genome"))
               #t)))
         ;; Before the test suite runs: put the freshly installed binaries
         ;; on PATH, set MACHTYPE, and disable or delete the tests listed
         ;; below.
         (add-before 'check 'pre-check
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out"))
                   (triplet ,(gnu-triplet->nix-system (%current-system))))
               (setenv "PATH" (string-append (getenv "PATH") ":" out "/bin"))
               ;; MACHTYPE is the part of the triplet before the first "-".
               (setenv "MACHTYPE" (string-take triplet (string-index triplet #\-)))
               (for-each make-file-writable
                         (find-files "utils/bedJoinTabOffset/tests"))
               (substitute* '("utils/bamToPsl/tests/makefile"
                              "utils/trackDbIndexBb/tests/makefile")
                 (("/cluster/bin/bedtools/bedtools") (which "bedtools")))
               ;; These tests fail intermittently:
               (substitute* "utils/vcfFilter/tests/makefile"
                 ((" testRenameNoGt ") " ")
                 ((" testMinAc1NoGt ") " "))
               ;; These tests can't find their database:
               (substitute* "hg/lib/tests/makefile"
                 ((" spDbTest ") " ")
                 ((" hdbTest ") " "))
               (substitute* "hg/lib/tests/genePredTests.mk"
                 ((" tableTests ") " ")
                 ((" compatTblTests ") " "))
               (substitute* "hg/lib/tests/pslReaderTests.mk"
                 ((" tableTests") " "))
               (substitute* "hg/lib/tests/makefile"
                 ((" annoGratorTest ") " ")
                 ((" customTrackTest ") " ")
                 ((" hgvsTest") " "))
               (substitute* "hg/autoSql/tests/makefile"
                 ((" dbLinkTest ") " ")
                 ((" symColsTest ") " "))
               (delete-file "hg/checkTableCoords/tests/makefile")
               (delete-file "hg/hgGetAnn/tests/makefile")
               (substitute* "hg/sqlToXml/makefile"
                 ((".*doTest.*") ""))
               (substitute* "hg/utils/genePredFilter/tests/makefile"
                 ((" gencodeHackDbTest") " "))
               (substitute* "hg/utils/refSeqGet/tests/makefile"
                 (("^test::.*") "test:: mkout\n"))
               (delete-file "hg/utils/vcfToHgvs/tests/makefile")
               (substitute* "hg/bedItemOverlapCount/tests/makefile"
                 ((".*RunTest.*") ""))
               ;; Depends on /cluster
               (substitute* "hg/liftOver/tests/makefile"
                 (("^test:.*") "test: mkdirs scaffoldEndBug\n"))
               (delete-file "hg/mouseStuff/netToAxt/tests/makefile")
               (substitute* "hg/pslToChain/tests/makefile"
                 ((" example1 ") " "))
               ;; Depends on /gbdb
               (delete-file "hg/mouseStuff/chainBridge/tests/makefile")
               ;; Depends on /hive
               (delete-file
                "hg/utils/genePredToProt/tests/makefile")
               ;; Unclear why this fails
               (delete-file "hg/utils/hgvsToVcf/tests/makefile")
               ;; Wants network
               (substitute* "hg/utils/hubCheck/tests/makefile"
                 (("^test::.*") "test:: one two\n"))
               #t)))
         ;; Install happens during the 'build phase.
         ;; Install the website files too
         (replace 'install
           (lambda _
             (invoke "make" "doc-install")
             #t))
         ;; The installed hg.conf only includes /var/lib/genome/hg.conf.
         (add-after 'install 'create-hg-conf
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (with-output-to-file (string-append out "/cgi-bin/hg.conf")
                 (lambda ()
                   (display "include /var/lib/genome/hg.conf\n")))
               #t)))
         (add-after 'install 'create-symlink
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "htdocs")
             ;; Fallback location for fonts.
             (symlink (string-append (assoc-ref inputs "gs-fonts")
                                     "/share/fonts/type1/ghostscript")
                      "htdocs/urw-fonts")
             #t)))))
    (inputs
     `(("freetype" ,freetype)
       ("libpng" ,libpng)
       ("mysql:dev" ,mariadb "dev")
       ("mysql:lib" ,mariadb "lib")
       ("openssl" ,openssl)
       ("perl" ,perl)
       ("python2" ,python-2)
       ("zlib" ,zlib)))
    (native-inputs
     `(("bedtools" ,bedtools)
       ("gs-fonts" ,gs-fonts)
       ("pkg-config" ,pkg-config)
       ("python" ,python)
       ("rsync" ,rsync)   ; For installing js files from the source checkout
       ("tcl" ,tcl)
       ("tcsh" ,tcsh)
       ("util-linux:lib" ,util-linux "lib")
       ("which" ,(@ (gnu packages base) which))))
    (home-page "https://www.genome.ucsc.edu/")
    ;; The previous synopsis described a structural-variant detector and
    ;; did not match the description below.
    (synopsis "Browser for genomes and their aligned annotation tracks")
    (description "The UCSC Genome Browser provides a rapid and reliable display of any requested portion of genomes at any scale, together with dozens of aligned annotation tracks (known genes, predicted genes, ESTs, mRNAs, CpG islands, assembly gaps and coverage, chromosomal bands, mouse homologies, and more). Half of the annotation tracks are computed at UCSC from publicly available sequence data. The remaining tracks are provided by collaborators worldwide. Users can also add their own custom tracks to the browser for educational or research purposes. 
The Genome Browser stacks annotation tracks beneath genome coordinate positions, allowing rapid visual correlation of different types of information. The user can look at a whole chromosome to get a feel for gene density, open a specific cytogenetic band to see a positionally mapped disease gene candidate, or zoom in to a particular gene to view its spliced ESTs and possible alternative splicing. The Genome Browser itself does not draw conclusions; rather, it collates all relevant information in one location, leaving the exploration and interpretation to the user.")
    (license (list
              ;; license:bsd-0 ; kent/src/{utils,lib,inc,tabStorm,parasol,hg/autoSql,hg/autoXml}
              license:bsd-3             ; these two for bundled htslib-1.3
              license:expat
              (license:non-copyleft
               "https://www.genome.ucsc.edu/license/"
               "Free for academic/non-profit/personal use only.")
              (license:non-copyleft     ; Blat, In-Silico PCR
               "http://www.kentinformatics.com/index.html"
               "Free for universities and non-profit institutions.")))))

;; bam2fastx is built with Meson; the test suite is enabled through the
;; "-Dtests=true" configure flag.
(define-public bam2fastx
  (package
    (name "bam2fastx")
    (version "1.3.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/PacificBiosciences/bam2fastx")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32 "0pyfmvh874w29kaq6gbb1bd86135qs2jc4f8giw98kxw1b2gjdh0"))))
    (build-system meson-build-system)
    (arguments
     `(#:configure-flags '("-Dtests=true")))
    (inputs
     `(("boost" ,boost)
       ;("htslib" ,htslib)
       ("pbbam" ,pbbam)
       ("pbcopper" ,pbcopper)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("python-cram" ,python-cram)
       ("python-wrapper" ,python-wrapper)))
    (home-page "https://github.com/PacificBiosciences/bam2fastx")
    (synopsis "Converting and demultiplexing of PacBio BAM files into gzipped fasta and fastq files")
    (description "Conversion of PacBio BAM files into gzipped fasta and fastq files, including splitting of barcoded data.")
    (license license:bsd-3)))

(define-public pbbam-1
  (package
    (name "pbbam")
    (version "1.6.0")
    (source (origin
(method git-fetch)
              ;; The upstream tag is the version prefixed with "v".
              (uri (git-reference
                    (url "https://github.com/PacificBiosciences/pbbam")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "1z3sh9cmrap37ijrm0cv85j92r1xkq6kba2j10mrr4fv7fc9zzfb"))))
    (build-system meson-build-system)
    ;; These libraries are listed as "Required" in the pkg-config file.
    (propagated-inputs
     `(("htslib" ,htslib)
       ("pbcopper" ,pbcopper)
       ("zlib" ,zlib)))
    (inputs
     `(("boost" ,boost)
       ("samtools" ,samtools)))
    (native-inputs
     `(("cram" ,python-cram)
       ("googletest" ,googletest)
       ("pkg-config" ,pkg-config)
       ("python" ,python-wrapper)))     ; for tests
    (home-page "https://github.com/PacificBiosciences/pbbam")
    (synopsis "Work with PacBio BAM files")
    (description "The pbbam software package provides components to create, query, and edit PacBio BAM files and associated indices. These components include a core C++ library, bindings for additional languages, and command-line utilities. This library is not intended to be used as a general-purpose BAM utility - all input and output BAMs must adhere to the PacBio BAM format specification. 
Non-PacBio BAMs will cause exceptions to be thrown.")
    (license license:bsd-3)))

;; pbcopper: PacBio C++ support library, built with Meson.  It is a
;; propagated input of pbbam-1 and an input of pbmm2.
(define-public pbcopper
  (package
    (name "pbcopper")
    (version "1.9.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/PacificBiosciences/pbcopper")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "1pphklil5kn1ds796ch41bgvdf7yq03z6w5rgi572s8xg8k5b0xn"))))
    (build-system meson-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Rewrite "v8.2-a" to "v8-a" in meson.build.
         ;; NOTE(review): presumably this lowers the ARM architecture
         ;; baseline requested from the compiler -- confirm against
         ;; meson.build.
         (add-after 'unpack 'patch-source
           (lambda _
             (substitute* "meson.build"
               ;; uncomment when upstreaming
               ;(("sse4\\.1") "nosse4.1")
               (("v8\\.2-a") "v8-a"))
             #t)))))
    (inputs
     `(("boost" ,boost)))
    (native-inputs
     `(("googletest" ,googletest)))
    (home-page "https://github.com/PacificBiosciences/pbcopper")
    (synopsis "Data structures, algorithms, and utilities for C++ applications")
    (description "The pbcopper library provides a suite of data structures, algorithms, and utilities for PacBio C++ applications.")
    (license license:bsd-3)))

;; pbmm2 is built against the private minimap2-for-pbmm2 package and
;; against pbbam-1 rather than the plain pbbam.
(define-public pbmm2
  (package
    (name "pbmm2")
    (version "1.4.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/PacificBiosciences/pbmm2")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "0c01c647c7wvq5jzkf68xsf0bn8mlyw0hbz2fiyirxg7hj05jyac"))))
    (build-system meson-build-system)
    (arguments
     `(#:tests? #f))                    ; TODO: Fix later.
    (inputs
     `(("boost" ,boost)
       ("htslib" ,htslib)
       ("minimap2" ,minimap2-for-pbmm2)
       ("pbbam" ,pbbam-1)
       ("pbcopper" ,pbcopper)))
    ;; NOTE(review): zlib is listed among the native inputs although it is
    ;; a library -- confirm whether it belongs in 'inputs'.
    (native-inputs
     `(("cram" ,python-cram)
       ("googletest" ,googletest)
       ("pkg-config" ,pkg-config)
       ("samtools" ,samtools)
       ("util-linux" ,util-linux)
       ("zlib" ,zlib)))
    (home-page "https://github.com/PacificBiosciences/pbmm2")
    (synopsis "minimap2 frontend for PacBio native data formats")
    (description "pbmm2 is a SMRT C++ wrapper for minimap2's C API. 
Its purpose is to support native PacBio in- and output, provide sets of recommended parameters, generate sorted output on-the-fly, and postprocess alignments. Sorted output can be used directly for polishing using GenomicConsensus, if BAM has been used as input to pbmm2. Benchmarks show that pbmm2 outperforms BLASR in sequence identity, number of mapped bases, and especially runtime. pbmm2 is the official replacement for BLASR.")
    (license license:bsd-3)))

;; Private (not exported) minimap2 variant used as the "minimap2" input of
;; pbmm2.  It is fetched from the pacificbiosciences fork at the tag
;; VERSION-meson and built with the meson-build-system.
(define minimap2-for-pbmm2
  (package
    (name "minimap2")
    (version "2.17")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pacificbiosciences/minimap2")
                    (commit (string-append version "-meson"))))
              ;; The checkout gets its own file name in the store.
              (file-name (git-file-name "minimap2-for-pbmm2" version))
              (sha256
               (base32 "1833y6xdcblz7k4fyclryd6lwibsisp4svp2mk9w6ivk64icl6jq"))))
    (build-system meson-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://lh3.github.io/minimap2/")
    (synopsis "Pairwise aligner for genomic and spliced nucleotide sequences")
    (description "Minimap2 is a versatile sequence alignment program that aligns DNA or mRNA sequences against a large reference database. Typical use cases include: @enumerate @item mapping PacBio or Oxford Nanopore genomic reads to the human genome; @item finding overlaps between long reads with error rate up to ~15%; @item splice-aware alignment of PacBio Iso-Seq or Nanopore cDNA or Direct RNA reads against a reference genome; @item aligning Illumina single- or paired-end reads; @item assembly-to-assembly alignment; @item full-genome alignment between two closely related species with divergence below ~15%. @end enumerate\n")
    (license license:expat)))

;; 0.8.9 is the last version which supports python2.
;; The package inherits from the Python 2 variant of python-bx-python and
;; appends python2-six to its propagated inputs.
(define-public python2-bx-python
  (let ((base (python2-package python-bx-python)))
    (package
      (inherit base)
      (name "python2-bx-python")
      (version "0.8.9")
      (source (origin
                ;; No cythonized files in the git repository.
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/bxlab/bx-python")
                      (commit (string-append "v" version))))
                (file-name (git-file-name name version))
                (sha256
                 (base32 "0bsqnw8rv08586wksvx2a8dawvhyzvz5pzsh9y3217b6wxq98dnq"))))
      (propagated-inputs
       (modify-inputs (package-propagated-inputs base)
         (append python2-six))))))

;; hap.py: the source snippet deletes the bundled dependency tarballs; the
;; jsoncpp and klib deletions are still commented out.
(define-public hap.py
  (package
    (name "hap.py")
    (version "0.3.14")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/Illumina/hap.py")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "1bnm7s86651p3zf6wahz5pic7n8416fx677kj47lwckr3syp2x1h"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file-recursively "external/bcftools.tar.gz")
                  (delete-file-recursively "external/boost_subset_1_58_0.tar.bz2")
                  (delete-file-recursively "external/htslib.tar.gz")
                  ;; TODO: Unbundle jsoncpp.
                  ;(delete-file-recursively "external/jsoncpp")
                  ;(delete-file-recursively "external/klib")
                  (delete-file-recursively "external/samtools.tar.gz")
                  (delete-file-recursively "external/virtualenv-12.0.7.tar.gz")
                  (delete-file-recursively "external/zlib-1.2.8.tar.gz")
                  #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (list "-DBUILD_VCFEVAL=ON")
       #:phases
       (modify-phases %standard-phases
         ;; Replace the "git describe" call in CMakeLists.txt with an echo
         ;; of the package version.
         (add-after 'unpack 'set-package-version
           (lambda _
             (substitute* "CMakeLists.txt"
               (("git describe --tags --always")
                (string-append "echo " ,version)))
             #t))
         ;; A shared library conflicts with boost-static.
         ;; Not using boost-static causes linking errors.
;(add-after 'unpack 'build-dynamic-library
         ;  (lambda _
         ;    (substitute* "src/c++/lib/CMakeLists.txt"
         ;      (("STATIC") "SHARED"))
         ;    #t))
         ;; Populate "external/" with symlinks to the Guix-provided zlib,
         ;; htslib, bcftools and samtools, and make the build files refer
         ;; to the shared libraries.
         (add-after 'unpack 'fix-build
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((zlib     (assoc-ref inputs "zlib"))
                   (bcftools (assoc-ref inputs "bcftools"))
                   (boost    (assoc-ref inputs "boost"))
                   (htslib   (assoc-ref inputs "htslib"))
                   (samtools (assoc-ref inputs "samtools")))
               (mkdir-p "external/bin")
               (mkdir-p "external/lib")
               (mkdir-p "external/include")
               (mkdir-p "external/scratch/lib")
               (substitute* "external/make_dependencies.sh"
                 (("zlib-1\\.2\\.8/libz\\.a") "lib/libz.so"))
               (substitute* "src/cmake/FindHTSLib.cmake"
                 (("libhts\\.a") "libhts.so"))
               (substitute* "CMakeLists.txt"
                 (("ZLIB_LIBRARIES .*\\)")
                  (string-append "ZLIB_LIBRARIES \"" zlib "/lib/libz.so\")")))
               (setenv "BOOST_ROOT" boost)
               ;; Use the 'htslib' binding from the phase's own 'inputs',
               ;; like the other lines here, rather than a second lookup in
               ;; the global %build-inputs.
               (setenv "LDFLAGS" (string-append "-L" htslib "/lib"))
               (symlink (string-append zlib "/lib/libz.so")
                        "external/scratch/lib/libz.so")
               (symlink (string-append htslib "/include/htslib")
                        "external/include/htslib")
               (symlink (string-append htslib "/lib/libhts.so")
                        "external/lib/libhts.so")
               (symlink (string-append htslib "/lib/libhts.so")
                        "external/lib/libhts.so.3")
               (symlink (string-append bcftools "/bin/bcftools")
                        "external/bin/bcftools")
               (symlink (string-append samtools "/bin/samtools")
                        "external/bin/samtools")
               #t)))
         ;; Symlink the rtg-tools input to
         ;; external/libexec/rtg-tools-install.
         (add-after 'fix-build 'insert-rtg-tools
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((rtg-tools (assoc-ref inputs "rtg-tools"))
                   (dest "external/libexec/rtg-tools-install"))
               (mkdir-p (dirname dest))
               (symlink rtg-tools dest)
               #t)))
         ;; Custom 'configure: besides invoking cmake it copies the
         ;; prepared "external/" sub-directories into the build directory
         ;; (see "Extra code added here!!").
         (replace 'configure
           (lambda* (#:key outputs (configure-flags '()) (out-of-source? #t)
                     build-type target
                     #:allow-other-keys)
             "Configure the given package."
             (let* ((out        (assoc-ref outputs "out"))
                    (abs-srcdir (getcwd))
                    (srcdir     (if out-of-source?
                                    (string-append "../" (basename abs-srcdir))
                                    ".")))
               (format #t "source directory: ~s (relative from build: ~s)~%"
                       abs-srcdir srcdir)
               (when out-of-source?
                 (mkdir "../build")
                 ;; Extra code added here!!
                 (copy-recursively "external/scratch" "../build/scratch")
                 (copy-recursively "external/libexec" "../build/libexec")
                 (copy-recursively "external/lib" "../build/lib")
                 (copy-recursively "external/bin" "../build/bin")
                 (copy-recursively "external/include" "../build/include")
                 (chdir "../build"))
               (format #t "build directory: ~s~%" (getcwd))
               (let ((args `(,srcdir
                             ,@(if build-type
                                   (list (string-append "-DCMAKE_BUILD_TYPE="
                                                        build-type))
                                   '())
                             ,(string-append "-DCMAKE_INSTALL_PREFIX=" out)
                             ;; ensure that the libraries are installed into /lib
                             "-DCMAKE_INSTALL_LIBDIR=lib"
                             ;; add input libraries to rpath
                             "-DCMAKE_INSTALL_RPATH_USE_LINK_PATH=TRUE"
                             ;; add (other) libraries of the project itself to rpath
                             ,(string-append "-DCMAKE_INSTALL_RPATH=" out "/lib")
                             ;; enable verbose output from builds
                             "-DCMAKE_VERBOSE_MAKEFILE=ON"
                             ;; Cross-build
                             ,@(if target
                                   (list (string-append "-DCMAKE_C_COMPILER="
                                                        target "-gcc")
                                         (string-append "-DCMAKE_CXX_COMPILER="
                                                        target "-g++")
                                         (if (string-contains target "mingw")
                                             "-DCMAKE_SYSTEM_NAME=Windows"
                                             "-DCMAKE_SYSTEM_NAME=Linux"))
                                   '())
                             ,@configure-flags)))
                 (format #t "running 'cmake' with arguments ~s~%" args)
                 (apply invoke "cmake" args)))))
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "./bin/test_haplotypes"))
             #t))
         ;; Delete the files copied into the build directory by 'configure
         ;; (and the test binary) before the 'install phase runs.
         (add-before 'install 'remove-extra-files
           (lambda _
             (delete-file "bin/bcftools")
             (delete-file "bin/samtools")
             (delete-file "bin/test_haplotypes")
             (delete-file "lib/libhts.so")
             (delete-file "lib/libhts.so.3")
             #t))
         ;; Wrap every installed *.py program so that it sees the build-time
         ;; GUIX_PYTHONPATH and has bcftools and samtools on PATH.
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out      (assoc-ref outputs "out"))
                   (bcftools (assoc-ref inputs "bcftools"))
                   (samtools (assoc-ref inputs "samtools")))
               (for-each
                (lambda (file)
                  (wrap-script file
                    `("GUIX_PYTHONPATH" ":" prefix (,(getenv "GUIX_PYTHONPATH")))
                    `("PATH" ":" prefix (,(string-append bcftools "/bin")
                                         ,(string-append samtools "/bin")))))
                (find-files (string-append out "/bin") "\\.py$"))
               #t))))))
    (inputs
     `(("bcftools" ,bcftools)
       ("boost" ,boost-static)          ; has to be boost-static
       ("guile" ,guile-3.0)             ; for wrap-script
       ("htslib" ,htslib)
       ;; The software specifically states python-2.
       ("python" ,python-2)
       ("python2-bx-python" ,python2-bx-python)
       ("python2-numpy" ,python2-numpy)
       ("python2-pandas" ,python2-pandas)
       ("python2-pysam" ,(python2-package python-pysam))
       ("python2-scipy" ,python2-scipy)
       ("rtg-tools" ,rtg-tools)
       ("samtools" ,samtools)
       ("zlib" ,zlib)))
    (home-page "https://github.com/Illumina/hap.py")
    (synopsis "Haplotype VCF comparison tools")
    (description "This is a set of programs based on htslib to benchmark variant calls against gold standard truth datasets. The main two tools are @code{hap.py} (diploid precision/recall evaluation) and @code{som.py} (somatic precision/recall evaluation -- this ignores the GT and just checks for presence of alleles). Other tools are @code{qfy.py} (which just executes the quantification step of the analysis pipeline, this requires a GA4GH-intermediate VCF file), and @code{pre.py}, which is @code{hap.py}'s input cleaning and variant normalisation step. To run the bundled rtg-tools software you will also need java. 
The @code{icedtea:jdk} output should work nicely.")
    (license (list license:expat        ; bundled jsoncpp, klib
                   license:bsd-2))))

;; TODO:
;; Unbundle gatb-core.
(define-public minia
  (package
    (name "minia")
    (version "3.2.6")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/GATB/minia/releases"
                                  "/download/v" version "/minia-v" version
                                  "-Source.tar.gz"))
              (sha256
               (base32 "03zg1jh0yjw7546kax8xs0zwiqhaiqz044409jc3ss6nj968ay70"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f               ; Tests are expected to be run manually.
       #:configure-flags '("-DNO_SSE=ON") ; Can be removed after unbundling gatb-core.
       #:phases
       (modify-phases %standard-phases
         ;; Delete lib/, test/, bin/h5cc and the top-level LICENSE and
         ;; README.md from the output.
         (add-after 'install 'remove-cruft
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (with-directory-excursion out
                 (delete-file-recursively "lib")
                 (delete-file-recursively "test")
                 (delete-file "bin/h5cc")
                 (delete-file "LICENSE")
                 (delete-file "README.md")
                 #t)))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://gatb.inria.fr/software/minia")
    (synopsis "Short-read assembler based on a de Bruijn graph")
    (description "Minia is a short-read assembler based on a de Bruijn graph, capable of assembling a human genome on a desktop computer in a day. The output of Minia is a set of contigs. Back when it was released, Minia produced results of similar contiguity and accuracy to other de Bruijn assemblers (e.g. Velvet).")
    (license license:agpl3+)))

(define-public metaeuk
  (package
    (name "metaeuk")
    (version "5-34c21f2")               ; As seen upstream.
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/soedinglab/metaeuk")
                    (commit version)
                    (recursive? #t)))   ; Only contains the tests.
(file-name (git-file-name name version))
              (sha256
               (base32 "0pqiqy3wycn9h3y699b5drd3y4zmz087bwgdxx6wbbqqipa6wk0j"))
              (modules '((guix build utils)))
              ;; Drop the bundled simde, xxhash and zstd copies; gzstream is
              ;; still bundled.
              (snippet
               '(begin
                  ;(delete-file-recursively "lib/mmseqs/lib/gzstream")
                  (delete-file-recursively "lib/mmseqs/lib/simde")
                  (delete-file-recursively "lib/mmseqs/lib/xxhash")
                  (delete-file-recursively "lib/mmseqs/lib/zstd")))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags '("-DUSE_SYSTEM_ZSTD=YES")
       #:substitutable? #f              ; We want the native build.
       #:phases
       (modify-phases %standard-phases
         ;; Point the mmseqs CMake file at the shared zstd library and at
         ;; the xxhash and simde headers from the inputs.
         (add-after 'unpack 'use-shared-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "lib/mmseqs/CMakeLists.txt"
               (("libzstd\\.a") "libzstd.so")
               (("libzstd_static") "libzstd_shared")
               ;(("lib/gzstream")
               ; (string-append (assoc-ref inputs "gzstream") "/include"))
               (("lib/xxhash")
                (string-append (assoc-ref inputs "xxhash") "/include"))
               (("lib/simde")
                (string-append (assoc-ref inputs "simde") "/include/simde")))
             #t))
         ;; Run the upstream test script against the freshly built binary.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (with-directory-excursion "../source/tests"
                 (invoke "./run.sh" "../../build/src/metaeuk"))))))))
    (inputs
     `(("bzip2" ,bzip2)
       ("zlib" ,zlib)
       ("zstd:lib" ,zstd "lib")))
    (native-inputs
     `(;("gzstream" ,gzstream)
       ("perl" ,perl)
       ("simde" ,simde)
       ("xxd" ,xxd)
       ("xxhash" ,xxhash)))
    (home-page "https://github.com/soedinglab/metaeuk")
    (synopsis "Gene discovery and annotation for large-scale eukaryotic metagenomics")
    (description "MetaEuk is a modular toolkit designed for large-scale gene discovery and annotation in eukaryotic metagenomic contigs. MetaEuk combines the fast and sensitive homology search capabilities of @url{https://github.com/soedinglab/MMseqs2, MMseqs2} with a dynamic programming procedure to recover optimal exons sets. 
It reduces redundancies in multiple discoveries of the same gene and resolves conflicting gene predictions on the same strand.")
    (license license:gpl3)))

;; Augustus has no configure script.  The phases patch the makefiles to use
;; the Guix-provided headers, and a hand-written 'install phase copies the
;; auxiliary programs, the scripts and the configuration directory.
(define-public augustus
  (package
    (name "augustus")
    (version "3.4.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/Gaius-Augustus/Augustus")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "1nc4nddcxi98fb14vmgj7x5aw5vglm4amzraqibgzmigpqnca68f"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags (list (string-append "CC=" ,(cc-for-target))
                          (string-append "CXX=" ,(cxx-for-target)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; No configure script
         (add-after 'unpack 'adjust-sources
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "common.mk"
               ;; Has the wrong version.
               (("AUGVERSION = .*")
                (string-append "AUGVERSION = " ,version "\n"))
               ;; Looks for ancient version of mysql.
               (("COMPGENEPRED = ")
                "SQLITE=true\nMYSQL = false\nCOMPGENEPRED = "))
             (substitute* "src/Makefile"
               (("/usr/include/lpsolve")
                (string-append (assoc-ref inputs "lpsolve") "/include/lpsolve")))
             (substitute* (find-files "auxprogs" "Makefile")
               (("/usr/include/bamtools")
                (string-append (assoc-ref inputs "bamtools") "/include/bamtools"))
               (("/usr/include/htslib")
                (string-append (assoc-ref inputs "htslib") "/include/htslib"))
               (("/usr/include/boost")
                (string-append (assoc-ref inputs "boost") "/include/boost")))
             #t))
         ;; Run the standard 'check phase with the "test" target.
         (replace 'check
           (lambda args
             ;; These tests rely on mysql.
             ;(apply (assoc-ref %standard-phases 'check)
             ;       `(,@args #:test-target "unit_test"))
             (apply (assoc-ref %standard-phases 'check)
                    `(,@args #:test-target "test"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (bin     (string-append out "/bin"))
                    ;; Was misspelled "share/augstus".
                    (share   (string-append out "/share/augustus"))
                    (scripts (string-append share "/scripts")))
               ;; Install targets taken from Debian.
               (install-file "auxprogs/bam2wig/bam2wig" bin)
               (install-file "auxprogs/compileSpliceCands/compileSpliceCands" bin)
               (install-file "auxprogs/homGeneMapping/src/homGeneMapping" bin)
               (install-file "auxprogs/joingenes/joingenes" bin)
               (mkdir-p scripts)
               (copy-recursively "scripts" scripts)
               (copy-recursively "config" share)
               ;; Clean up the *installed* scripts directory.  The excursion
               ;; used to enter the source "scripts" directory, which left
               ;; the already-copied files in the output untouched.
               (with-directory-excursion scripts
                 (for-each delete-file
                           (cons "executeTestCGP.py"
                                 (find-files "." "\\.txt$"))))
               (for-each make-file-writable (find-files out "\\.gz$"))
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("htslib" ,htslib)
       ("perl" ,perl)
       ("python" ,python)
       ("sqlite" ,sqlite)
       ("zlib" ,zlib)))
    (native-inputs
     `(("bamtools" ,bamtools)
       ("gsl" ,gsl)
       ("gzip" ,gzip)
       ("lpsolve" ,lpsolve)
       ("samtools" ,samtools)
       ("suitesparse" ,suitesparse)))
    (home-page "http://bioinf.uni-greifswald.de/webaugustus/")
    (synopsis "Genome annotation with AUGUSTUS")
    (description "Augustus can be used as an ab initio program, which means it bases its prediction purely on the sequence. AUGUSTUS may also incorporate hints on the gene structure coming from extrinsic sources such as EST, MS/MS, protein alignments and syntenic genomic alignments.")
    (license (license:non-copyleft
              "https://opensource.org/licenses/artistic-license-1.0"
              "Artistic-license-1.0"))))

(define-public pplacer
  (let ((commit "807f6f3"))
    (build-with-ocaml4.07
     (package
       (name "pplacer")
       ;; The commit should be updated with each version change.
(version "1.1.alpha19") (source (origin (method git-fetch) (uri (git-reference (url "https://github.com/matsen/pplacer") (commit (string-append "v" version)))) (file-name (git-file-name name version)) (sha256 (base32 "11ppbbbx20p2g9wj3ff64dhnarb12q79v7qh4rk0gj6lkbz4n7cn")))) (build-system ocaml-build-system) (arguments (list #:modules '((guix build ocaml-build-system) (guix build utils) (ice-9 ftw)) #:phases #~(modify-phases %standard-phases (delete 'configure) (add-after 'unpack 'fix-build-with-latest-ocaml (lambda _ (substitute* "myocamlbuild.ml" (("dep \\[\"c_pam\"\\]" m) (string-append "flag [\"ocaml\"; \"compile\"] (A \"-unsafe-string\");\n" m)) (("let run_and_read" m) (string-append " let split s ch = let x = ref [] in let rec go s = let pos = String.index s ch in x := (String.before s pos)::!x; go (String.after s (pos + 1)) in try go s with Not_found -> !x let split_nl s = split s '\\n' let before_space s = try String.before s (String.index s ' ') with Not_found -> s " m)) (("run_and_read \"ocamlfind list \\| cut -d' ' -f1\"" m) (string-append "List.map before_space (split_nl & " m ")")) ((" blank_sep_strings &") "") ((" Lexing.from_string &") "")))) (add-after 'unpack 'replace-bundled-cddlib (lambda* (#:key inputs #:allow-other-keys) (let* ((cddlib-src (assoc-ref inputs "cddlib-src")) (local-dir "cddlib_guix")) (mkdir local-dir) (with-directory-excursion local-dir (invoke "tar" "xvf" cddlib-src)) (let ((cddlib-src-folder (string-append local-dir "/" (list-ref (scandir local-dir) 2) "/lib-src"))) (for-each make-file-writable (find-files "cdd_src" ".*")) (for-each (lambda (file) (copy-file file (string-append "cdd_src/" (basename file)))) (find-files cddlib-src-folder ".*[ch]$")))))) (add-after 'unpack 'fix-makefile (lambda _ ;; Remove system calls to 'git'. 
;; This phase is part of a #~ gexp, so VERSION and COMMIT are spliced
;; in with #$ (ungexp).  A plain unquote has no enclosing quasiquote
;; here and would reach the builder as a literal 'unquote' form.
(substitute* "Makefile"
  (("^DESCRIPT:=pplacer-.*")
   (string-append
    "DESCRIPT:=pplacer-$(shell uname)-v" #$version "\n")))
(substitute* "myocamlbuild.ml"
  (("git describe --tags --long .*\\\" with")
   (string-append
    "echo -n v" #$version "-" #$commit "\" with")))))
;; Install by copying the built "bin" directory into the output.
(replace 'install
  (lambda* (#:key outputs #:allow-other-keys)
    (let* ((out (assoc-ref outputs "out"))
           (bin (string-append out "/bin")))
      (copy-recursively "bin" bin)))))
#:ocaml ocaml-4.07
#:findlib ocaml4.07-findlib))
(inputs
 (list (list zlib "static")
       (list gsl "static")
       (package-with-ocaml4.07 ocaml-ounit)
       (package-with-ocaml4.07 ocaml-batteries)
       (package-with-ocaml4.07 camlzip)
       (package-with-ocaml4.07 ocaml-csv)
       (package-with-ocaml4.07 ocaml-sqlite3)
       (package-with-ocaml4.07 ocaml-xmlm)
       (package-with-ocaml4.07 ocaml-mcl)
       ocaml4.07-gsl-1
       (list sqlite "static")))
(native-inputs
 (list (package-source cddlib)
       (package-with-ocaml4.07 ocamlbuild)
       pkg-config))
(propagated-inputs
 (list pplacer-scripts))
(synopsis "Phylogenetic placement of biological sequences")
(description
 "Pplacer places query sequences on a fixed reference phylogenetic tree to maximize phylogenetic likelihood or posterior probability according to a reference alignment. Pplacer is designed to be fast, to give useful information about uncertainty, and to offer advanced visualization and downstream analysis.")
(home-page "https://matsen.fhcrc.org/pplacer/")
(license license:gpl3)))))

(define-public python2-biopython
  (python2-package python-biopython))

;; This package is installed alongside 'pplacer'.  It is a separate package so
;; that it can use the python-build-system for the scripts that are
;; distributed alongside the main OCaml binaries.
(define pplacer-scripts
  (package
    (inherit pplacer)
    (name "pplacer-scripts")
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-scripts-dir
           (lambda _ (chdir "scripts")))
         (replace 'check
           (lambda* (#:key tests?
#:allow-other-keys) (when tests? (invoke "python" "-m" "unittest" "discover" "-v")))) (add-after 'install 'wrap-executables (lambda* (#:key inputs outputs #:allow-other-keys) (let* ((out (assoc-ref outputs "out")) (bin (string-append out "/bin")) (path (string-append (assoc-ref inputs "hmmer") "/bin:" (assoc-ref inputs "infernal") "/bin"))) (display path) (wrap-program (string-append bin "/refpkg_align.py") `("PATH" ":" prefix (,path))) (wrap-program (string-append bin "/hrefpkg_query.py") `("PATH" ":" prefix (,path))))))))) (inputs `(("infernal" ,infernal) ("hmmer" ,hmmer))) (propagated-inputs `(("python-biopython" ,python2-biopython) ("taxtastic" ,taxtastic))) (synopsis "Pplacer Python scripts"))) (define-public sepp (package (name "sepp") (version "4.5.1") (source (origin (method git-fetch) (uri (git-reference (url "https://github.com/smirarab/sepp") (commit version))) (file-name (git-file-name name version)) (sha256 (base32 "1bw5gjhymq0a4slkk7pr5dl4jb9bnwv4qpn26mvwp8fx3aszvmij")) (modules '((guix build utils))) (snippet '(begin (delete-file-recursively "tools/bundled") (mkdir-p "tools/bundled/Linux") (for-each (lambda (file) (with-output-to-file (string-append "tools/bundled/Linux/" file) (lambda _ (format #t "")))) '("guppy-32" "guppy-64" "hmmalign-32" "hmmalign-64" "hmmbuild-32" "hmmbuild-64" "hmmsearch-32" "hmmsearch-64" "pplacer-32" "pplacer-64")) ;; TODO: Rebuild java blob. ;(delete-file-recursively "tools/merge/lib") ;(delete-file "tools/merge/seppJsonMerger.jar") ;; This is a circular reference to the top directory ... (delete-file-recursively "sepp-package/sepp"))))) (build-system python-build-system) (arguments `(#:tests? #f ; Test suite hangs. #:phases (modify-phases %standard-phases (add-before 'check 'pre-check (lambda* (#:key tests? 
inputs outputs #:allow-other-keys) (let ((hmmer (string-append (assoc-ref inputs "hmmer") "/bin/")) (pplacer (string-append (assoc-ref inputs "pplacer") "/bin/")) (tools "tools/bundled/Linux/")) (for-each (lambda (target package) (delete-file (string-append tools target)) (symlink (string-append package (string-drop-right target 3)) (string-append tools target))) (list "guppy-32" "guppy-64" "hmmalign-32" "hmmalign-64" "hmmbuild-32" "hmmbuild-64" "hmmsearch-32" "hmmsearch-64" "pplacer-32" "pplacer-64") (list pplacer pplacer hmmer hmmer hmmer hmmer hmmer hmmer pplacer pplacer)) #t))) (replace 'check (lambda* (#:key tests? inputs outputs #:allow-other-keys) (when tests? (add-installed-pythonpath inputs outputs) ;; This test is upset we removed the Darwin binary. (delete-file "test/unittest/testConfig.py") ;; This test is missing its config file. (delete-file "test/unittest/testUPP.py") (invoke "nosetests" "-w" "test/unittest")))) (replace 'build (lambda* (#:key outputs #:allow-other-keys) (let* ((out (assoc-ref outputs "out")) (home.path (string-append out "/share/sepp"))) (setenv "HOME" home.path) (mkdir-p (string-append home.path "/.sepp")) ;; configure with '-c' so our pretend bundled ;; libraries aren't actually installed. 
(invoke "python" "setup.py" "config" "-c") #t))) (add-after 'install 'copy-home.path (lambda* (#:key inputs outputs #:allow-other-keys) (let* ((out (assoc-ref outputs "out")) (home.path (string-append out "/share/sepp"))) (with-output-to-file (string-append out "/home.path") (lambda _ (format #t "~a/.sepp" home.path))) (with-output-to-file (string-append home.path "/.sepp/main.config") (lambda _ (format #t "[pplacer]~@ path=~a/bin/pplacer~@ ~@ [hmmalign]~@ path=~a/bin/hmmalign~@ ~@ [hmmsearch]~@ path=~a/bin/hmmsearch~@ piped=False~@ elim=10000~@ filters=True~@ ~@ [hmmbuild]~@ path=~a/bin/hmmbuild~@ ~@ [jsonmerger]~@ path=~a/share/sepp/seppJsonMerger.jar~@ ~@ [exhaustive]~@ strategy = centroid~@ minsubsetsize = 2~@ placementminsubsetsizefacotr = 4~@ placer = pplacer~@ weight_placement_by_alignment = True~%" (assoc-ref inputs "pplacer") (assoc-ref inputs "hmmer") (assoc-ref inputs "hmmer") (assoc-ref inputs "hmmer") out))) (install-file "tools/merge/seppJsonMerger.jar" home.path) (copy-file (string-append out "/home.path") (string-append (site-packages inputs outputs) "/home.path")) #t)))))) (inputs `(("hmmer" ,hmmer) ("java" ,icedtea-8) ("pplacer" ,pplacer) ("python-dendropy" ,python-dendropy))) (native-inputs `(("python-nose" ,python-nose))) (home-page "https://github.com/smirarab/sepp") (synopsis "SATe enabled phylogenetic placement") (description "SEPP operates by using a divide-and-conquer strategy adopted from SATe-II (@url{Liu et al. (Systematic Biology 2012), http://sysbio.oxfordjournals.org/content/61/1/90.full.pdf+html?sid=dd32838d-89dc-4bda-8008-6f948146341f} and @url{Liu et. al. (Science 2009), http://www.sciencemag.org/content/324/5934/1561.abstract}) to construct an Ensemble of @acronym{HMMs, Hidden Markov Models} to represent the input multiple sequence alignment `A`. It then computes the fit of each query sequence in `X` to each HMM in the ensemble, and uses the highest scoring HMM to add the sequence to the input tree `T`. 
This technique improves the accuracy of the placements of the query sequences compared to using a single HMM to represent the input alignment. SEPP uses tools in HMMER to construct HMMs, compute the fit of sequences to HMMs, and add sequences to the alignment `A`. SEPP uses @code{pplacer} to add query sequences to the input tree `T`, after they are added to the alignment `A`. SEPP is also used in other software, including @acronym{TIPP, taxonomic identical using phylogenetic placement} and @acronym{UPP, ultra-large alignments using phylogeny-aware profiles}.") (license license:gpl3+))) (define-public busco (package (name "busco") (version "5.4.1") (source (origin (method git-fetch) (uri (git-reference (url "https://gitlab.com/ezlab/busco") (commit version))) (file-name (git-file-name name version)) (sha256 (base32 "0fnijr7q9jj8hq3q5v0jd73zcznqrg72idr3lmchx7x2c66mb9dz")))) (build-system python-build-system) (arguments '(#:phases (modify-phases %standard-phases (replace 'check (lambda* (#:key inputs outputs tests? #:allow-other-keys) (when tests? 
(begin (add-installed-pythonpath inputs outputs) (invoke "python" "setup.py" "check"))))) (add-after 'install 'wrap-binary (lambda* (#:key inputs outputs #:allow-other-keys) (let ((out (assoc-ref outputs "out"))) (wrap-program (string-append out "/bin/busco") `("PATH" ":" prefix (,(string-append (assoc-ref inputs "augustus") "/bin") ,(string-append (assoc-ref inputs "blast") "/bin") ,(string-append (assoc-ref inputs "hmmer") "/bin") ,(string-append (assoc-ref inputs "metaeuk") "/bin") ,(string-append (assoc-ref inputs "prodigal") "/bin") ,(string-append (assoc-ref inputs "r") "/bin") ,(string-append (assoc-ref inputs "sepp") "/bin")))))))))) (inputs `(("augustus" ,augustus) ("blast" ,blast+) ("hmmer" ,hmmer) ("metaeuk" ,metaeuk) ("prodigal" ,prodigal) ("python-biopython" ,python-biopython) ("python-pandas" ,python-pandas) ("r" ,r) ("sepp" ,sepp))) (home-page "https://busco.ezlab.org/") (synopsis "Assess genome assembly and annotation completeness") (description "Assess genome assembly and annotation completeness with Benchmarking Universal Single-Copy Orthologs.") (license license:expat))) (define-public mutation-simulator (let ((commit "9cb6bd2acf8201151bc610be14963e65b41d8899") ; March 25, 2021 (revision "1")) (package (name "mutation-simulator") (version (git-version "2.0.3" revision commit)) (source (origin (method git-fetch) (uri (git-reference (url "https://github.com/mkpython3/mutation-simulator") (commit commit))) (file-name (git-file-name name version)) (sha256 (base32 "1yxn5v5x804rm5ra1srmnph468yk7amsgfsj6h20rd6nmj2j0g9c")))) (build-system copy-build-system) (arguments `(#:install-plan '(("mutation-simulator.py" "bin/")) #:phases (modify-phases %standard-phases (add-after 'install 'wrap-script (lambda* (#:key outputs #:allow-other-keys) (let* ((out (assoc-ref outputs "out")) (script (string-append out "/bin/mutation-simulator.py"))) ;; wrap-script doesn't accept arguments (wrap-program script `("PYTHONPATH" ":" prefix (,(getenv "PYTHONPATH")))) (chmod 
script #o555) ;; When using wrap-script. (when (file-exists? (string-append out "/bin/.mutation-simulator.py-real")) (chmod (string-append out "/bin/.mutation-simulator.py-real") #o555)) #t))) (add-after 'wrap-script 'check (lambda* (#:key tests? outputs #:allow-other-keys) (when tests? (invoke (string-append (assoc-ref outputs "out") "/bin/mutation-simulator.py") "Test/test.fa" "rmt" "Test/test.rmt"))))))) (inputs `(("bash" ,bash-minimal) ; for wrap-program ;("guile" ,(@ (gnu packages guile) guile-3.0)) ; for wrap-script ("python" ,python) ("python-blist" ,python-blist) ("python-pyfaidx" ,python-pyfaidx) ("python-numpy" ,python-numpy) ("python-tqdm" ,python-tqdm))) (home-page "https://github.com/mkpython3/mutation-simulator") (synopsis "Simulate mutations on given fasta files") (description "Mutation-Simulator is a Python tool for simulating SNPs and SVs in any reference genome with cohesive documentation about implemented mutations. With Mutation-Simulator, the new file format @acronym{RMT, Random Mutation Tables} is introduced, which gives more simulation power to the user by creating an interface for more natural simulations within specific genomes. 
Mutation-Simulator provides 3 different modes to simulate SNPs, insertions, deletions, tandem duplications, inversions, translocations and interchromosomal translocations from the commandline or with highly configureable RMT files.") (license license:gpl3+)))) (define-public python-blist (package (name "python-blist") (version "1.3.6") (source (origin (method url-fetch) (uri (pypi-uri "blist" version)) (sha256 (base32 "1hqz9pqbwx0czvq9bjdqjqh5bwfksva1is0anfazig81n18c84is")) (patches (search-patches "blist-stopiteration.patch")))) (build-system python-build-system) (home-page "http://stutzbachenterprises.com/blist/") (synopsis "List-like type for Python with better asymptotic performance") (description "This package provides a list-like type for Python with better asymptotic performance and similar performance on small lists.") (license license:bsd-3))) (define-public verkko (let ((commit "9323e71f46b0ea1725202ebe911142d0d1288c45") ; Jan 22, 2022 (revision "1")) (package (name "verkko") (version (git-version "1.0_beta" revision commit)) (source (origin (method git-fetch) (uri (git-reference (url "https://github.com/marbl/verkko") (commit commit) (recursive? #t))) ; Needs canu (file-name (git-file-name name version)) (sha256 (base32 "0pb66mlz8r9hrvlcfw9zwxqzzns7221pm2z9mrjisvniwq8ggqmh")))) (build-system gnu-build-system) (arguments (list #:make-flags #~(list (string-append "CC=" #$(cc-for-target)) (string-append "VERSION= verkko " #$version) "BUILDOPTIMIZED=1") #:phases #~(modify-phases %standard-phases (delete 'configure) ; No configure script. (add-after 'unpack 'chdir (lambda _ (chdir "src"))) (add-after 'chdir 'patch-source (lambda* (#:key inputs #:allow-other-keys) (substitute* "verkko.sh" (("\"#!/bin/sh\"") (string-append "\"#!" 
(which "sh") "\"")) ;; Hardcode the paths to some binaries (("\\$\\(which MBG\\)") (search-input-file inputs "/bin/MBG")) (("\\$\\(which GraphAligner\\)") (search-input-file inputs "/bin/GraphAligner")) (("snakemake --nocolor") (string-append (search-input-file inputs "/bin/snakemake") " --nocolor"))) (substitute* (find-files "Snakefiles") (("#!/bin/sh") (string-append "#!" (which "sh")))))) (replace 'check (lambda* (#:key tests? inputs #:allow-other-keys) (let ((hifi.fastq.gz (assoc-ref inputs "hifi.fastq.gz")) (ont.fastq.gz (assoc-ref inputs "ont.fastq.gz"))) (when tests? (invoke "../bin/verkko" "-d" "asm" "--hifi" hifi.fastq.gz "--nano" ont.fastq.gz))))) (replace 'install (lambda* (#:key outputs #:allow-other-keys) (let ((out (assoc-ref outputs "out"))) (with-directory-excursion "../" (copy-recursively "bin" (string-append out "/bin")) (copy-recursively "lib" (string-append out "/lib"))))))))) (inputs (list graphaligner mbg python-wrapper snakemake)) (native-inputs `(("perl" ,perl) ;; Provided by upstream to test the build: ("hifi.fastq.gz" ; 118 MiB ,(origin (method url-fetch) (uri "https://obj.umiacs.umd.edu/sergek/shared/ecoli_hifi_subset24x.fastq.gz") (sha256 (base32 "1nh5jzwnlf0r37rcgqwsjlszb8i0w5pfwp3rb5h869qp5qdlms8z")))) ("ont.fastq.gz" ; 244 MiB ,(origin (method url-fetch) (uri "https://obj.umiacs.umd.edu/sergek/shared/ecoli_ont_subset50x.fastq.gz") (sha256 (base32 "056pkf1dx76zs88vi4zgcbzrgvqqvlq9mpnyvmdszyhy0cj00smy")))))) (home-page "https://github.com/marbl/verkko") (synopsis "Hybrid genome assembly pipeline for telomere-to-telomere assembly of PacBio HiFi and Oxford Nanopore reads") (description "Verkko is a hybrid genome assembly pipeline developed for telomere-to-telomere assembly of PacBio HiFi and Oxford Nanopore reads. Verkko is Finnish for net, mesh and graph. 
Verkko uses Canu to correct remaining errors in the HiFi reads, builds a multiplex de Bruijn graph using MBG, aligns the Oxford Nanopore reads to the graph using GraphAligner, progressively resolves loops and tangles first with the HiFi reads then with the aligned Oxford Nanopore reads, and finally creates contig consensus sequences using Canu's consensus module.") (license license:public-domain)))) (define-public mbg (package (name "mbg") (version "1.0.8") (source (origin (method git-fetch) (uri (git-reference (url "https://github.com/maickrau/MBG") (commit (string-append "v" version)) (recursive? #t))) (file-name (git-file-name name version)) (sha256 (base32 "14p0vk6qfyf7ha8x30dk8hi16c5n8fpzi96k2vwmg17mlcf0hkgj")))) (build-system gnu-build-system) (arguments (list #:tests? #f ; No tests. #:make-flags #~(list (string-append "VERSION=" #$version)) #:phases #~(modify-phases %standard-phases (delete 'configure) ; No configure script. (add-after 'unpack 'use-packaged-inputs (lambda* (#:key inputs #:allow-other-keys) (let ((cxxopts (dirname (search-input-file inputs "/include/cxxopts.hpp"))) (concurrentqueue (search-input-directory inputs "/include/concurrentqueue"))) (delete-file-recursively "cxxopts") (delete-file-recursively "concurrentqueue") (substitute* "makefile" (("-Icxxopts/include") (string-append "-I" cxxopts)) (("-Iconcurrentqueue") (string-append "-I" concurrentqueue)) ;; No need to build statically. (("-Wl,-Bstatic") "") (("-static-libstdc\\+\\+") ""))))) (replace 'install (lambda* (#:key outputs #:allow-other-keys) (let ((out (assoc-ref outputs "out"))) (install-file "bin/MBG" (string-append out "/bin")))))))) (inputs (list concurrentqueue ;; parallel-hashmap ;; zstr zlib)) (native-inputs (list cxxopts)) (home-page "https://github.com/maickrau/MBG") (synopsis "Minimizer based sparse de Bruijn Graph constructor") (description "Minimizer based sparse de Bruijn Graph constructor. 
Homopolymer compress input sequences, pick syncmers from hpc-compressed sequences, connect syncmers with an edge if they are adjacent in a read, unitigify and homopolymer decompress. Suggested input is PacBio HiFi/CCS reads.") (license license:expat))) ;; TODO: Unbundle bloom, meryl. (define-public willowmap (package (name "willowmap") (version "2.03") (source (origin (method git-fetch) (uri (git-reference (url "https://github.com/marbl/Winnowmap") (commit (string-append "v" version)))) (file-name (git-file-name name version)) (sha256 (base32 "152650bljmdm9f1nmi4xbpxs9583faijba9i8gkp3qz76pzcvbfh")) (modules '((guix build utils))) (snippet '(begin (substitute* "ext/meryl/src/utility/src/utility/system.C" ;; This was removed in glibc-2.32. ((".*sys/sysctl\\.h.*") "")))))) (build-system gnu-build-system) (arguments (list #:tests? #f ; No tests. #:make-flags #~(list "OSVERSION=5.15") #:phases #~(modify-phases %standard-phases (delete 'configure) ; No configure script. (replace 'install (lambda* (#:key outputs #:allow-other-keys) (let ((out (assoc-ref outputs "out"))) (for-each (lambda (file) (install-file file (string-append out "/bin"))) (find-files "bin")))))))) (inputs (list zlib)) (native-inputs (list perl which)) (home-page "https://github.com/marbl/Winnowmap") (synopsis "Long read / genome alignment software") (description "Winnowmap is a long-read mapping algorithm optimized for mapping ONT and PacBio reads to repetitive reference sequences. Winnowmap implements a novel weighted minimizer sampling algorithm (>=v1.0). This optimization was motivated by the need to avoid masking of frequently occurring k-mers during the seeding stage in an efficient manner, and achieve better mapping accuracy in complex repeats (e.g., long tandem repeats) of the human genome. 
Using weighted minimizers, Winnowmap down-weights frequently occurring k-mers, thus reducing their chance of getting selected as minimizers.") (supported-systems '("x86_64-linux")) ;; minimap2 based code is expat, as is bloom. ;; Meryl is mix bsd-3, expat and public-domain. ;; Rest of the code is public domain. (license license:expat))) (define-public quast (package (name "quast") (version "5.2.0") (source (origin (method url-fetch) (uri (list (pypi-uri "quast" version) (string-append "https://github.com/ablab/quast" "/releases/download/quast_" version "/quast-" version ".tar.gz"))) (sha256 (base32 "1nz0lz7zgrhcirmm3xcn756f91a6bpww9npap3a4l9gsgh413nfc")) (patches (search-patches "quast.patch")) (snippet #~(begin (use-modules (guix build utils)) (with-directory-excursion "quast_libs" (substitute* "run_busco.py" (("from quast_libs\\.busco import busco") "import busco")) (delete-file-recursively "site_packages/joblib2") (delete-file-recursively "site_packages/joblib3") (delete-file-recursively "site_packages/simplejson") (delete-file-recursively "minimap2") ; Accepts minimap2 >= 2.19 ;; These packages are needed at runtime (delete-file-recursively "bedtools") (delete-file-recursively "bwa") ;; These files are from python itself (delete-file "site_packages/bz2.py") (delete-file "site_packages/_bz2.py") (delete-file "site_packages/_compression.py") ;; Delete some pre-compiled binaries (delete-file-recursively "barrnap/binaries/darwin") (delete-file "barrnap/binaries/linux/nhmmer") (delete-file "busco/hmmsearch") (delete-file "sambamba/sambamba_linux") (delete-file "sambamba/sambamba_osx") ;; TODO: ;(delete-file "barrnap/bin/barrnap") ;; Genemark is a non-free, but available to academic ;; institutions. Remove some of the bundled binaries. 
(delete-file-recursively "genemark/linux_32") (delete-file-recursively "genemark/macosx") (delete-file-recursively "genemark-es/linux_32") (delete-file-recursively "genemark-es/macosx")))))) (build-system python-build-system) (arguments (list #:phases #~(modify-phases %standard-phases (add-after 'unpack 'patchelf-genemark (lambda* (#:key inputs #:allow-other-keys) (let ((patchelf (search-input-file inputs "/bin/patchelf")) (ld-so (search-input-file inputs #$(glibc-dynamic-linker))) (rpath (dirname (search-input-file inputs "/lib/libstdc++.so.6")))) (for-each (lambda (binary) (invoke patchelf "--set-interpreter" ld-so binary) (invoke patchelf "--set-rpath" rpath binary)) (list "quast_libs/genemark/linux_64/gmhmmp" "quast_libs/genemark/linux_64/probuild" "quast_libs/genemark-es/linux_64/gmhmme3" "quast_libs/genemark-es/linux_64/probuild"))))) (add-before 'build 'replace-bundled-binaries (lambda* (#:key inputs #:allow-other-keys) (substitute* "quast_libs/ca_utils/misc.py" (("join\\(qconfig.LIBS_LOCATION, 'minimap2'\\)") (string-append "'" (search-input-file inputs "/bin/minimap2") "'"))) (substitute* "./quast_libs/ra_utils/misc.py" (("join\\(sambamba_dirpath, fname \\+ platform_suffix\\)") (string-append "'" (search-input-file inputs "/bin/sambamba") "'")) (("join\\(qconfig.LIBS_LOCATION, 'bedtools', 'bin'\\)") (string-append "'" (dirname (search-input-file inputs "/bin/bedtools")) "'"))))) (add-after 'wrap 'wrap-more (lambda* (#:key inputs outputs #:allow-other-keys) (for-each (lambda (file) (wrap-program file `("PATH" ":" prefix ,(map (lambda (file-name) (string-append (assoc-ref inputs file-name) "/bin")) (list "bedtools" "blast+" "busco" "bwa" "hmmer" "minimap2" "sambamba"))))) (find-files (string-append #$output "/bin") "\\.py$")))) (replace 'check (lambda* (#:key tests? inputs outputs #:allow-other-keys) (when tests? (add-installed-pythonpath inputs outputs) (invoke "python" "setup.py" "test")))) (delete 'strip)))) ; Can't strip genemark binaries. 
(native-inputs (list (list (canonical-package gcc) "lib") patchelf)) (inputs (list python-joblib python-matplotlib python-simplejson ;; And the non-python packages: ;augustus bash-minimal bedtools blast+ busco bwa hmmer minimap2 perl sambamba)) (home-page "http://quast.sourceforge.net/") (synopsis "Genome assembly evaluation tool") (description "QUAST stands for QUality ASsessment Tool. It evaluates genome/metagenome assemblies by computing various metrics. The current QUAST toolkit includes the general QUAST tool for genome assemblies, MetaQUAST, the extension for metagenomic datasets, QUAST-LG, the extension for large genomes (e.g., mammalians), and Icarus, the interactive visualizer for these tools.") (supported-systems '("x86_64-linux")) ; Due to bundled genemark (license (list license:gpl2 ; Main program ;; Genemark (bundled) is free for non-commercial use by academic, ;; government, and non-profit/not-for-profit institutions. (license:non-copyleft "http://topaz.gatech.edu/GeneMark/license_download.cgi"))))) (define-public blobtools (let ((commit "1bed7870198831539b370f9254d5d30b94199a18") (revision "3")) ;; 2022-09-21 (package (name "blobtools") (version (git-version "1.1.1" revision commit)) (source (origin (method git-fetch) (uri (git-reference (url "https://github.com/DRL/blobtools") (commit commit))) (file-name (git-file-name name version)) (sha256 (base32 "0nki2m1sxkx75rkwzw6dqvxzyswrv47q0rz4bx9w0bz4fawnx86z")))) (build-system python-build-system) (arguments (list #:tests? #f)) ; No tests in repo. (inputs (list python-docopt python-matplotlib python-tqdm python-pysam python-pyyaml)) (home-page "https://blobtools.readme.io/") (synopsis "Modular command-line solution for partitioning of genome datasets") (description "Blobtools is a modular command-line solution for visualisation, quality control and taxonomic partitioning of genome datasets. 
BlobTools takes the information from HITS files and sums up bitscores by taxonomic group at each taxonomic rank.") (license license:gpl3)))) ;; TODO: Regenerate or remove docs folder. (define-public python-pixy (package (name "python-pixy") (version "1.2.6.beta1") (source (origin (method git-fetch) (uri (git-reference (url "https://github.com/ksamuk/pixy") (commit version))) (file-name (git-file-name name version)) (sha256 (base32 "16hl6hcf38fya18b1x75250z1shsysvpmc75vsp6wjqggajcfqc7")))) (build-system python-build-system) (arguments (list #:phases #~(modify-phases %standard-phases (replace 'check (lambda* (#:key tests? #:allow-other-keys) (when tests? ;; "Test" based on test command in conda recipe. (invoke "pixy" "--version"))))))) (propagated-inputs (list python-multiprocess python-numcodecs python-numpy python-pandas python-scikit-allel python-scipy)) (home-page "https://pixy.readthedocs.io/") (synopsis "Unbiased estimation of nucleotide diversity within and between populations") (description "@command{pixy} is a command-line tool for painlessly estimating average nucleotide diversity within (π) and between (dxy) populations from a VCF. In particular, pixy facilitates the use of VCFs containing invariant (monomorphic) sites, which are essential for the correct computation of π and dxy in the face of missing data (i.e. 
always).") (license license:expat))) (define-public wfa2-lib (let ((commit "af6be887614e8bb4e2b6e8c4e500705a978bd513") ; 14 April 2022 (revision "1")) (package (name "wfa2-lib") (version (git-version "2.1" revision commit)) ; As seen in ./VERSION (source (origin (method git-fetch) (uri (git-reference (url "https://github.com/smarco/WFA2-lib") (commit commit))) (file-name (git-file-name name version)) (sha256 (base32 "09gsmks4dzmfscklb60m6gcsvsd9r6jywf10633dpcsfsdcvmzaw")) (snippet #~(begin (use-modules ((guix build utils))) (substitute* "Makefile" (("^CC=") "CC:=") (("^CPP=") "CPP:=") (("-march=native") "")))))) (build-system gnu-build-system) (arguments (list #:tests? #f ; No tests. #:parallel-build? #f ; Race condition in Makefile. #:modules '((guix build gnu-build-system) (guix build utils) (srfi srfi-26)) #:make-flags #~(list (string-append "CC=" #$(cc-for-target)) (string-append "CPP=" #$(cxx-for-target))) #:phases #~(modify-phases %standard-phases (delete 'configure) ; No configure script. ;; -flto breaks align_benchmark. (replace 'build (lambda* (#:key (make-flags '()) #:allow-other-keys) (apply invoke "make" "all" make-flags))) (replace 'install (lambda _ (for-each (cut install-file <> (string-append #$output "/bin")) (find-files "bin")) (for-each (cut install-file <> (string-append #$output "/lib")) (find-files "lib")) (for-each (lambda (file) (mkdir-p (string-append #$output "/include/wfa2-lib/" (dirname file))) (copy-file file (string-append #$output "/include/wfa2-lib/" file))) (find-files "." "\\.(h|hpp)$"))))))) (home-page "https://github.com/smarco/WFA2-lib") (synopsis "Wavefront alignment algorithm library") (description "The @acronym{wavefront alignment, WFA} algorithm is an exact gap-affine algorithm that takes advantage of homologous regions between the sequences to accelerate the alignment process. 
Unlike to traditional dynamic programming algorithms that run in quadratic time, the WFA runs in time @code{O(ns+s^2)}, proportional to the sequence length @code{n} and the alignment score @code{s}, using @code{O(s^2)} memory. Moreover, the WFA algorithm exhibits simple computational patterns that the modern compilers can automatically vectorize for different architectures without adapting the code.")
      (properties '((tunable? . #t)))
      (license license:expat))))

(define-public wfa2-lib-static
  (package
    (inherit (static-package wfa2-lib))
    (name "wfa2-lib-static")
    (arguments
     (substitute-keyword-arguments (package-arguments wfa2-lib)
       ((#:make-flags flags ''())
        #~(cons "CC_FLAGS+=-static" #$flags))))))

(define-public r-rrbgen
  (package
    ;; The package name must match the variable and the upstream project;
    ;; it was previously "r-stitch", which clashes with the r-stitch
    ;; package defined later in this file.
    (name "r-rrbgen")
    (version "0.0.6")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/rwdavies/rrbgen/releases/download/"
             version "/rrbgen_" version ".tar.gz"))
       (sha256
        (base32 "1vhqy8licl2pkzar4aag0q5fhnb3fdch8acyjh9445ia42z01z9c"))))
    (build-system r-build-system)
    (propagated-inputs
     (list r-rcpp r-rcpparmadillo))
    (home-page "https://github.com/rwdavies/rrbgen")
    (synopsis "Lightweight limited functionality R bgen read/write library")
    (description
     "@code{r-rrbgen} supports v1.3 of the bgen format. It supports reading and writing using 8, 16, 24 or 32 bits per probability, using Layout = 2 and CompressedSNPBlocks = 1, for bi-allelic SNPs with samples of ploidy 2.
Any other format specification may crash unexpectedly without a properly defined error.")
    (license license:gpl3)))

(define-public seqlib
  (package
    (name "seqlib")
    (version "0.1.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/Zilong-Li/SeqLib")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1hczg1swghnxm6af74l09crdgf7l282jabmyck9mi5bk6vg9s1pn"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          ;; Patch build scripts to unbundle htslib and build a
          ;; seqlib shared library using libtool.
          (add-after 'unpack 'patch-build-scripts
            (lambda _
              ;; Initialize libtool.
              (substitute* "configure.ac"
                (("AM_INIT_AUTOMAKE\\(foreign\\)\n" all)
                 (string-append all "LT_INIT\n")))
              (substitute* "Makefile.am"
                ;; Install headers
                (("^SUBDIRS" all)
                 (string-append "nobase_include_HEADERS = "
                                (string-join (find-files "SeqLib"))
                                "\n"
                                all))
                ;; Do not recurse into htslib submodule.
                (("htslib") "")
                ;; Remove install target override.
                (("^install:") "")
                (("^\tmkdir -p lib && cp src/libseqlib.a /libhts.a lib") ""))
              (substitute* "src/Makefile.am"
                ;; Build libtool library.
                (("noinst_LIBRARIES = libseqlib\\.a")
                 "lib_LTLIBRARIES = libseqlib.la\nlibseqlib_la_LIBADD = -ljsoncpp")
                (("libseqlib\\.a") "libseqlib.la")
                (("libseqlib_a") "libseqlib_la"))
              (substitute* (list "SeqLib/BamHeader.h"
                                 "SeqLib/BamRecord.h"
                                 "SeqLib/RefGenome.h"
                                 "src/ReadFilter.cpp")
                ;; Patch path to htslib headers: turn the quoted include of
                ;; the bundled copy, "htslib/htslib/NAME", into the
                ;; system-header form <htslib/NAME>.  Replacing the match
                ;; with an empty string would leave a bare '#include'.
                (("\"htslib/htslib/([^\"]*)\"" all header)
                 (string-append "<htslib/" header ">"))))))))
    (inputs
     (list zlib))
    (native-inputs
     (list autoconf automake libtool))
    ;; seqlib headers include headers from htslib and jsoncpp.  So,
    ;; they are propagated inputs.
(propagated-inputs (list htslib jsoncpp)) (home-page "https://github.com/Zilong-Li/SeqLib") (synopsis "C++ htslib interface for manipulating sequence data and VCF") (description "@code{seqlib} is a C++ htslib interface for manipulating sequence data and VCF files.") (license (list license:expat ; SeqLib/IntervalTree.h, SeqLib/aho_corasick.hpp, ; json/json-forwards.h, json/json.h, src/jsoncpp.cpp, src/ssw.c, license:asl2.0)))) ; main license (define-public vcfpp (package (name "vcfpp") (version "0.3.3") (source (origin (method url-fetch) (uri (string-append "https://github.com/Zilong-Li/vcfpp/releases/download/v" version "/vcfpp.h")) (sha256 (base32 "1wq76wz81y09ic37z30vljqnczhwx2qijav0nfvg6xi8wd2c75n3")))) (build-system copy-build-system) (arguments (list #:install-plan #~'(("vcfpp.h" "include/vcfpp/vcfpp.h")))) (home-page "https://github.com/Zilong-Li/vcfpp") (synopsis "C++ API of htslib") (description "@code{vcfpp} is a single C++ file as interface to the basic htslib. It can be easily included in a C++ program for scripting high-performance genomic analyses.") (license license:asl2.0))) (define-public r-stitch (package (name "r-stitch") (version "1.6.10") (source ;; The release tarball bundles dependencies. So, use git-fetch. 
(origin (method git-fetch) (uri (git-reference (url "https://github.com/rwdavies/STITCH") (commit version))) (file-name (git-file-name name version)) (sha256 (base32 "0iy5fq2l5a35xdxqaf9ypj56da57qmwppwqmh9nflbvmbc7kgbkf")))) (build-system r-build-system) (arguments (list #:phases #~(modify-phases %standard-phases (add-after 'unpack 'chdir (lambda _ (chdir "STITCH"))) (add-after 'chdir 'patch-build-system (lambda _ (substitute* "src/Makevars" (("\\$\\(SEQLIB_ROOT\\)/src/libseqlib.a") "-lseqlib") (("\\$\\(SEQLIB_ROOT\\)/htslib/libhts.a") "-lhts") ((": SeqLib") ":"))))))) (inputs (list curl htslib seqlib zlib)) (native-inputs (list autoconf automake vcfpp)) (propagated-inputs (list r-data-table r-rrbgen ;; FIXME: These should be inputs that are substituted into ;; the source. But, for some reason, the reference scanner ;; does not pick them up that way. coreutils findutils htslib rsync)) (home-page "https://github.com/rwdavies/STITCH") (synopsis "Sequencing to imputation through constructing haplotypes") (description "@code{r-stitch} is an R program for reference panel free, read aware, low coverage sequencing genotype imputation. STITCH runs on a set of samples with sequencing reads in BAM format, as well as a list of positions to genotype, and outputs imputed genotypes in VCF format.") (license license:gpl3))) (define-public hifiasm (package (name "hifiasm") (version "0.19.8") (source (origin (method git-fetch) (uri (git-reference (url "https://github.com/chhylp123/hifiasm") (commit version))) (file-name (git-file-name name version)) (sha256 (base32 "1g6m2qdc0224vjaic87669g7y9ky1yps07qbjkmbh1vakz4zmgvr")))) (build-system gnu-build-system) (arguments (list #:tests? 
#f                                  ; Tests are disabled.
#:phases
#~(modify-phases %standard-phases
    (delete 'configure)
    ;; Install the 'hifiasm' executable and its man page by hand.
    (replace 'install
      (lambda _
        (install-file "hifiasm" (string-append #$output "/bin"))
        (install-file "hifiasm.1"
                      (string-append #$output "/share/man/man1")))))))
(inputs
 (list zlib))
(home-page "https://github.com/chhylp123/hifiasm")
;; Synopses start with a capital letter, as 'guix lint' expects.
(synopsis "Haplotype-resolved assembler for accurate HiFi reads")
(description
 "Hifiasm is a fast haplotype-resolved de-novo assembler originally designed for PacBio HiFi reads. Its latest release supports the telomere-to-telomere assembly by utilizing ultralong Oxford Nanopore reads. Hifiasm produces arguably the best single-sample telomere-to-telomere assemblies combining HiFi, ultralong and Hi-C reads, and it is one of the best haplotype-resolved assemblers for the trio-binning assembly given parental short reads. For a human genome, hifiasm can produce the telomere-to-telomere assembly in one day.")
(license license:expat)))