diff options
Diffstat (limited to 'gn/packages/bioinformatics.scm')
-rw-r--r-- | gn/packages/bioinformatics.scm | 973 |
1 files changed, 266 insertions, 707 deletions
diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index bdd0322..ad4db7b 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -7,60 +7,20 @@ #:use-module (guix download) #:use-module (guix git-download) #:use-module (guix build-system ant) - #:use-module (guix build-system gnu) #:use-module (guix build-system cmake) - #:use-module (guix build-system perl) + #:use-module (guix build-system gnu) #:use-module (guix build-system python) - ;; #:use-module (guix build-system ruby) - #:use-module (guix build-system r) - #:use-module (guix build-system trivial) - #:use-module (gn packages statistics) - #:use-module (gnu packages autotools) - #:use-module (gnu packages algebra) - #:use-module (gnu packages base) + #:use-module (gnu packages) #:use-module (gnu packages bioinformatics) #:use-module (gnu packages boost) #:use-module (gnu packages compression) - #:use-module (gnu packages databases) #:use-module (gnu packages check) - #:use-module (gnu packages cmake) - #:use-module (gnu packages compression) - #:use-module (gnu packages cpio) - #:use-module (gnu packages curl) - #:use-module (gnu packages documentation) - #:use-module (gnu packages datastructures) - #:use-module (gnu packages file) - #:use-module (gnu packages gawk) #:use-module (gnu packages gcc) - #:use-module (gnu packages graphviz) - #:use-module (gnu packages java) - #:use-module (gnu packages linux) - #:use-module (gnu packages machine-learning) #:use-module (gnu packages maths) - #:use-module (gnu packages mpi) - #:use-module (gnu packages ncurses) - #:use-module (gnu packages node) - #:use-module (gnu packages parallel) - #:use-module (gnu packages pcre) #:use-module (gnu packages perl) - #:use-module (gnu packages pkg-config) - #:use-module (gnu packages popt) - #:use-module (gnu packages protobuf) #:use-module (gnu packages python) - #:use-module (gnu packages ruby) - #:use-module (gnu packages statistics) - #:use-module (gnu packages tbb) - #:use-module (gnu packages textutils) - #:use-module (gnu packages time) - #:use-module (gnu packages tls) - #:use-module (gnu packages vim) - #:use-module (gnu packages web) - #:use-module (gnu packages xml) - #:use-module (gnu packages bootstrap) - #:use-module (gnu packages dlang) -; #:use-module (gn packages ldc) - #:use-module (gn packages shell) - #:use-module (srfi srfi-1)) + #:use-module (gnu packages python-xyz) + #:use-module (gnu packages statistics)) (define-public contra (package @@ -69,10 +29,15 @@ (source (origin (method url-fetch) (uri (string-append - "mirror://sourceforge/contra-cnv/CONTRA.v" version ".tar.gz")) + "mirror://sourceforge/contra-cnv/CONTRA.V" + (version-major+minor version) "/CONTRA.v" version ".tar.gz")) (sha256 (base32 - "0agpcm2xh5f0i9n9sx1kvln6mzdksddmh11bvzj6bh76yw5pnw91")))) + "0agpcm2xh5f0i9n9sx1kvln6mzdksddmh11bvzj6bh76yw5pnw91")) + (modules '((guix build utils))) + (snippet + '(begin + (delete-file "BEDTools.v2.11.2.tar.gz") #t)))) (build-system gnu-build-system) (propagated-inputs `(("python" ,python-2) @@ -87,18 +52,16 @@ (delete 'configure) (delete 'build) ; We can use Guix's BEDtools instead. (replace 'install - (lambda _ - (let* ((out (assoc-ref %outputs "out")) + (lambda* (#:key outputs #:allow-other-keys) + (let* ((out (assoc-ref outputs "out")) (bin (string-append out "/bin")) (doc (string-append out "/share/doc/contra"))) - (mkdir-p bin) - (mkdir-p doc) - (and - (zero? (system* "cp" "--recursive" "scripts" bin)) - (zero? (system* "cp" "contra.py" bin)) - (zero? (system* "cp" "baseline.py" bin)) - ;; There's only a pre-built PDF available. - (zero? (system* "cp" "CONTRA_User_Guide.2.0.pdf" doc))))))))) + (copy-recursively "scripts" (string-append bin "/scripts")) + (install-file "contra.py" bin) + (install-file "baseline.py" bin) + ;; There's only a pre-built PDF available. + (install-file "CONTRA_User_Guide.2.0.pdf" doc)) + #t))))) (home-page "http://contra-cnv.sourceforge.net/") (synopsis "Tool for copy number variation (CNV) detection for targeted resequencing data") @@ -219,486 +182,6 @@ data. For whole genome sequencing data analysis, the program can also use mappability data (files created by GEM). ") (license license:gpl2+))) -(define-public tabixpp - (package - (name "tabixpp") - (version "1.0.0") - (source (origin - (method url-fetch) - (uri (string-append "https://github.com/ekg/tabixpp/archive/v" - version ".tar.gz")) - (file-name (string-append name "-" version ".tar.gz")) - (sha256 - (base32 "1s0lgks7qlvlhvcjhi2wm18nnza1bwcnic44ij7z8wfg88h4ivwn")))) - (build-system gnu-build-system) - (inputs - `(("htslib" ,htslib) - ("zlib" ,zlib))) - (arguments - `(#:tests? #f ; There are no tests to run. - #:phases - (modify-phases %standard-phases - (delete 'configure) ; There is no configure phase. - ;; The build phase needs overriding the location of htslib. - (replace 'build - (lambda* (#:key inputs #:allow-other-keys) - (let ((htslib-ref (assoc-ref inputs "htslib"))) - (zero? - (system* "make" - (string-append "HTS_LIB=" htslib-ref "/lib/libhts.a") - "HTS_HEADERS=" ; No need to check for headers here. - (string-append "LIBPATH=-L. -L" htslib-ref "/include")))))) - (replace 'install - (lambda* (#:key outputs #:allow-other-keys) - (let ((bin (string-append (assoc-ref outputs "out") "/bin"))) - (install-file "tabix++" bin))))))) - (home-page "https://github.com/ekg/tabixpp") - (synopsis "C++ wrapper around tabix project") - (description "This is a C++ wrapper around the Tabix project which abstracts -some of the details of opening and jumping in tabix-indexed files.") - (license license:expat))) - -;; This version works with FreeBayes while the released version doesn't. The -;; released creates a variable with the name "vcf" somewhere, which is also the -;; name of a namespace in vcflib. -(define-public tabixpp-freebayes - (let ((commit "bbc63a49acc52212199f92e9e3b8fba0a593e3f7")) - (package (inherit tabixpp) - (name "tabixpp-freebayes") - (version (string-append "0-1." (string-take commit 7))) - (source (origin - (method url-fetch) - (uri (string-append "https://github.com/ekg/tabixpp/archive/" - commit ".tar.gz")) - (file-name (string-append name "-" version "-checkout.tar.gz")) - (sha256 - (base32 "1s06wmpgj4my4pik5kp2lc42hzzazbp5ism2y4i2ajp2y1c68g77"))))))) - -(define-public smithwaterman - ;; TODO: Upgrading smithwaterman breaks FreeBayes. - (let ((commit "203218b47d45ac56ef234716f1bd4c741b289be1")) - (package - (name "smithwaterman") - (version (string-append "0-1." (string-take commit 7))) - (source (origin - (method url-fetch) - (uri (string-append "https://github.com/ekg/smithwaterman/archive/" - commit ".tar.gz")) - (file-name (string-append name "-" version "-checkout.tar.gz")) - (sha256 - (base32 "1lkxy4xkjn96l70jdbsrlm687jhisgw4il0xr2dm33qwcclzzm3b")))) - (build-system gnu-build-system) - (arguments - `(#:tests? #f ; There are no tests to run. - #:phases - (modify-phases %standard-phases - (delete 'configure) ; There is no configure phase. - (replace 'install - (lambda* (#:key outputs #:allow-other-keys) - (let ((bin (string-append (assoc-ref outputs "out") "/bin"))) - (install-file "smithwaterman" bin))))))) - (home-page "https://github.com/ekg/smithwaterman") - (synopsis "Implementation of the Smith-Waterman algorithm") - (description "Implementation of the Smith-Waterman algorithm.") - ;; The project contains a license file for the GPLv2. The source files - ;; do not contain a license notice, so GPLv2-only is assumed here. - (license license:gpl2)))) - -(define-public multichoose - (package - (name "multichoose") - (version "1.0.3") - (source (origin - (method url-fetch) - (uri (string-append "https://github.com/ekg/multichoose/archive/v" - version ".tar.gz")) - (file-name (string-append name "-" version ".tar.gz")) - (sha256 - (base32 "0xy86vvr3qrs4l81qis7ia1q2hnqv0xcb4a1n60smxbhqqis5w3l")))) - (build-system gnu-build-system) - (native-inputs - `(("python" ,python-2) - ("node" ,node))) - (arguments - `(#:tests? #f ; There are no tests to run. - #:phases - (modify-phases %standard-phases - (delete 'configure) ; There is no configure phase. - (replace 'install - (lambda* (#:key outputs #:allow-other-keys) - (let ((bin (string-append (assoc-ref outputs "out") "/bin"))) - ;; TODO: There are Python modules for these programs too. - (install-file "multichoose" bin) - (install-file "multipermute" bin))))))) - (home-page "https://github.com/ekg/multichoose") - (synopsis "Library for efficient loopless multiset combination generation -algorithm") - (description "A library implements an efficient loopless multiset -combination generation algorithm which is (approximately) described in -\"Loopless algorithms for generating permutations, combinations, and other -combinatorial configurations.\" G Ehrlich - Journal of the ACM (JACM), -1973. (Algorithm 7.)") - (license license:expat))) - -(define-public fsom - (let ((commit "a6ef318fbd347c53189384aef7f670c0e6ce89a3")) - (package - (name "fsom") - (version (string-append "0-1." (string-take commit 7))) - (source (origin - (method url-fetch) - (uri (string-append "https://github.com/ekg/fsom/archive/" - "a6ef318fbd347c53189384aef7f670c0e6ce89a3" ".tar.gz")) - (file-name (string-append name "-" version "-checkout.tar.gz")) - (sha256 - (base32 "0q6b57ppxfvsm5cqmmbfmjpn5qvx2zi5pamvp3yh8gpmmz8cfbl3")))) - (build-system gnu-build-system) - (arguments - `(#:tests? #f ; There are no tests to run. - #:phases - (modify-phases %standard-phases - (delete 'configure) ; There is no configure phase. - (replace 'install - (lambda* (#:key outputs #:allow-other-keys) - (let ((bin (string-append (assoc-ref outputs "out") "/bin"))) - (install-file "fsom" bin))))))) - (home-page "https://github.com/ekg/fsom") - (synopsis "Program for managing SOM (Self-Organizing Maps) neural networks") - (description "Program for managing SOM (Self-Organizing Maps) neural networks.") - (license license:gpl3)))) - -(define-public filevercmp - (let ((commit "1a9b779b93d0b244040274794d402106907b71b7")) - (package - (name "filevercmp") - (version (string-append "0-1." (string-take commit 7))) - (source (origin - (method url-fetch) - (uri (string-append "https://github.com/ekg/filevercmp/archive/" - commit ".tar.gz")) - (file-name "filevercmp-src.tar.gz") - (sha256 - (base32 "0yp5jswf5j2pqc6517x277s4s6h1ss99v57kxw9gy0jkfl3yh450")))) - (build-system gnu-build-system) - (arguments - `(#:tests? #f ; There are no tests to run. - #:phases - (modify-phases %standard-phases - (delete 'configure) ; There is no configure phase. - (replace 'install - (lambda* (#:key outputs #:allow-other-keys) - (let ((bin (string-append (assoc-ref outputs "out") "/bin"))) - (install-file "filevercmp" bin))))))) - (home-page "https://github.com/ekg/filevercmp") - (synopsis "Program to compare version strings") - (description "A program to compare version strings. It intends to be a -replacement for strverscmp.") - (license license:gpl3+)))) - -(define-public fastahack ; guix ready - (let ((commit "c68cebb4f2e5d5d2b70cf08fbdf1944e9ab2c2dd")) - (package - (name "fastahack") - (version (string-append "0-1." (string-take commit 7))) - (source (origin - (method url-fetch) - (uri (string-append "https://github.com/ekg/fastahack/archive/" - commit ".tar.gz")) - (file-name (string-append name "-" version "-checkout.tar.gz")) - (sha256 - (base32 "0j25lcl3jk1kls66zzxjfyq5ir6sfcvqrdwfcva61y3ajc9ssay2")))) - (build-system gnu-build-system) - (arguments - `(#:tests? #f ; There are no tests to run. - #:phases - (modify-phases %standard-phases - (delete 'configure) ; There is no configure phase. - (replace 'install - (lambda* (#:key outputs #:allow-other-keys) - (let ((bin (string-append (assoc-ref outputs "out") "/bin"))) - (install-file "fastahack" bin))))))) - (home-page "https://github.com/ekg/fastahack") - (synopsis "Program for indexing and sequence extraction from FASTA files") - (description "Fastahack is a small application for indexing and extracting -sequences and subsequences from FASTA files. The included Fasta.cpp library -provides a FASTA reader and indexer that can be embeddedinto applications which -would benefit from directly reading subsequences from FASTA files. The library -automatically handles index file generation and use.") - ;; There is no specific license for fastahack. - ;; A part of the program is licensed GPLv2. - (license (list license:non-copyleft license:gpl2))))) - -(define-public vcflib ; guix duplicat, see below? - (let ((commit "3ce827d8ebf89bb3bdc097ee0fe7f46f9f30d5fb")) - (package - (name "vcflib") - (version (string-append "1.0.2-1." (string-take commit 7))) - (source - (origin - (method url-fetch) - (uri (string-append "https://github.com/vcflib/vcflib/archive/" - "5ac091365fdc716cc47cc5410bb97ee5dc2a2c92" ".tar.gz")) - (file-name "vcflib-5ac0913.tar.gz") - (sha256 - (base32 "0ywshwpif059z5h0g7zzrdfzzdj2gr8xvwlwcsdxrms3p9iy35h8")))) - (build-system gnu-build-system) - (native-inputs - `(("htslib" ,htslib) - ("zlib" ,zlib) - ("python" ,python-2) - ("perl" ,perl) - ("r" ,r) - ("node" ,node) - ("tabixpp-src" ,(package-source tabixpp-freebayes)) - ("smithwaterman-src" ,(package-source smithwaterman)) - ("multichoose-src" ,(package-source multichoose)) - ("fsom-src" ,(package-source fsom)) - ("filevercmp-src" ,(package-source filevercmp)) - ("fastahack-src" ,(package-source fastahack)) - ("intervaltree-src" - ,(origin - (method url-fetch) - (uri (string-append - "https://github.com/ekg/intervaltree/archive/" - "dbb4c513d1ad3baac516fc1484c995daf9b42838" ".tar.gz")) - (file-name "intervaltree-src.tar.gz") - (sha256 - (base32 "19prwpn2wxsrijp5svfqvfcxl5nj7zdhm3jycd5kqhl9nifpmcks")))))) - (arguments - `(#:tests? #f - #:phases - (modify-phases %standard-phases - (delete 'configure) - (delete 'check) - (add-after 'unpack 'unpack-submodule-sources - (lambda* (#:key inputs #:allow-other-keys) - (let ((unpack (lambda (source target) - (with-directory-excursion target - (zero? (system* "tar" "xvf" - (assoc-ref inputs source) - "--strip-components=1")))))) - (and - (unpack "intervaltree-src" "intervaltree") - (unpack "fastahack-src" "fastahack") - (unpack "filevercmp-src" "filevercmp") - (unpack "fsom-src" "fsom") - (unpack "multichoose-src" "multichoose") - (unpack "smithwaterman-src" "smithwaterman") - (unpack "tabixpp-src" "tabixpp"))))) - (add-after 'unpack-submodule-sources 'fix-makefile - (lambda* (#:key inputs #:allow-other-keys) - (substitute* '("Makefile") - (("^GIT_VERSION.*") "GIT_VERSION = v1.0.0")))) - (replace - 'build - (lambda* (#:key inputs make-flags #:allow-other-keys) - (with-directory-excursion "tabixpp" - (zero? (system* "make"))) - (zero? (system* "make" "CC=gcc" - (string-append "CFLAGS=\"" "-Itabixpp " - "-I" (assoc-ref inputs "htslib") "/include " "\"") "all")))) - (replace - 'install - (lambda* (#:key outputs #:allow-other-keys) - (let ((bin (string-append (assoc-ref outputs "out") "/bin")) - ;;(include (string-append (assoc-ref outputs "out") "/include")) - (lib (string-append (assoc-ref outputs "out") "/lib"))) - (for-each (lambda (file) - (install-file file bin)) - (find-files "bin" ".*")) - ;; The header files do not correspond to libvcflib.a, therefore - ;; I left them out. - ;;(for-each (lambda (file) - ;; (install-file file include)) - ;; (find-files "src" "\\.h$")) - (install-file "libvcflib.a" lib))))))) - (home-page "https://github.com/vcflib/vcflib/") - (synopsis "Library for parsing and manipulating VCF files") - (description "Vcflib provides methods to manipulate and interpret -sequence variation as it can be described by VCF. It is both an API for parsing -and operating on records of genomic variation as it can be described by the VCF -format, and a collection of command-line utilities for executing complex -manipulations on VCF files.") - (license license:expat)))) - -(define-public bash-tap ; guix license issue - (package - (name "bash-tap") - (version "1.0.2") - (source (origin - (method url-fetch) - (uri (string-append "https://github.com/illusori/bash-tap/archive/" - version ".tar.gz")) - (file-name (string-append name "-" version ".tar.gz")) - (sha256 - (base32 "0qs1qi38bl3ns4mpagcawv618dsk2q1lgrbddgvs0wl3ia12cyz5")))) - (build-system trivial-build-system) - (native-inputs `(("source" ,source) - ("tar" ,tar) - ("gzip" ,gzip))) - (arguments - `(#:modules ((guix build utils)) - #:builder (begin - (use-modules (guix build utils)) - (let ((tar (string-append (assoc-ref %build-inputs "tar") "/bin/tar")) - (path (string-append (assoc-ref %build-inputs "gzip") "/bin")) - (bin (string-append %output "/bin")) - (source (string-append (assoc-ref %build-inputs "source")))) - (setenv "PATH" path) - (mkdir-p bin) - (with-directory-excursion bin - (zero? (system* tar "xvf" source - "--strip-components=1" - "--no-anchored" - "bash-tap" - "bash-tap-bootstrap" - "bash-tap-mock"))))))) - (home-page "http://www.illusori.co.uk/projects/bash-tap/") - (synopsis "Bash port of a Test::More/Test::Builder-style TAP-compliant -test library") - (description "Bash TAP is a TAP-compliant Test::More-style testing library -for Bash shell scripts and functions. Along with the Test::More-style testing -helpers it provides helper functions for mocking commands and functions and -in-process output capturing.") - ;; The author didn't specify a license. - (license license:public-domain))) - -(define-public freebayes ; guix dependent package issues - (let ((commit "3ce827d8ebf89bb3bdc097ee0fe7f46f9f30d5fb") - (revision "1")) - (package - (name "freebayes") - (version (string-append "1.0.2-" revision "." (string-take commit 7))) - (source (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/ekg/freebayes.git") - (commit commit))) - (file-name (string-append name "-" version "-checkout")) - (sha256 - (base32 "1sbzwmcbn78ybymjnhwk7qc5r912azy5vqz2y7y81616yc3ba2a2")))) - (build-system gnu-build-system) - (inputs - `(("zlib" ,zlib) - ("htslib" ,htslib))) - (native-inputs - `(("bc" ,bc) ; Needed for running tests. - ("samtools" ,samtools) ; Needed for running tests. - ("parallel" ,parallel) ; Needed for running tests. - ("procps" ,procps) ; Needed for running tests. - ("bamtools" ,bamtools) - ("cmake" ,cmake) - ("python" ,python-2) - ("node" ,node) - ("r" ,r) - ("perl" ,perl) - ("bamtools-src" ,(package-source bamtools)) - ("vcflib-src" ,(package-source vcflib)) - ;; These are submodules for the vcflib version used in freebayes - ("tabixpp-src" ,(package-source tabixpp-freebayes)) - ("smithwaterman-src" ,(package-source smithwaterman)) - ("multichoose-src" ,(package-source multichoose)) - ("fsom-src" ,(package-source fsom)) - ("filevercmp-src" ,(package-source filevercmp)) - ("fastahack-src" ,(package-source fastahack)) - ("intervaltree-src" - ,(origin - (method url-fetch) - (uri (string-append - "https://github.com/ekg/intervaltree/archive/" - "dbb4c513d1ad3baac516fc1484c995daf9b42838" ".tar.gz")) - (file-name "intervaltree-src.tar.gz") - (sha256 - (base32 "19prwpn2wxsrijp5svfqvfcxl5nj7zdhm3jycd5kqhl9nifpmcks")))) - ;; These submodules are needed to run the tests. - ("bash-tap-src" ,(package-source bash-tap)) - ;; ,(origin - ;; (method url-fetch) - ;; (uri (string-append "https://github.com/illusori/bash-tap/archive/" - ;; "c38fbfa401600cc81ccda66bfc0da3ea56288d03" ".tar.gz")) - ;; (file-name "bash-tap-src.tar.gz") - ;; (sha256 - ;; (base32 "07ijb1p0aa65ajpg9nkghc183iha6lwiydkckay8pghapa01j6nz")))) - ("test-simple-bash-src" - ,(origin - (method url-fetch) - (uri (string-append "https://github.com/ingydotnet/test-simple-bash/archive/" - "124673ff204b01c8e96b7fc9f9b32ee35d898acc" ".tar.gz")) - (file-name "test-simple-bash-src.tar.gz") - (sha256 - (base32 "016xf3wbgqbav9dncvfdx5k0f10z5xwq8jdszajzmcvnhz5wis14")))))) - (arguments - `(#:phases - (modify-phases %standard-phases - (delete 'configure) - (add-after 'unpack 'unpack-submodule-sources - (lambda* (#:key inputs #:allow-other-keys) - (let ((unpack (lambda (source target) - (with-directory-excursion target - (zero? (system* "tar" "xvf" - (assoc-ref inputs source) - "--strip-components=1")))))) - (and - (unpack "bamtools-src" "bamtools") - (unpack "vcflib-src" "vcflib") - ;;(unpack "intervaltree-src" "intervaltree") - (unpack "fastahack-src" "vcflib/fastahack") - (unpack "filevercmp-src" "vcflib/filevercmp") - (unpack "fsom-src" "vcflib/fsom") - (unpack "intervaltree-src" "vcflib/intervaltree") - (unpack "multichoose-src" "vcflib/multichoose") - (unpack "smithwaterman-src" "vcflib/smithwaterman") - (unpack "tabixpp-src" "vcflib/tabixpp") - (unpack "test-simple-bash-src" "test/test-simple-bash") - (unpack "bash-tap-src" "test/bash-tap"))))) - (add-after 'unpack-submodule-sources 'fix-makefile - (lambda* (#:key inputs #:allow-other-keys) - ;; We don't have the .git folder to get the version tag from. - ;; For this checkout of the code, it's v1.0.0. - (substitute* '("vcflib/Makefile") - (("^GIT_VERSION.*") "GIT_VERSION = v1.0.0")))) - (replace 'build - (lambda* (#:key inputs make-flags #:allow-other-keys) - (and - ;; Compile Bamtools before compiling the main project. - (with-directory-excursion "bamtools" - (system* "mkdir" "build") - (with-directory-excursion "build" - (and (zero? (system* "cmake" "../")) - (zero? (system* "make"))))) - ;; Compile vcflib before we compiling the main project. - (with-directory-excursion "vcflib" - (with-directory-excursion "tabixpp" - (let ((htslib-ref (assoc-ref inputs "htslib"))) - (zero? - (system* "make" "HTS_HEADERS=" - (string-append "HTS_LIB=" htslib-ref "/lib/libhts.a") - (string-append "LIBPATH=-L. -L" htslib-ref "/include"))))) - (zero? (system* "make" "CC=gcc" - (string-append "CFLAGS=\"" "-Itabixpp " - "-I" (assoc-ref inputs "htslib") "/include " "\"") "all"))) - (with-directory-excursion "src" - (zero? (system* "make")))))) - (replace 'install - (lambda* (#:key outputs #:allow-other-keys) - (let ((bin (string-append (assoc-ref outputs "out") "/bin"))) - (install-file "bin/freebayes" bin) - (install-file "bin/bamleftalign" bin)))) - ;; There are three tests that fail. All because of the -P - ;; (--perl-regexp) option in grep, which is not compiled into the - ;; version of grep in Guix. - (replace 'check - (lambda* (#:key inputs #:allow-other-keys) - (system* "make" "test")))))) - (home-page "https://github.com/ekg/freebayes") - (synopsis "Haplotype-based variant detector") - (description "FreeBayes is a Bayesian genetic variant detector designed to -find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms), -indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and -complex events (composite insertion and substitution events) smaller than the -length of a short-read sequencing alignment.") - (license license:expat)))) - (define-public plink2 (package (name "plink2") @@ -805,146 +288,18 @@ integration with gPLINK and Haploview, there is some support for the subsequent visualization, annotation and storage of results.") (license license:gpl3+)))) - -(define-public vcflib ; duplicate? See above - (let ((commit "3ce827d8ebf89bb3bdc097ee0fe7f46f9f30d5fb")) - (package - (name "vcflib") - (version (string-append "v1.0.2-" (string-take commit 7))) - (source - (origin - (method url-fetch) - (uri (string-append "https://github.com/vcflib/vcflib/archive/" - "5ac091365fdc716cc47cc5410bb97ee5dc2a2c92" ".tar.gz")) - (file-name "vcflib-5ac0913.tar.gz") - (sha256 - (base32 "0ywshwpif059z5h0g7zzrdfzzdj2gr8xvwlwcsdxrms3p9iy35h8")))) - (build-system gnu-build-system) - (native-inputs - `(("htslib" ,htslib) - ("zlib" ,zlib) - ("python" ,python-2) - ("perl" ,perl) - ("tabixpp-src" - ,(origin - (method url-fetch) - (uri (string-append "https://github.com/ekg/tabixpp/archive/" - "bbc63a49acc52212199f92e9e3b8fba0a593e3f7" ".tar.gz")) - (file-name "tabixpp-src.tar.gz") - (sha256 - (base32 "1s06wmpgj4my4pik5kp2lc42hzzazbp5ism2y4i2ajp2y1c68g77")))) - ("intervaltree-src" - ,(origin - (method url-fetch) - (uri (string-append - "https://github.com/ekg/intervaltree/archive/" - "dbb4c513d1ad3baac516fc1484c995daf9b42838" ".tar.gz")) - (file-name "intervaltree-src.tar.gz") - (sha256 - (base32 "19prwpn2wxsrijp5svfqvfcxl5nj7zdhm3jycd5kqhl9nifpmcks")))) - ("smithwaterman-src" - ,(origin - (method url-fetch) - (uri (string-append "https://github.com/ekg/smithwaterman/archive/" - "203218b47d45ac56ef234716f1bd4c741b289be1" ".tar.gz")) - (file-name "smithwaterman-src.tar.gz") - (sha256 - (base32 "1lkxy4xkjn96l70jdbsrlm687jhisgw4il0xr2dm33qwcclzzm3b")))) - ("multichoose-src" - ,(origin - (method url-fetch) - (uri (string-append "https://github.com/ekg/multichoose/archive/" - "73d35daa18bf35729b9ba758041a9247a72484a5" ".tar.gz")) - (file-name "multichoose-src.tar.gz") - (sha256 - (base32 "07aizwdabmlnjaq4p3v0vsasgz1xzxid8xcxcw3paq8kh9c1099i")))) - ("fsom-src" - ,(origin - (method url-fetch) - (uri (string-append "https://github.com/ekg/fsom/archive/" - "a6ef318fbd347c53189384aef7f670c0e6ce89a3" ".tar.gz")) - (file-name "fsom-src.tar.gz") - (sha256 - (base32 "0q6b57ppxfvsm5cqmmbfmjpn5qvx2zi5pamvp3yh8gpmmz8cfbl3")))) - ("filevercmp-src" - ,(origin - (method url-fetch) - (uri (string-append "https://github.com/ekg/filevercmp/archive/" - "1a9b779b93d0b244040274794d402106907b71b7" ".tar.gz")) - (file-name "filevercmp-src.tar.gz") - (sha256 - (base32 "0yp5jswf5j2pqc6517x277s4s6h1ss99v57kxw9gy0jkfl3yh450")))) - ("fastahack-src" - ,(origin - (method url-fetch) - (uri (string-append "https://github.com/ekg/fastahack/archive/" - "c68cebb4f2e5d5d2b70cf08fbdf1944e9ab2c2dd" ".tar.gz")) - (file-name "fastahack-src.tar.gz") - (sha256 - (base32 "0j25lcl3jk1kls66zzxjfyq5ir6sfcvqrdwfcva61y3ajc9ssay2")))))) - (arguments - `(#:tests? #f - #:phases - (modify-phases %standard-phases - (delete 'configure) - (delete 'check) - (add-after 'unpack 'unpack-submodule-sources - (lambda* (#:key inputs #:allow-other-keys) - (let ((unpack (lambda (source target) - (with-directory-excursion target - (zero? (system* "tar" "xvf" - (assoc-ref inputs source) - "--strip-components=1")))))) - (and - (unpack "intervaltree-src" "intervaltree") - (unpack "fastahack-src" "fastahack") - (unpack "filevercmp-src" "filevercmp") - (unpack "fsom-src" "fsom") - (unpack "intervaltree-src" "intervaltree") - (unpack "multichoose-src" "multichoose") - (unpack "smithwaterman-src" "smithwaterman") - (unpack "tabixpp-src" "tabixpp"))))) - (add-after 'unpack-submodule-sources 'fix-makefile - (lambda* (#:key inputs #:allow-other-keys) - (substitute* '("Makefile") - (("^GIT_VERSION.*") "GIT_VERSION = v1.0.0")))) - (replace - 'build - (lambda* (#:key inputs make-flags #:allow-other-keys) - (with-directory-excursion "tabixpp" - (zero? (system* "make"))) - (zero? (system* "make" "CC=gcc" - (string-append "CFLAGS=\"" "-Itabixpp " - "-I" (assoc-ref inputs "htslib") "/include " "\"") "all")))) - (replace - 'install - (lambda* (#:key outputs #:allow-other-keys) - (let ((bin (string-append (assoc-ref outputs "out") "/bin")) - (lib (string-append (assoc-ref outputs "out") "/lib"))) - (for-each (lambda (file) - (install-file file bin)) - (find-files "bin" ".*")) - (install-file "libvcflib.a" lib))))))) - (home-page "https://github.com/vcflib/vcflib/") - (synopsis "Library for parsing and manipulating VCF files") - (description "Vcflib provides methods to manipulate and interpret -sequence variation as it can be described by VCF. It is both an API for parsing -and operating on records of genomic variation as it can be described by the VCF -format, and a collection of command-line utilities for executing complex -manipulations on VCF files.") - (license license:expat)))) - (define-public pindel (package (name "pindel") (version "0.2.5b8") (source (origin - (method url-fetch) - (uri (string-append "https://github.com/genome/pindel/archive/v" - version ".tar.gz")) - (file-name (string-append name "-" version ".tar.gz")) + (method git-fetch) + (uri (git-reference + (url "https://github.com/genome/pindel.git") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) (sha256 - (base32 "06bsf0psxwf7h5p3j97xkh9k5qrwhxh6xn942y1j1m2inyhgs8bz")))) + (base32 "16a32fbgv1n58nfcxa1nyphrdrad80sgpinfa9p028n6plwycpww")))) (build-system gnu-build-system) (inputs `(("samtools" ,samtools) @@ -968,7 +323,7 @@ manipulations on VCF files.") ;; The second run actually compiles the program. Now Makefile.local ;; is available, and we should treat an exiting make with an error as ;; a true error. - (zero? (system* "make")))) + (invoke "make"))) (replace 'install (lambda* (#:key outputs #:allow-other-keys) (let ((bin (string-append (assoc-ref outputs "out") "/bin"))) @@ -979,13 +334,11 @@ manipulations on VCF files.") ;; There are multiple test targets, so in order to run all ;; tests, we must run the separate make targets. (replace 'check - (lambda* (#:key inputs #:allow-other-keys) - (and - (zero? (system* "make" "acceptance-tests")) - (zero? (system* "make" "coverage-tests")) - (zero? (system* "make" "cppcheck")) - (zero? (system* "make" "functional-tests")) - (zero? (system* "make" "regression-tests")))))))) + (lambda _ + (for-each (lambda (target) + (invoke "make" target)) + '("acceptance-tests" "coverage-tests" "cppcheck" + "functional-tests" "regression-tests"))))))) (home-page "https://github.com/genome/pindel") (synopsis "Structural variants detector for next-gen sequencing data") (description "Pindel can detect breakpoints of large deletions, medium sized @@ -1008,45 +361,251 @@ reads.") (base32 "0y45ympkza7qwcbcisg006286pwjbr5978n03hx5nvl09f0mapk8")))) (build-system ant-build-system) (arguments - `(#:phases + `(#:tests? #f ; build.xml does not exist + #:phases (modify-phases %standard-phases (replace 'unpack (lambda _ (mkdir "source") (chdir "source") - (and - ;; Unpack the Java archive containing the source files. - (zero? (system* "jar" "xf" (assoc-ref %build-inputs "source"))) - ;; Remove existing compiled output. - (with-directory-excursion "net/sf/varscan/" - (for-each (lambda (file) - (unless (string= (string-take-right file 5) ".java") - (zero? (system* "rm" file)))) - (find-files "." #:directories? #f)))))) + ;; Unpack the Java archive containing the source files. + (invoke "jar" "xf" (assoc-ref %build-inputs "source")) + ;; Remove existing compiled output. + (with-directory-excursion "net/sf/varscan/" + (for-each (lambda (file) + (delete-file file)) + (find-files "." "^.java$" #:directories? #f))) + #t)) (replace 'build (lambda _ - (let ((classes '())) - (and - ;; Compile the source files. - (with-directory-excursion "net/sf/varscan/" - (for-each (lambda (file) - (when (string= (string-take-right file 5) ".java") - (zero? (system* "javac" file)) - (cons )) - (find-files "." #:directories? #f))) - ;; Construct the new Java archive. - (zero? (system* "jar" "cfm" "varscan-2.4.1.jar" - "META-INF/MANIFEST.MF" - "net/sf/varscan/*.java"))))))) + ;; Compile the source files. + (with-directory-excursion "net/sf/varscan/" + (for-each (lambda (file) + (invoke "javac" file)) + (find-files "." ".java$" #:directories? #f))) + ;; Construct the new Java archive. + (apply invoke "jar" "cfm" + (string-append "varscan-" ,version ".jar") + "META-INF/MANIFEST.MF" + (find-files "net/sf/varscan" ".java$")))) (replace 'install - (lambda _ - (let ((out (string-append (assoc-ref %outputs "out") + (lambda* (#:key outputs #:allow-other-keys) + (let ((out (string-append (assoc-ref outputs "out") "/share/java/varscan/"))) - (mkdir-p out) - (install-file "varscan-2.4.1.jar" out))))))) - (home-page "http://dkoboldt.github.io/varscan/") + (install-file (string-append "varscan-" ,version ".jar") out)) + #t))))) + (home-page "https://dkoboldt.github.io/varscan/") (synopsis "Variant detection in massively parallel sequencing data") - (description "") + (description "Variant detection in massively parallel sequencing data.") ;; Free for non-commercial use by academic, government, and ;; non-profit/not-for-profit institutions (license license:non-copyleft))) + +(define-public edirect-gn + (package + (inherit edirect) + (name "edirect-gn") + (arguments + (substitute-keyword-arguments (package-arguments edirect) + ((#:phases phases) + `(modify-phases ,phases + ; (replace 'build + ; (lambda* (#:key inputs #:allow-other-keys) + ; (let ((go (string-append (assoc-ref inputs "go") "/bin/go"))) + ; (invoke go "build" "xtract.go")))) + (add-after 'unpack 'patch-programs + (lambda* (#:key inputs #:allow-other-keys) + (let ((gzip (assoc-ref inputs "gzip"))) + (substitute* '("index-bioc" + "pm-index" + "pm-invert" + "pm-stash" + "rchive.go" + "run-ncbi-converter") + (("gunzip") (string-append gzip "/bin/gunzip"))) + (substitute* (find-files "." "^e") + (("exec perl") "exec")) + (substitute* '("xtract" "rchive") + ;; or add current directory to PATH + ((".*PATH.*") ""))) + #t)) + (replace 'install + (lambda* (#:key inputs outputs #:allow-other-keys) + (let ((bin (string-append (assoc-ref outputs "out") "/bin")) + (xtract.linux (assoc-ref inputs "xtract.Linux")) + (rchive.linux (assoc-ref inputs "rchive.Linux"))) + (for-each + (lambda (file) + (install-file file bin)) + '("archive-pubmed" "asp-cp" "asp-ls" "download-pubmed" + "edirect.pl" "efetch" "epost" "fetch-pubmed" "ftp-cp" + "ftp-ls" "has-asp" "pm-prepare" "pm-refresh" "pm-stash" + "rchive" "xtract")) + (copy-file xtract.linux (string-append bin "/xtract.Linux")) + (copy-file rchive.linux (string-append bin "/rchive.Linux")) + (chmod (string-append bin "/xtract.Linux") #o555) + (chmod (string-append bin "/rchive.Linux") #o555)) + #t)) + (replace 'wrap-program + (lambda* (#:key outputs #:allow-other-keys) + ;; Make sure 'edirect.pl' finds all perl inputs at runtime. + (let ((out (assoc-ref outputs "out")) + (path (getenv "PERL5LIB"))) + (for-each + (lambda (file) + (wrap-program (string-append out "/bin/" file) + `("PERL5LIB" ":" prefix (,path)))) + '("edirect.pl" "asp-ls" "ftp-cp" "ftp-ls"))) + #t)))))) + (inputs + `(("gzip" ,gzip) + ,@(package-inputs edirect))) + (native-inputs + `( + ;("go" ,go) + ("xtract.Linux" + ,(origin + (method url-fetch) + (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/" + "versions/" (package-version edirect) "/xtract.Linux")) + (file-name (string-append "xtract.Linux-" (package-version edirect))) + (sha256 + (base32 + "0fx6arpn38spnwszmvkkpa3498qrrlglg2l9jw91icgqbyjjq9wq")))) + ("rchive.Linux" + ,(origin + (method url-fetch) + (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/" + "versions/" (package-version edirect) "/rchive.Linux")) + (file-name (string-append "rchive.Linux-" (package-version edirect))) + (sha256 + (base32 + "134y0zprplqlplc6qmcjb97411bxkwghmq3z0qjgh0dgdbzjq1w3")))))) + (native-search-paths + ;; Ideally this should be set for LWP somewhere. + (list (search-path-specification + (variable "PERL_LWP_SSL_CA_FILE") + (file-type 'regular) + (separator #f) + (files '("/etc/ssl/certs/ca-certificates.crt"))))) + ;; Due to the precompiled binaries we download: + (supported-systems '("x86_64-linux")))) + +;; TODO: Unbundle zlib, bamtools, tclap +(define-public sniffles + (package + (name "sniffles") + (version "1.0.11") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/fritzsedlazeck/Sniffles.git") + (commit version))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0rkwqn1ycckfzrg2wdid4cqahq8q2jmmgi7vvl8qxgpsihqfbq0j")))) + (build-system cmake-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (replace 'install + (lambda* (#:key outputs #:allow-other-keys) + (let ((out (assoc-ref outputs "out"))) + (install-file (string-append "../source/bin/sniffles-core-" + ,version "/sniffles") + (string-append out "/bin"))) + #t)) + (replace 'check + (lambda _ + (with-directory-excursion "../source/test_set" + (for-each make-file-writable (find-files ".")) + (invoke (string-append "../bin/sniffles-core-" ,version "/sniffles") + "-m" "reads_region.bam" "-v" "test.vcf"))))))) + (native-inputs + `(("zlib" ,zlib))) + (home-page "https://github.com/fritzsedlazeck/Sniffles") + (synopsis "Structural variation caller using third generation sequencing") + (description + "Sniffles is a structural variation caller using third generation sequencing +(PacBio or Oxford Nanopore). It detects all types of SVs (10bp+) using evidence +from split-read alignments, high-mismatch regions, and coverage analysis.") + (license license:expat))) + +;; TODO: Unbundle Complete-Striped-Smith-Waterman-Library +(define-public ngmlr + (package + (name "ngmlr") + (version "0.2.7") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/philres/ngmlr.git") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0lmsy8w0kxbyfnrln7lxgmnx3d82sv2b20n2yw5742rvfhq1v31n")))) + (build-system cmake-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after 'patch-source-shebangs 'patch-more-tools + (lambda* (#:key inputs #:allow-other-keys) + (let ((bed (assoc-ref inputs "bedtools")) + (sam (assoc-ref inputs "samtools"))) + (substitute* (find-files "test" "\\.sh$") + (("bedtools") (string-append bed "/bin/bedtools")) + (("samtools") (string-append sam "/bin/samtools"))) + #t))) + (replace 'check + (lambda _ + (with-directory-excursion "../source" + (invoke "sh" "test/test_travis.sh"))))))) + (native-inputs + `(("bedtools" ,bedtools) + ("samtools" ,samtools))) + (inputs + `(("zlib" ,zlib))) + (home-page "https://github.com/philres/ngmlr") + (synopsis "Long-read mapper designed to align PacBio or Oxford Nanopore") + (description + "NGMLR is a long-read mapper designed to align PacBio or Oxford Nanopore +(standard and ultra-long) to a reference genome with a focus on reads that span +structural variations.") + (license license:expat))) + +(define-public svim + (package + (name "svim") + (version "1.2.0") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/eldariont/svim.git") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 "08j02in9jbq41b67dna1apnc3y30i37v44d1khml1xlx0iga720s")))) + (build-system python-build-system) + (arguments + '(#:phases + (modify-phases %standard-phases + (replace 'check + (lambda _ + (invoke "python3" "-m" "unittest" "discover" "-s" "src/")))))) + (propagated-inputs + `(("python-matplotlib" ,python-matplotlib) + ("python-numpy" ,python-numpy) + ("python-pysam" ,python-pysam) + ("python-scipy" ,python-scipy) + ("minimap2" ,minimap2) + ("ngmlr" ,ngmlr) + ("samtools" ,samtools))) + (home-page "https://github.com/eldariont/svim") + (synopsis "Structural Variant Identification Method using Long Reads") + (description + "SVIM (pronounced SWIM) is a structural variant caller for long reads. It +is able to detect, classify and genotype five different classes of structural +variants. Unlike existing methods, SVIM integrates information from across the +genome to precisely distinguish similar events, such as tandem and interspersed +duplications and novel element insertions.") + (license license:gpl3))) |