aboutsummaryrefslogtreecommitdiff
path: root/gn/packages/bioinformatics.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gn/packages/bioinformatics.scm')
-rw-r--r--gn/packages/bioinformatics.scm973
1 files changed, 266 insertions, 707 deletions
diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm
index bdd0322..ad4db7b 100644
--- a/gn/packages/bioinformatics.scm
+++ b/gn/packages/bioinformatics.scm
@@ -7,60 +7,20 @@
#:use-module (guix download)
#:use-module (guix git-download)
#:use-module (guix build-system ant)
- #:use-module (guix build-system gnu)
#:use-module (guix build-system cmake)
- #:use-module (guix build-system perl)
+ #:use-module (guix build-system gnu)
#:use-module (guix build-system python)
- ;; #:use-module (guix build-system ruby)
- #:use-module (guix build-system r)
- #:use-module (guix build-system trivial)
- #:use-module (gn packages statistics)
- #:use-module (gnu packages autotools)
- #:use-module (gnu packages algebra)
- #:use-module (gnu packages base)
+ #:use-module (gnu packages)
#:use-module (gnu packages bioinformatics)
#:use-module (gnu packages boost)
#:use-module (gnu packages compression)
- #:use-module (gnu packages databases)
#:use-module (gnu packages check)
- #:use-module (gnu packages cmake)
- #:use-module (gnu packages compression)
- #:use-module (gnu packages cpio)
- #:use-module (gnu packages curl)
- #:use-module (gnu packages documentation)
- #:use-module (gnu packages datastructures)
- #:use-module (gnu packages file)
- #:use-module (gnu packages gawk)
#:use-module (gnu packages gcc)
- #:use-module (gnu packages graphviz)
- #:use-module (gnu packages java)
- #:use-module (gnu packages linux)
- #:use-module (gnu packages machine-learning)
#:use-module (gnu packages maths)
- #:use-module (gnu packages mpi)
- #:use-module (gnu packages ncurses)
- #:use-module (gnu packages node)
- #:use-module (gnu packages parallel)
- #:use-module (gnu packages pcre)
#:use-module (gnu packages perl)
- #:use-module (gnu packages pkg-config)
- #:use-module (gnu packages popt)
- #:use-module (gnu packages protobuf)
#:use-module (gnu packages python)
- #:use-module (gnu packages ruby)
- #:use-module (gnu packages statistics)
- #:use-module (gnu packages tbb)
- #:use-module (gnu packages textutils)
- #:use-module (gnu packages time)
- #:use-module (gnu packages tls)
- #:use-module (gnu packages vim)
- #:use-module (gnu packages web)
- #:use-module (gnu packages xml)
- #:use-module (gnu packages bootstrap)
- #:use-module (gnu packages dlang)
-; #:use-module (gn packages ldc)
- #:use-module (gn packages shell)
- #:use-module (srfi srfi-1))
+ #:use-module (gnu packages python-xyz)
+ #:use-module (gnu packages statistics))
(define-public contra
(package
@@ -69,10 +29,15 @@
(source (origin
(method url-fetch)
(uri (string-append
- "mirror://sourceforge/contra-cnv/CONTRA.v" version ".tar.gz"))
+ "mirror://sourceforge/contra-cnv/CONTRA.V"
+ (version-major+minor version) "/CONTRA.v" version ".tar.gz"))
(sha256
(base32
- "0agpcm2xh5f0i9n9sx1kvln6mzdksddmh11bvzj6bh76yw5pnw91"))))
+ "0agpcm2xh5f0i9n9sx1kvln6mzdksddmh11bvzj6bh76yw5pnw91"))
+ (modules '((guix build utils)))
+ (snippet
+ '(begin
+ (delete-file "BEDTools.v2.11.2.tar.gz") #t))))
(build-system gnu-build-system)
(propagated-inputs
`(("python" ,python-2)
@@ -87,18 +52,16 @@
(delete 'configure)
(delete 'build) ; We can use Guix's BEDtools instead.
(replace 'install
- (lambda _
- (let* ((out (assoc-ref %outputs "out"))
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let* ((out (assoc-ref outputs "out"))
(bin (string-append out "/bin"))
(doc (string-append out "/share/doc/contra")))
- (mkdir-p bin)
- (mkdir-p doc)
- (and
- (zero? (system* "cp" "--recursive" "scripts" bin))
- (zero? (system* "cp" "contra.py" bin))
- (zero? (system* "cp" "baseline.py" bin))
- ;; There's only a pre-built PDF available.
- (zero? (system* "cp" "CONTRA_User_Guide.2.0.pdf" doc)))))))))
+ (copy-recursively "scripts" (string-append bin "/scripts"))
+ (install-file "contra.py" bin)
+ (install-file "baseline.py" bin)
+ ;; There's only a pre-built PDF available.
+ (install-file "CONTRA_User_Guide.2.0.pdf" doc))
+ #t)))))
(home-page "http://contra-cnv.sourceforge.net/")
(synopsis "Tool for copy number variation (CNV) detection for targeted
resequencing data")
@@ -219,486 +182,6 @@ data. For whole genome sequencing data analysis, the program can also use
mappability data (files created by GEM). ")
(license license:gpl2+)))
-(define-public tabixpp
- (package
- (name "tabixpp")
- (version "1.0.0")
- (source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/ekg/tabixpp/archive/v"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32 "1s0lgks7qlvlhvcjhi2wm18nnza1bwcnic44ij7z8wfg88h4ivwn"))))
- (build-system gnu-build-system)
- (inputs
- `(("htslib" ,htslib)
- ("zlib" ,zlib)))
- (arguments
- `(#:tests? #f ; There are no tests to run.
- #:phases
- (modify-phases %standard-phases
- (delete 'configure) ; There is no configure phase.
- ;; The build phase needs overriding the location of htslib.
- (replace 'build
- (lambda* (#:key inputs #:allow-other-keys)
- (let ((htslib-ref (assoc-ref inputs "htslib")))
- (zero?
- (system* "make"
- (string-append "HTS_LIB=" htslib-ref "/lib/libhts.a")
- "HTS_HEADERS=" ; No need to check for headers here.
- (string-append "LIBPATH=-L. -L" htslib-ref "/include"))))))
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
- (install-file "tabix++" bin)))))))
- (home-page "https://github.com/ekg/tabixpp")
- (synopsis "C++ wrapper around tabix project")
- (description "This is a C++ wrapper around the Tabix project which abstracts
-some of the details of opening and jumping in tabix-indexed files.")
- (license license:expat)))
-
-;; This version works with FreeBayes while the released version doesn't. The
-;; released creates a variable with the name "vcf" somewhere, which is also the
-;; name of a namespace in vcflib.
-(define-public tabixpp-freebayes
- (let ((commit "bbc63a49acc52212199f92e9e3b8fba0a593e3f7"))
- (package (inherit tabixpp)
- (name "tabixpp-freebayes")
- (version (string-append "0-1." (string-take commit 7)))
- (source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/ekg/tabixpp/archive/"
- commit ".tar.gz"))
- (file-name (string-append name "-" version "-checkout.tar.gz"))
- (sha256
- (base32 "1s06wmpgj4my4pik5kp2lc42hzzazbp5ism2y4i2ajp2y1c68g77")))))))
-
-(define-public smithwaterman
- ;; TODO: Upgrading smithwaterman breaks FreeBayes.
- (let ((commit "203218b47d45ac56ef234716f1bd4c741b289be1"))
- (package
- (name "smithwaterman")
- (version (string-append "0-1." (string-take commit 7)))
- (source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/ekg/smithwaterman/archive/"
- commit ".tar.gz"))
- (file-name (string-append name "-" version "-checkout.tar.gz"))
- (sha256
- (base32 "1lkxy4xkjn96l70jdbsrlm687jhisgw4il0xr2dm33qwcclzzm3b"))))
- (build-system gnu-build-system)
- (arguments
- `(#:tests? #f ; There are no tests to run.
- #:phases
- (modify-phases %standard-phases
- (delete 'configure) ; There is no configure phase.
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
- (install-file "smithwaterman" bin)))))))
- (home-page "https://github.com/ekg/smithwaterman")
- (synopsis "Implementation of the Smith-Waterman algorithm")
- (description "Implementation of the Smith-Waterman algorithm.")
- ;; The project contains a license file for the GPLv2. The source files
- ;; do not contain a license notice, so GPLv2-only is assumed here.
- (license license:gpl2))))
-
-(define-public multichoose
- (package
- (name "multichoose")
- (version "1.0.3")
- (source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/ekg/multichoose/archive/v"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32 "0xy86vvr3qrs4l81qis7ia1q2hnqv0xcb4a1n60smxbhqqis5w3l"))))
- (build-system gnu-build-system)
- (native-inputs
- `(("python" ,python-2)
- ("node" ,node)))
- (arguments
- `(#:tests? #f ; There are no tests to run.
- #:phases
- (modify-phases %standard-phases
- (delete 'configure) ; There is no configure phase.
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
- ;; TODO: There are Python modules for these programs too.
- (install-file "multichoose" bin)
- (install-file "multipermute" bin)))))))
- (home-page "https://github.com/ekg/multichoose")
- (synopsis "Library for efficient loopless multiset combination generation
-algorithm")
- (description "A library implements an efficient loopless multiset
-combination generation algorithm which is (approximately) described in
-\"Loopless algorithms for generating permutations, combinations, and other
-combinatorial configurations.\" G Ehrlich - Journal of the ACM (JACM),
-1973. (Algorithm 7.)")
- (license license:expat)))
-
-(define-public fsom
- (let ((commit "a6ef318fbd347c53189384aef7f670c0e6ce89a3"))
- (package
- (name "fsom")
- (version (string-append "0-1." (string-take commit 7)))
- (source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/ekg/fsom/archive/"
- "a6ef318fbd347c53189384aef7f670c0e6ce89a3" ".tar.gz"))
- (file-name (string-append name "-" version "-checkout.tar.gz"))
- (sha256
- (base32 "0q6b57ppxfvsm5cqmmbfmjpn5qvx2zi5pamvp3yh8gpmmz8cfbl3"))))
- (build-system gnu-build-system)
- (arguments
- `(#:tests? #f ; There are no tests to run.
- #:phases
- (modify-phases %standard-phases
- (delete 'configure) ; There is no configure phase.
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
- (install-file "fsom" bin)))))))
- (home-page "https://github.com/ekg/fsom")
- (synopsis "Program for managing SOM (Self-Organizing Maps) neural networks")
- (description "Program for managing SOM (Self-Organizing Maps) neural networks.")
- (license license:gpl3))))
-
-(define-public filevercmp
- (let ((commit "1a9b779b93d0b244040274794d402106907b71b7"))
- (package
- (name "filevercmp")
- (version (string-append "0-1." (string-take commit 7)))
- (source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/ekg/filevercmp/archive/"
- commit ".tar.gz"))
- (file-name "filevercmp-src.tar.gz")
- (sha256
- (base32 "0yp5jswf5j2pqc6517x277s4s6h1ss99v57kxw9gy0jkfl3yh450"))))
- (build-system gnu-build-system)
- (arguments
- `(#:tests? #f ; There are no tests to run.
- #:phases
- (modify-phases %standard-phases
- (delete 'configure) ; There is no configure phase.
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
- (install-file "filevercmp" bin)))))))
- (home-page "https://github.com/ekg/filevercmp")
- (synopsis "Program to compare version strings")
- (description "A program to compare version strings. It intends to be a
-replacement for strverscmp.")
- (license license:gpl3+))))
-
-(define-public fastahack ; guix ready
- (let ((commit "c68cebb4f2e5d5d2b70cf08fbdf1944e9ab2c2dd"))
- (package
- (name "fastahack")
- (version (string-append "0-1." (string-take commit 7)))
- (source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/ekg/fastahack/archive/"
- commit ".tar.gz"))
- (file-name (string-append name "-" version "-checkout.tar.gz"))
- (sha256
- (base32 "0j25lcl3jk1kls66zzxjfyq5ir6sfcvqrdwfcva61y3ajc9ssay2"))))
- (build-system gnu-build-system)
- (arguments
- `(#:tests? #f ; There are no tests to run.
- #:phases
- (modify-phases %standard-phases
- (delete 'configure) ; There is no configure phase.
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
- (install-file "fastahack" bin)))))))
- (home-page "https://github.com/ekg/fastahack")
- (synopsis "Program for indexing and sequence extraction from FASTA files")
- (description "Fastahack is a small application for indexing and extracting
-sequences and subsequences from FASTA files. The included Fasta.cpp library
-provides a FASTA reader and indexer that can be embeddedinto applications which
-would benefit from directly reading subsequences from FASTA files. The library
-automatically handles index file generation and use.")
- ;; There is no specific license for fastahack.
- ;; A part of the program is licensed GPLv2.
- (license (list license:non-copyleft license:gpl2)))))
-
-(define-public vcflib ; guix duplicat, see below?
- (let ((commit "3ce827d8ebf89bb3bdc097ee0fe7f46f9f30d5fb"))
- (package
- (name "vcflib")
- (version (string-append "1.0.2-1." (string-take commit 7)))
- (source
- (origin
- (method url-fetch)
- (uri (string-append "https://github.com/vcflib/vcflib/archive/"
- "5ac091365fdc716cc47cc5410bb97ee5dc2a2c92" ".tar.gz"))
- (file-name "vcflib-5ac0913.tar.gz")
- (sha256
- (base32 "0ywshwpif059z5h0g7zzrdfzzdj2gr8xvwlwcsdxrms3p9iy35h8"))))
- (build-system gnu-build-system)
- (native-inputs
- `(("htslib" ,htslib)
- ("zlib" ,zlib)
- ("python" ,python-2)
- ("perl" ,perl)
- ("r" ,r)
- ("node" ,node)
- ("tabixpp-src" ,(package-source tabixpp-freebayes))
- ("smithwaterman-src" ,(package-source smithwaterman))
- ("multichoose-src" ,(package-source multichoose))
- ("fsom-src" ,(package-source fsom))
- ("filevercmp-src" ,(package-source filevercmp))
- ("fastahack-src" ,(package-source fastahack))
- ("intervaltree-src"
- ,(origin
- (method url-fetch)
- (uri (string-append
- "https://github.com/ekg/intervaltree/archive/"
- "dbb4c513d1ad3baac516fc1484c995daf9b42838" ".tar.gz"))
- (file-name "intervaltree-src.tar.gz")
- (sha256
- (base32 "19prwpn2wxsrijp5svfqvfcxl5nj7zdhm3jycd5kqhl9nifpmcks"))))))
- (arguments
- `(#:tests? #f
- #:phases
- (modify-phases %standard-phases
- (delete 'configure)
- (delete 'check)
- (add-after 'unpack 'unpack-submodule-sources
- (lambda* (#:key inputs #:allow-other-keys)
- (let ((unpack (lambda (source target)
- (with-directory-excursion target
- (zero? (system* "tar" "xvf"
- (assoc-ref inputs source)
- "--strip-components=1"))))))
- (and
- (unpack "intervaltree-src" "intervaltree")
- (unpack "fastahack-src" "fastahack")
- (unpack "filevercmp-src" "filevercmp")
- (unpack "fsom-src" "fsom")
- (unpack "multichoose-src" "multichoose")
- (unpack "smithwaterman-src" "smithwaterman")
- (unpack "tabixpp-src" "tabixpp")))))
- (add-after 'unpack-submodule-sources 'fix-makefile
- (lambda* (#:key inputs #:allow-other-keys)
- (substitute* '("Makefile")
- (("^GIT_VERSION.*") "GIT_VERSION = v1.0.0"))))
- (replace
- 'build
- (lambda* (#:key inputs make-flags #:allow-other-keys)
- (with-directory-excursion "tabixpp"
- (zero? (system* "make")))
- (zero? (system* "make" "CC=gcc"
- (string-append "CFLAGS=\"" "-Itabixpp "
- "-I" (assoc-ref inputs "htslib") "/include " "\"") "all"))))
- (replace
- 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
- ;;(include (string-append (assoc-ref outputs "out") "/include"))
- (lib (string-append (assoc-ref outputs "out") "/lib")))
- (for-each (lambda (file)
- (install-file file bin))
- (find-files "bin" ".*"))
- ;; The header files do not correspond to libvcflib.a, therefore
- ;; I left them out.
- ;;(for-each (lambda (file)
- ;; (install-file file include))
- ;; (find-files "src" "\\.h$"))
- (install-file "libvcflib.a" lib)))))))
- (home-page "https://github.com/vcflib/vcflib/")
- (synopsis "Library for parsing and manipulating VCF files")
- (description "Vcflib provides methods to manipulate and interpret
-sequence variation as it can be described by VCF. It is both an API for parsing
-and operating on records of genomic variation as it can be described by the VCF
-format, and a collection of command-line utilities for executing complex
-manipulations on VCF files.")
- (license license:expat))))
-
-(define-public bash-tap ; guix license issue
- (package
- (name "bash-tap")
- (version "1.0.2")
- (source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/illusori/bash-tap/archive/"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
- (sha256
- (base32 "0qs1qi38bl3ns4mpagcawv618dsk2q1lgrbddgvs0wl3ia12cyz5"))))
- (build-system trivial-build-system)
- (native-inputs `(("source" ,source)
- ("tar" ,tar)
- ("gzip" ,gzip)))
- (arguments
- `(#:modules ((guix build utils))
- #:builder (begin
- (use-modules (guix build utils))
- (let ((tar (string-append (assoc-ref %build-inputs "tar") "/bin/tar"))
- (path (string-append (assoc-ref %build-inputs "gzip") "/bin"))
- (bin (string-append %output "/bin"))
- (source (string-append (assoc-ref %build-inputs "source"))))
- (setenv "PATH" path)
- (mkdir-p bin)
- (with-directory-excursion bin
- (zero? (system* tar "xvf" source
- "--strip-components=1"
- "--no-anchored"
- "bash-tap"
- "bash-tap-bootstrap"
- "bash-tap-mock")))))))
- (home-page "http://www.illusori.co.uk/projects/bash-tap/")
- (synopsis "Bash port of a Test::More/Test::Builder-style TAP-compliant
-test library")
- (description "Bash TAP is a TAP-compliant Test::More-style testing library
-for Bash shell scripts and functions. Along with the Test::More-style testing
-helpers it provides helper functions for mocking commands and functions and
-in-process output capturing.")
- ;; The author didn't specify a license.
- (license license:public-domain)))
-
-(define-public freebayes ; guix dependent package issues
- (let ((commit "3ce827d8ebf89bb3bdc097ee0fe7f46f9f30d5fb")
- (revision "1"))
- (package
- (name "freebayes")
- (version (string-append "1.0.2-" revision "." (string-take commit 7)))
- (source (origin
- (method git-fetch)
- (uri (git-reference
- (url "https://github.com/ekg/freebayes.git")
- (commit commit)))
- (file-name (string-append name "-" version "-checkout"))
- (sha256
- (base32 "1sbzwmcbn78ybymjnhwk7qc5r912azy5vqz2y7y81616yc3ba2a2"))))
- (build-system gnu-build-system)
- (inputs
- `(("zlib" ,zlib)
- ("htslib" ,htslib)))
- (native-inputs
- `(("bc" ,bc) ; Needed for running tests.
- ("samtools" ,samtools) ; Needed for running tests.
- ("parallel" ,parallel) ; Needed for running tests.
- ("procps" ,procps) ; Needed for running tests.
- ("bamtools" ,bamtools)
- ("cmake" ,cmake)
- ("python" ,python-2)
- ("node" ,node)
- ("r" ,r)
- ("perl" ,perl)
- ("bamtools-src" ,(package-source bamtools))
- ("vcflib-src" ,(package-source vcflib))
- ;; These are submodules for the vcflib version used in freebayes
- ("tabixpp-src" ,(package-source tabixpp-freebayes))
- ("smithwaterman-src" ,(package-source smithwaterman))
- ("multichoose-src" ,(package-source multichoose))
- ("fsom-src" ,(package-source fsom))
- ("filevercmp-src" ,(package-source filevercmp))
- ("fastahack-src" ,(package-source fastahack))
- ("intervaltree-src"
- ,(origin
- (method url-fetch)
- (uri (string-append
- "https://github.com/ekg/intervaltree/archive/"
- "dbb4c513d1ad3baac516fc1484c995daf9b42838" ".tar.gz"))
- (file-name "intervaltree-src.tar.gz")
- (sha256
- (base32 "19prwpn2wxsrijp5svfqvfcxl5nj7zdhm3jycd5kqhl9nifpmcks"))))
- ;; These submodules are needed to run the tests.
- ("bash-tap-src" ,(package-source bash-tap))
- ;; ,(origin
- ;; (method url-fetch)
- ;; (uri (string-append "https://github.com/illusori/bash-tap/archive/"
- ;; "c38fbfa401600cc81ccda66bfc0da3ea56288d03" ".tar.gz"))
- ;; (file-name "bash-tap-src.tar.gz")
- ;; (sha256
- ;; (base32 "07ijb1p0aa65ajpg9nkghc183iha6lwiydkckay8pghapa01j6nz"))))
- ("test-simple-bash-src"
- ,(origin
- (method url-fetch)
- (uri (string-append "https://github.com/ingydotnet/test-simple-bash/archive/"
- "124673ff204b01c8e96b7fc9f9b32ee35d898acc" ".tar.gz"))
- (file-name "test-simple-bash-src.tar.gz")
- (sha256
- (base32 "016xf3wbgqbav9dncvfdx5k0f10z5xwq8jdszajzmcvnhz5wis14"))))))
- (arguments
- `(#:phases
- (modify-phases %standard-phases
- (delete 'configure)
- (add-after 'unpack 'unpack-submodule-sources
- (lambda* (#:key inputs #:allow-other-keys)
- (let ((unpack (lambda (source target)
- (with-directory-excursion target
- (zero? (system* "tar" "xvf"
- (assoc-ref inputs source)
- "--strip-components=1"))))))
- (and
- (unpack "bamtools-src" "bamtools")
- (unpack "vcflib-src" "vcflib")
- ;;(unpack "intervaltree-src" "intervaltree")
- (unpack "fastahack-src" "vcflib/fastahack")
- (unpack "filevercmp-src" "vcflib/filevercmp")
- (unpack "fsom-src" "vcflib/fsom")
- (unpack "intervaltree-src" "vcflib/intervaltree")
- (unpack "multichoose-src" "vcflib/multichoose")
- (unpack "smithwaterman-src" "vcflib/smithwaterman")
- (unpack "tabixpp-src" "vcflib/tabixpp")
- (unpack "test-simple-bash-src" "test/test-simple-bash")
- (unpack "bash-tap-src" "test/bash-tap")))))
- (add-after 'unpack-submodule-sources 'fix-makefile
- (lambda* (#:key inputs #:allow-other-keys)
- ;; We don't have the .git folder to get the version tag from.
- ;; For this checkout of the code, it's v1.0.0.
- (substitute* '("vcflib/Makefile")
- (("^GIT_VERSION.*") "GIT_VERSION = v1.0.0"))))
- (replace 'build
- (lambda* (#:key inputs make-flags #:allow-other-keys)
- (and
- ;; Compile Bamtools before compiling the main project.
- (with-directory-excursion "bamtools"
- (system* "mkdir" "build")
- (with-directory-excursion "build"
- (and (zero? (system* "cmake" "../"))
- (zero? (system* "make")))))
- ;; Compile vcflib before we compiling the main project.
- (with-directory-excursion "vcflib"
- (with-directory-excursion "tabixpp"
- (let ((htslib-ref (assoc-ref inputs "htslib")))
- (zero?
- (system* "make" "HTS_HEADERS="
- (string-append "HTS_LIB=" htslib-ref "/lib/libhts.a")
- (string-append "LIBPATH=-L. -L" htslib-ref "/include")))))
- (zero? (system* "make" "CC=gcc"
- (string-append "CFLAGS=\"" "-Itabixpp "
- "-I" (assoc-ref inputs "htslib") "/include " "\"") "all")))
- (with-directory-excursion "src"
- (zero? (system* "make"))))))
- (replace 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
- (install-file "bin/freebayes" bin)
- (install-file "bin/bamleftalign" bin))))
- ;; There are three tests that fail. All because of the -P
- ;; (--perl-regexp) option in grep, which is not compiled into the
- ;; version of grep in Guix.
- (replace 'check
- (lambda* (#:key inputs #:allow-other-keys)
- (system* "make" "test"))))))
- (home-page "https://github.com/ekg/freebayes")
- (synopsis "Haplotype-based variant detector")
- (description "FreeBayes is a Bayesian genetic variant detector designed to
-find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms),
-indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and
-complex events (composite insertion and substitution events) smaller than the
-length of a short-read sequencing alignment.")
- (license license:expat))))
-
(define-public plink2
(package
(name "plink2")
@@ -805,146 +288,18 @@ integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
(license license:gpl3+))))
-
-(define-public vcflib ; duplicate? See above
- (let ((commit "3ce827d8ebf89bb3bdc097ee0fe7f46f9f30d5fb"))
- (package
- (name "vcflib")
- (version (string-append "v1.0.2-" (string-take commit 7)))
- (source
- (origin
- (method url-fetch)
- (uri (string-append "https://github.com/vcflib/vcflib/archive/"
- "5ac091365fdc716cc47cc5410bb97ee5dc2a2c92" ".tar.gz"))
- (file-name "vcflib-5ac0913.tar.gz")
- (sha256
- (base32 "0ywshwpif059z5h0g7zzrdfzzdj2gr8xvwlwcsdxrms3p9iy35h8"))))
- (build-system gnu-build-system)
- (native-inputs
- `(("htslib" ,htslib)
- ("zlib" ,zlib)
- ("python" ,python-2)
- ("perl" ,perl)
- ("tabixpp-src"
- ,(origin
- (method url-fetch)
- (uri (string-append "https://github.com/ekg/tabixpp/archive/"
- "bbc63a49acc52212199f92e9e3b8fba0a593e3f7" ".tar.gz"))
- (file-name "tabixpp-src.tar.gz")
- (sha256
- (base32 "1s06wmpgj4my4pik5kp2lc42hzzazbp5ism2y4i2ajp2y1c68g77"))))
- ("intervaltree-src"
- ,(origin
- (method url-fetch)
- (uri (string-append
- "https://github.com/ekg/intervaltree/archive/"
- "dbb4c513d1ad3baac516fc1484c995daf9b42838" ".tar.gz"))
- (file-name "intervaltree-src.tar.gz")
- (sha256
- (base32 "19prwpn2wxsrijp5svfqvfcxl5nj7zdhm3jycd5kqhl9nifpmcks"))))
- ("smithwaterman-src"
- ,(origin
- (method url-fetch)
- (uri (string-append "https://github.com/ekg/smithwaterman/archive/"
- "203218b47d45ac56ef234716f1bd4c741b289be1" ".tar.gz"))
- (file-name "smithwaterman-src.tar.gz")
- (sha256
- (base32 "1lkxy4xkjn96l70jdbsrlm687jhisgw4il0xr2dm33qwcclzzm3b"))))
- ("multichoose-src"
- ,(origin
- (method url-fetch)
- (uri (string-append "https://github.com/ekg/multichoose/archive/"
- "73d35daa18bf35729b9ba758041a9247a72484a5" ".tar.gz"))
- (file-name "multichoose-src.tar.gz")
- (sha256
- (base32 "07aizwdabmlnjaq4p3v0vsasgz1xzxid8xcxcw3paq8kh9c1099i"))))
- ("fsom-src"
- ,(origin
- (method url-fetch)
- (uri (string-append "https://github.com/ekg/fsom/archive/"
- "a6ef318fbd347c53189384aef7f670c0e6ce89a3" ".tar.gz"))
- (file-name "fsom-src.tar.gz")
- (sha256
- (base32 "0q6b57ppxfvsm5cqmmbfmjpn5qvx2zi5pamvp3yh8gpmmz8cfbl3"))))
- ("filevercmp-src"
- ,(origin
- (method url-fetch)
- (uri (string-append "https://github.com/ekg/filevercmp/archive/"
- "1a9b779b93d0b244040274794d402106907b71b7" ".tar.gz"))
- (file-name "filevercmp-src.tar.gz")
- (sha256
- (base32 "0yp5jswf5j2pqc6517x277s4s6h1ss99v57kxw9gy0jkfl3yh450"))))
- ("fastahack-src"
- ,(origin
- (method url-fetch)
- (uri (string-append "https://github.com/ekg/fastahack/archive/"
- "c68cebb4f2e5d5d2b70cf08fbdf1944e9ab2c2dd" ".tar.gz"))
- (file-name "fastahack-src.tar.gz")
- (sha256
- (base32 "0j25lcl3jk1kls66zzxjfyq5ir6sfcvqrdwfcva61y3ajc9ssay2"))))))
- (arguments
- `(#:tests? #f
- #:phases
- (modify-phases %standard-phases
- (delete 'configure)
- (delete 'check)
- (add-after 'unpack 'unpack-submodule-sources
- (lambda* (#:key inputs #:allow-other-keys)
- (let ((unpack (lambda (source target)
- (with-directory-excursion target
- (zero? (system* "tar" "xvf"
- (assoc-ref inputs source)
- "--strip-components=1"))))))
- (and
- (unpack "intervaltree-src" "intervaltree")
- (unpack "fastahack-src" "fastahack")
- (unpack "filevercmp-src" "filevercmp")
- (unpack "fsom-src" "fsom")
- (unpack "intervaltree-src" "intervaltree")
- (unpack "multichoose-src" "multichoose")
- (unpack "smithwaterman-src" "smithwaterman")
- (unpack "tabixpp-src" "tabixpp")))))
- (add-after 'unpack-submodule-sources 'fix-makefile
- (lambda* (#:key inputs #:allow-other-keys)
- (substitute* '("Makefile")
- (("^GIT_VERSION.*") "GIT_VERSION = v1.0.0"))))
- (replace
- 'build
- (lambda* (#:key inputs make-flags #:allow-other-keys)
- (with-directory-excursion "tabixpp"
- (zero? (system* "make")))
- (zero? (system* "make" "CC=gcc"
- (string-append "CFLAGS=\"" "-Itabixpp "
- "-I" (assoc-ref inputs "htslib") "/include " "\"") "all"))))
- (replace
- 'install
- (lambda* (#:key outputs #:allow-other-keys)
- (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
- (lib (string-append (assoc-ref outputs "out") "/lib")))
- (for-each (lambda (file)
- (install-file file bin))
- (find-files "bin" ".*"))
- (install-file "libvcflib.a" lib)))))))
- (home-page "https://github.com/vcflib/vcflib/")
- (synopsis "Library for parsing and manipulating VCF files")
- (description "Vcflib provides methods to manipulate and interpret
-sequence variation as it can be described by VCF. It is both an API for parsing
-and operating on records of genomic variation as it can be described by the VCF
-format, and a collection of command-line utilities for executing complex
-manipulations on VCF files.")
- (license license:expat))))
-
(define-public pindel
(package
(name "pindel")
(version "0.2.5b8")
(source (origin
- (method url-fetch)
- (uri (string-append "https://github.com/genome/pindel/archive/v"
- version ".tar.gz"))
- (file-name (string-append name "-" version ".tar.gz"))
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/genome/pindel.git")
+ (commit (string-append "v" version))))
+ (file-name (git-file-name name version))
(sha256
- (base32 "06bsf0psxwf7h5p3j97xkh9k5qrwhxh6xn942y1j1m2inyhgs8bz"))))
+ (base32 "16a32fbgv1n58nfcxa1nyphrdrad80sgpinfa9p028n6plwycpww"))))
(build-system gnu-build-system)
(inputs
`(("samtools" ,samtools)
@@ -968,7 +323,7 @@ manipulations on VCF files.")
;; The second run actually compiles the program. Now Makefile.local
;; is available, and we should treat an exiting make with an error as
;; a true error.
- (zero? (system* "make"))))
+ (invoke "make")))
(replace 'install
(lambda* (#:key outputs #:allow-other-keys)
(let ((bin (string-append (assoc-ref outputs "out") "/bin")))
@@ -979,13 +334,11 @@ manipulations on VCF files.")
;; There are multiple test targets, so in order to run all
;; tests, we must run the separate make targets.
(replace 'check
- (lambda* (#:key inputs #:allow-other-keys)
- (and
- (zero? (system* "make" "acceptance-tests"))
- (zero? (system* "make" "coverage-tests"))
- (zero? (system* "make" "cppcheck"))
- (zero? (system* "make" "functional-tests"))
- (zero? (system* "make" "regression-tests"))))))))
+ (lambda _
+ (for-each (lambda (target)
+ (invoke "make" target))
+ '("acceptance-tests" "coverage-tests" "cppcheck"
+ "functional-tests" "regression-tests")))))))
(home-page "https://github.com/genome/pindel")
(synopsis "Structural variants detector for next-gen sequencing data")
(description "Pindel can detect breakpoints of large deletions, medium sized
@@ -1008,45 +361,251 @@ reads.")
(base32 "0y45ympkza7qwcbcisg006286pwjbr5978n03hx5nvl09f0mapk8"))))
(build-system ant-build-system)
(arguments
- `(#:phases
+ `(#:tests? #f ; build.xml does not exist
+ #:phases
(modify-phases %standard-phases
(replace 'unpack
(lambda _
(mkdir "source")
(chdir "source")
- (and
- ;; Unpack the Java archive containing the source files.
- (zero? (system* "jar" "xf" (assoc-ref %build-inputs "source")))
- ;; Remove existing compiled output.
- (with-directory-excursion "net/sf/varscan/"
- (for-each (lambda (file)
- (unless (string= (string-take-right file 5) ".java")
- (zero? (system* "rm" file))))
- (find-files "." #:directories? #f))))))
+ ;; Unpack the Java archive containing the source files.
+ (invoke "jar" "xf" (assoc-ref %build-inputs "source"))
+ ;; Remove existing compiled output.
+ (with-directory-excursion "net/sf/varscan/"
+ (for-each (lambda (file)
+ (delete-file file))
+ (find-files "." "^.java$" #:directories? #f)))
+ #t))
(replace 'build
(lambda _
- (let ((classes '()))
- (and
- ;; Compile the source files.
- (with-directory-excursion "net/sf/varscan/"
- (for-each (lambda (file)
- (when (string= (string-take-right file 5) ".java")
- (zero? (system* "javac" file))
- (cons ))
- (find-files "." #:directories? #f)))
- ;; Construct the new Java archive.
- (zero? (system* "jar" "cfm" "varscan-2.4.1.jar"
- "META-INF/MANIFEST.MF"
- "net/sf/varscan/*.java")))))))
+ ;; Compile the source files.
+ (with-directory-excursion "net/sf/varscan/"
+ (for-each (lambda (file)
+ (invoke "javac" file))
+ (find-files "." ".java$" #:directories? #f)))
+ ;; Construct the new Java archive.
+ (apply invoke "jar" "cfm"
+ (string-append "varscan-" ,version ".jar")
+ "META-INF/MANIFEST.MF"
+ (find-files "net/sf/varscan" ".java$"))))
(replace 'install
- (lambda _
- (let ((out (string-append (assoc-ref %outputs "out")
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let ((out (string-append (assoc-ref outputs "out")
"/share/java/varscan/")))
- (mkdir-p out)
- (install-file "varscan-2.4.1.jar" out)))))))
- (home-page "http://dkoboldt.github.io/varscan/")
+ (install-file (string-append "varscan-" ,version ".jar") out))
+ #t)))))
+ (home-page "https://dkoboldt.github.io/varscan/")
(synopsis "Variant detection in massively parallel sequencing data")
- (description "")
+ (description "Variant detection in massively parallel sequencing data.")
;; Free for non-commercial use by academic, government, and
;; non-profit/not-for-profit institutions
(license license:non-copyleft)))
+
+(define-public edirect-gn
+ (package
+ (inherit edirect)
+ (name "edirect-gn")
+ (arguments
+ (substitute-keyword-arguments (package-arguments edirect)
+ ((#:phases phases)
+ `(modify-phases ,phases
+ ; (replace 'build
+ ; (lambda* (#:key inputs #:allow-other-keys)
+ ; (let ((go (string-append (assoc-ref inputs "go") "/bin/go")))
+ ; (invoke go "build" "xtract.go"))))
+ (add-after 'unpack 'patch-programs
+ (lambda* (#:key inputs #:allow-other-keys)
+ (let ((gzip (assoc-ref inputs "gzip")))
+ (substitute* '("index-bioc"
+ "pm-index"
+ "pm-invert"
+ "pm-stash"
+ "rchive.go"
+ "run-ncbi-converter")
+ (("gunzip") (string-append gzip "/bin/gunzip")))
+ (substitute* (find-files "." "^e")
+ (("exec perl") "exec"))
+ (substitute* '("xtract" "rchive")
+ ;; or add current directory to PATH
+ ((".*PATH.*") "")))
+ #t))
+ (replace 'install
+ (lambda* (#:key inputs outputs #:allow-other-keys)
+ (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
+ (xtract.linux (assoc-ref inputs "xtract.Linux"))
+ (rchive.linux (assoc-ref inputs "rchive.Linux")))
+ (for-each
+ (lambda (file)
+ (install-file file bin))
+ '("archive-pubmed" "asp-cp" "asp-ls" "download-pubmed"
+ "edirect.pl" "efetch" "epost" "fetch-pubmed" "ftp-cp"
+ "ftp-ls" "has-asp" "pm-prepare" "pm-refresh" "pm-stash"
+ "rchive" "xtract"))
+ (copy-file xtract.linux (string-append bin "/xtract.Linux"))
+ (copy-file rchive.linux (string-append bin "/rchive.Linux"))
+ (chmod (string-append bin "/xtract.Linux") #o555)
+ (chmod (string-append bin "/rchive.Linux") #o555))
+ #t))
+ (replace 'wrap-program
+ (lambda* (#:key outputs #:allow-other-keys)
+ ;; Make sure 'edirect.pl' finds all perl inputs at runtime.
+ (let ((out (assoc-ref outputs "out"))
+ (path (getenv "PERL5LIB")))
+ (for-each
+ (lambda (file)
+ (wrap-program (string-append out "/bin/" file)
+ `("PERL5LIB" ":" prefix (,path))))
+ '("edirect.pl" "asp-ls" "ftp-cp" "ftp-ls")))
+ #t))))))
+ (inputs
+ `(("gzip" ,gzip)
+ ,@(package-inputs edirect)))
+ (native-inputs
+ `(
+ ;("go" ,go)
+ ("xtract.Linux"
+ ,(origin
+ (method url-fetch)
+ (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/"
+ "versions/" (package-version edirect) "/xtract.Linux"))
+ (file-name (string-append "xtract.Linux-" (package-version edirect)))
+ (sha256
+ (base32
+ "0fx6arpn38spnwszmvkkpa3498qrrlglg2l9jw91icgqbyjjq9wq"))))
+ ("rchive.Linux"
+ ,(origin
+ (method url-fetch)
+ (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/"
+ "versions/" (package-version edirect) "/rchive.Linux"))
+ (file-name (string-append "rchive.Linux-" (package-version edirect)))
+ (sha256
+ (base32
+ "134y0zprplqlplc6qmcjb97411bxkwghmq3z0qjgh0dgdbzjq1w3"))))))
+ (native-search-paths
+ ;; Ideally this should be set for LWP somewhere.
+ (list (search-path-specification
+ (variable "PERL_LWP_SSL_CA_FILE")
+ (file-type 'regular)
+ (separator #f)
+ (files '("/etc/ssl/certs/ca-certificates.crt")))))
+ ;; Due to the precompiled binaries we download:
+ (supported-systems '("x86_64-linux"))))
+
+;; TODO: Unbundle zlib, bamtools, tclap
+(define-public sniffles
+ (package
+ (name "sniffles")
+ (version "1.0.11")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/fritzsedlazeck/Sniffles.git")
+ (commit version)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "0rkwqn1ycckfzrg2wdid4cqahq8q2jmmgi7vvl8qxgpsihqfbq0j"))))
+ (build-system cmake-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (replace 'install
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let ((out (assoc-ref outputs "out")))
+ (install-file (string-append "../source/bin/sniffles-core-"
+ ,version "/sniffles")
+ (string-append out "/bin")))
+ #t))
+ (replace 'check
+ (lambda _
+ (with-directory-excursion "../source/test_set"
+ (for-each make-file-writable (find-files "."))
+ (invoke (string-append "../bin/sniffles-core-" ,version "/sniffles")
+ "-m" "reads_region.bam" "-v" "test.vcf")))))))
+ (native-inputs
+ `(("zlib" ,zlib)))
+ (home-page "https://github.com/fritzsedlazeck/Sniffles")
+ (synopsis "Structural variation caller using third generation sequencing")
+ (description
+ "Sniffles is a structural variation caller using third generation sequencing
+(PacBio or Oxford Nanopore). It detects all types of SVs (10bp+) using evidence
+from split-read alignments, high-mismatch regions, and coverage analysis.")
+ (license license:expat)))
+
+;; TODO: Unbundle Complete-Striped-Smith-Waterman-Library
+(define-public ngmlr
+ (package
+ (name "ngmlr")
+ (version "0.2.7")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/philres/ngmlr.git")
+ (commit (string-append "v" version))))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "0lmsy8w0kxbyfnrln7lxgmnx3d82sv2b20n2yw5742rvfhq1v31n"))))
+ (build-system cmake-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (add-after 'patch-source-shebangs 'patch-more-tools
+ (lambda* (#:key inputs #:allow-other-keys)
+ (let ((bed (assoc-ref inputs "bedtools"))
+ (sam (assoc-ref inputs "samtools")))
+ (substitute* (find-files "test" "\\.sh$")
+ (("bedtools") (string-append bed "/bin/bedtools"))
+ (("samtools") (string-append sam "/bin/samtools")))
+ #t)))
+ (replace 'check
+ (lambda _
+ (with-directory-excursion "../source"
+ (invoke "sh" "test/test_travis.sh")))))))
+ (native-inputs
+ `(("bedtools" ,bedtools)
+ ("samtools" ,samtools)))
+ (inputs
+ `(("zlib" ,zlib)))
+ (home-page "https://github.com/philres/ngmlr")
+ (synopsis "Long-read mapper designed to align PacBio or Oxford Nanopore")
+ (description
+ "NGMLR is a long-read mapper designed to align PacBio or Oxford Nanopore
+(standard and ultra-long) to a reference genome with a focus on reads that span
+structural variations.")
+ (license license:expat)))
+
+(define-public svim
+ (package
+ (name "svim")
+ (version "1.2.0")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/eldariont/svim.git")
+ (commit (string-append "v" version))))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "08j02in9jbq41b67dna1apnc3y30i37v44d1khml1xlx0iga720s"))))
+ (build-system python-build-system)
+ (arguments
+ '(#:phases
+ (modify-phases %standard-phases
+ (replace 'check
+ (lambda _
+ (invoke "python3" "-m" "unittest" "discover" "-s" "src/"))))))
+ (propagated-inputs
+ `(("python-matplotlib" ,python-matplotlib)
+ ("python-numpy" ,python-numpy)
+ ("python-pysam" ,python-pysam)
+ ("python-scipy" ,python-scipy)
+ ("minimap2" ,minimap2)
+ ("ngmlr" ,ngmlr)
+ ("samtools" ,samtools)))
+ (home-page "https://github.com/eldariont/svim")
+ (synopsis "Structural Variant Identification Method using Long Reads")
+ (description
+ "SVIM (pronounced SWIM) is a structural variant caller for long reads. It
+is able to detect, classify and genotype five different classes of structural
+variants. Unlike existing methods, SVIM integrates information from across the
+genome to precisely distinguish similar events, such as tandem and interspersed
+duplications and novel element insertions.")
+ (license license:gpl3)))