You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
5751 lines
230 KiB
5751 lines
230 KiB
;;; GNU Guix --- Functional package management for GNU
|
|
;;; Copyright © 2014, 2015, 2016 Ricardo Wurmus <rekado@elephly.net>
|
|
;;; Copyright © 2015, 2016 Ben Woodcroft <donttrustben@gmail.com>
|
|
;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
|
|
;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
|
|
;;; Copyright © 2016 Roel Janssen <roel@gnu.org>
|
|
;;; Copyright © 2016 Efraim Flashner <efraim@flashner.co.il>
|
|
;;;
|
|
;;; This file is part of GNU Guix.
|
|
;;;
|
|
;;; GNU Guix is free software; you can redistribute it and/or modify it
|
|
;;; under the terms of the GNU General Public License as published by
|
|
;;; the Free Software Foundation; either version 3 of the License, or (at
|
|
;;; your option) any later version.
|
|
;;;
|
|
;;; GNU Guix is distributed in the hope that it will be useful, but
|
|
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
;;; GNU General Public License for more details.
|
|
;;;
|
|
;;; You should have received a copy of the GNU General Public License
|
|
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
(define-module (gnu packages bioinformatics)
|
|
#:use-module ((guix licenses) #:prefix license:)
|
|
#:use-module (guix packages)
|
|
#:use-module (guix utils)
|
|
#:use-module (guix download)
|
|
#:use-module (guix git-download)
|
|
#:use-module (guix hg-download)
|
|
#:use-module (guix build-system ant)
|
|
#:use-module (guix build-system gnu)
|
|
#:use-module (guix build-system cmake)
|
|
#:use-module (guix build-system perl)
|
|
#:use-module (guix build-system python)
|
|
#:use-module (guix build-system r)
|
|
#:use-module (guix build-system ruby)
|
|
#:use-module (guix build-system trivial)
|
|
#:use-module (gnu packages)
|
|
#:use-module (gnu packages autotools)
|
|
#:use-module (gnu packages algebra)
|
|
#:use-module (gnu packages base)
|
|
#:use-module (gnu packages bison)
|
|
#:use-module (gnu packages boost)
|
|
#:use-module (gnu packages compression)
|
|
#:use-module (gnu packages cpio)
|
|
#:use-module (gnu packages curl)
|
|
#:use-module (gnu packages documentation)
|
|
#:use-module (gnu packages datastructures)
|
|
#:use-module (gnu packages file)
|
|
#:use-module (gnu packages gawk)
|
|
#:use-module (gnu packages gcc)
|
|
#:use-module (gnu packages gd)
|
|
#:use-module (gnu packages image)
|
|
#:use-module (gnu packages java)
|
|
#:use-module (gnu packages linux)
|
|
#:use-module (gnu packages logging)
|
|
#:use-module (gnu packages machine-learning)
|
|
#:use-module (gnu packages maths)
|
|
#:use-module (gnu packages mpi)
|
|
#:use-module (gnu packages ncurses)
|
|
#:use-module (gnu packages pcre)
|
|
#:use-module (gnu packages pdf)
|
|
#:use-module (gnu packages perl)
|
|
#:use-module (gnu packages pkg-config)
|
|
#:use-module (gnu packages popt)
|
|
#:use-module (gnu packages protobuf)
|
|
#:use-module (gnu packages python)
|
|
#:use-module (gnu packages readline)
|
|
#:use-module (gnu packages ruby)
|
|
#:use-module (gnu packages statistics)
|
|
#:use-module (gnu packages tbb)
|
|
#:use-module (gnu packages textutils)
|
|
#:use-module (gnu packages time)
|
|
#:use-module (gnu packages tls)
|
|
#:use-module (gnu packages vim)
|
|
#:use-module (gnu packages web)
|
|
#:use-module (gnu packages xml)
|
|
#:use-module (gnu packages xorg)
|
|
#:use-module (gnu packages zip)
|
|
#:use-module (srfi srfi-1))
|
|
|
|
;; ARAGORN package recipe.  There is no configure step and no test suite;
;; the build phase compiles the single version-suffixed C file with gcc and
;; the install phase copies the binary and its man page into the output.
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.36")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
             version ".tgz"))
       (sha256
        (base32 "1dg7jlz1qpqy88igjxd6ncs11ccsirb36qv1z01a0np4i4jh61mb"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'build
           (lambda _
             ;; The tarball's source file carries the version in its name.
             (let ((source-file (string-append "aragorn" ,version ".c")))
               (zero? (system* "gcc"
                               "-O3"
                               "-ffast-math"
                               "-finline-functions"
                               "-o"
                               "aragorn"
                               source-file)))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out-dir (assoc-ref outputs "out"))
                    (bin-dir (string-append out-dir "/bin"))
                    (man-dir (string-append out-dir "/share/man/man1")))
               ;; 'install-file' creates the target directory when needed
               ;; and keeps the file's base name.
               (install-file "aragorn" bin-dir)
               (install-file "aragorn.1" man-dir)
               #t))))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
|
|
|
|
;; BamTools package recipe, built with CMake.  The extra 'set-ldflags'
;; phase exports LDFLAGS with an rpath pointing at the package's own
;; "lib/bamtools" directory before configuration runs.
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.3.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/pezmaster31/bamtools/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1brry29bw2xr2l9pqn240rkqwayg85b8qq78zk2zs6nlspk4d018"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             (setenv "LDFLAGS"
                     (string-append "-Wl,-rpath="
                                    (assoc-ref outputs "out")
                                    "/lib/bamtools"))
             ;; 'setenv' returns an unspecified value; report success
             ;; explicitly, like the other phases in this file.
             #t)))))
    (inputs `(("zlib" ,zlib)))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
|
|
|
|
;; BEDOPS package recipe.  There is no configure script, so that phase is
;; deleted; an extra phase unpacks the bundled third-party tarballs right
;; after the main unpack and patches the Makefiles accordingly.
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.14")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1kqbac547wyqma81cyky9n7mkgikjpsfd3nnmcm6hpqwanqgh10v"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Unpack the tarballs to benefit from shebang patching.  The
             ;; outer 'and' makes the phase fail when any 'tar' invocation
             ;; exits with a non-zero status instead of ignoring the result.
             (and (with-directory-excursion "third-party"
                    (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                         (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                         (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
                  (begin
                    ;; Disable unpacking of tarballs in Makefile.
                    (substitute* "system.mk/Makefile.linux"
                      (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                      (("\\./configure") "CONFIG_SHELL=bash ./configure"))
                    (substitute* "third-party/zlib-1.2.7/Makefile.in"
                      (("^SHELL=.*$") "SHELL=bash\n"))
                    #t)))))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
|
|
|
|
;; bedtools package recipe.  No configure step; the test suite runs through
;; the "test" make target and the install phase copies everything found
;; under the build tree's "bin" directory into the output's "bin".
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.25.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1ywcy3yfwzhl905b51l0ffjia55h75vv3mw5xkvib04pp6pj548m"))))
    (build-system gnu-build-system)
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out-dir (assoc-ref outputs "out"))
                    (target  (string-append out-dir "/bin/")))
               ;; The pattern ".*" selects every file below "bin".
               (for-each (lambda (program)
                           (install-file program target))
                         (find-files "bin" ".*"))
               #t))))))
    (native-inputs `(("python" ,python-2)))
    (inputs `(("samtools" ,samtools)
              ("zlib" ,zlib)))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
|
|
|
|
;; Bioawk package recipe.  There is no configure step and no test suite;
;; the build is serialized and the install phase is written by hand: it
;; installs the "bioawk" binary and ships "awk.1" under the name "bioawk.1".
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/lh3/bioawk/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32 "1daizxsk17ahi9n58fj8vpgwyhzrzh54bzqhanjanp88kgrz7gjw"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bison" ,bison)))
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               ;; The man page is renamed on installation, so 'install-file'
               ;; cannot be used for it; create the directory by hand.
               (mkdir-p man)
               (copy-file "awk.1" (string-append man "/bioawk.1"))
               (install-file "bioawk" bin)
               ;; Report success explicitly rather than relying on the
               ;; unspecified return value of 'install-file'.
               #t))))))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
|
|
|
|
;; pybedtools package recipe, built for Python 2 only.  bedtools and
;; samtools are propagated so that they are installed alongside the wrapper.
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              ;; Use the PyPI helper, as python-biopython does, instead of a
              ;; hand-assembled URL.
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))   ; no Python 3 support
    (inputs
     `(("python-cython" ,python2-cython)
       ("python-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
|
|
|
|
;; BioPerl package recipe with a minimal set of inputs.  The names of all
;; transitive target inputs are computed on the host side and spliced into
;; the build-side 'wrap-programs' phase, which wraps every "*.pl" script in
;; the output's "bin" directory with a matching PERL5LIB prefix.
(define-public bioperl-minimal
  (let* ((direct-inputs
          `(("perl-module-build" ,perl-module-build)
            ("perl-data-stag" ,perl-data-stag)
            ("perl-libwww" ,perl-libwww)
            ("perl-uri" ,perl-uri)))
         ;; Package names of everything reachable from the direct inputs,
         ;; without duplicates.
         (transitive-input-names
          (map (compose package-name cadr)
               (delete-duplicates
                (append-map (compose package-transitive-target-inputs cadr)
                            direct-inputs)))))
    (package
      (name "bioperl-minimal")
      (version "1.6.924")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "mirror://cpan/authors/id/C/CJ/CJFIELDS/BioPerl-"
                             version ".tar.gz"))
         (sha256
          (base32
           "1l3npcvvvwjlhkna9dndpfv1hklhrgva013kw96m0n1wpd37ask1"))))
      (build-system perl-build-system)
      (inputs direct-inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime.  As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((out-dir     (assoc-ref outputs "out"))
                      (scripts-dir (string-append out-dir "/bin/"))
                      (own-modules (string-append out-dir
                                                  "/lib/perl5/site_perl"))
                      (input-dirs  (map (lambda (input-name)
                                          (assoc-ref %build-inputs input-name))
                                        ',transitive-input-names))
                      (perl5lib    (string-join (cons own-modules input-dirs)
                                                ":")))
                 (for-each (lambda (script)
                             (wrap-program script
                               `("PERL5LIB" ":" prefix (,perl5lib))))
                           (find-files scripts-dir "\\.pl$"))
                 #t))))))
      (home-page "http://search.cpan.org/dist/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license (package-license perl)))))
|
|
|
|
;; Biopython package recipe for the default Python of the Python build
;; system.  The Python 2 variant is derived from this definition with
;; 'package-with-python2'.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.66")
    (source (origin
              (method url-fetch)
              ;; use PyPi rather than biopython.org to ease updating
              (uri (pypi-uri "biopython" version))
              (sha256
               (base32
                "1gdv92593klimg22icf5j9by7xiq86jnwzkpz4abaa05ylkdf6hp"))))
    (build-system python-build-system)
    (inputs
     `(("python-numpy" ,python-numpy)))
    (native-inputs
     ;; Use the setuptools that matches this package's interpreter; the
     ;; Python 2 setuptools does not belong in this variant's build
     ;; environment.
     `(("python-setuptools" ,python-setuptools)))
    (home-page "http://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
|
|
|
|
;; Python 2 variant of python-biopython.  It inherits everything from the
;; 'package-with-python2' transformation and overrides only the inputs,
;; naming python2-numpy explicitly.
(define-public python2-biopython
  (let ((base (package-with-python2 python-biopython)))
    (package
      (inherit base)
      (inputs
       `(("python2-numpy" ,python2-numpy))))))
|
|
|
|
;; BLAST+ package recipe.  The source snippet deletes the bundled bzip2 and
;; zlib copies and the msbuild directory.  The build enters the "c++"
;; sub-directory, rewrites hard-coded /bin and /usr/bin tool paths, and runs
;; "./configure.orig" with explicit flags.  Results are split over the
;; "out", "lib" and "include" outputs.
(define-public blast+
  (package
    (name "blast+")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
             version "/ncbi-blast-" version "+-src.tar.gz"))
       (sha256
        (base32 "14n9jik6vhiwjd3m7bach4xj1pzfn0szbsbyfxybd9l9cc43b6mb"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled bzip2 and zlib
           (delete-file-recursively "c++/src/util/compress/bzip2")
           (delete-file-recursively "c++/src/util/compress/zlib")
           (substitute* "c++/src/util/compress/Makefile.in"
             (("bzip2 zlib api") "api"))
           ;; Remove useless msbuild directory
           (delete-file-recursively
            "c++/src/build-system/project_tree_builder/msbuild")
           #t))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;  19 MB
               "lib"                    ; 203 MB
               "include"))              ;  32 MB
    (inputs
     `(("bzip2" ,bzip2)
       ("zlib" ,zlib)))
    (native-inputs
     `(("cpio" ,cpio)))
    (arguments
     `(;; There are three(!) tests for this massive library, and all fail with
       ;; "unparsable timing stats".
       ;; ERR [127] --  [util/regexp] test_pcre.sh     (unparsable timing stats)
       ;; ERR [127] --  [serial/datatool] datatool.sh     (unparsable timing stats)
       ;; ERR [127] --  [serial/datatool] datatool_xml.sh     (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-HOME
           ;; $HOME needs to be set at some point during the configure phase
           (lambda _
             (setenv "HOME" "/tmp")
             #t))
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "c++")
             #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Return the replacement text for the bare command name TOOL:
             ;; a fixed invocation for "date", otherwise whatever 'which'
             ;; finds.  When 'which' finds nothing, print a warning and
             ;; return #f so that the caller keeps the original text.
             (define (tool-replacement tool)
               (if (string=? tool "date")
                   ;; make call to "date" deterministic
                   "date -d @0"
                   (or (which tool)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 tool)
                         #f))))

             ;; Rewrite hardcoded paths to various tools
             (substitute* (append '("src/build-system/configure.ac"
                                    "src/build-system/configure"
                                    "scripts/common/impl/if_diff.sh"
                                    "scripts/common/impl/run_with_lock.sh"
                                    "src/build-system/Makefile.configurables.real"
                                    "src/build-system/Makefile.in.top"
                                    "src/build-system/Makefile.meta.gmake=no"
                                    "src/build-system/Makefile.meta.in"
                                    "src/build-system/Makefile.meta_l"
                                    "src/build-system/Makefile.meta_p"
                                    "src/build-system/Makefile.meta_r"
                                    "src/build-system/Makefile.mk.in"
                                    "src/build-system/Makefile.requirements"
                                    "src/build-system/Makefile.rules_with_autodep.in")
                                  (find-files "scripts/common/check" "\\.sh$"))
               (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
                (or (tool-replacement cmd) all)))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; rewrite "/var/tmp" in check script
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; do not reset PATH
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((prefix      (assoc-ref outputs "out"))
                    (lib-dir     (string-append (assoc-ref outputs "lib")
                                                "/lib"))
                    (include-dir (string-append (assoc-ref outputs "include")
                                                "/include/ncbi-tools++"))
                    (flags
                     (list (string-append "--with-build-root="
                                          (getcwd) "/build")
                           (string-append "--prefix=" prefix)
                           (string-append "--libdir=" lib-dir)
                           (string-append "--includedir=" include-dir)
                           (string-append "--with-bz2="
                                          (assoc-ref inputs "bzip2"))
                           (string-append "--with-z="
                                          (assoc-ref inputs "zlib"))
                           ;; Each library is built twice by default, once
                           ;; with "-static" in its name, and again
                           ;; without.
                           "--without-static"
                           "--with-dll")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (apply system* "./configure.orig" flags))))))))
    (home-page "http://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; Most of the sources are in the public domain, with the following
    ;; exceptions:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
|
|
|
|
;; BLESS package recipe.  Several bundled libraries are deleted from the
;; source and replaced by inputs; the Makefile and "parse_args.cpp" are
;; patched so that the packaged pigz and the installed kmc binary are used.
;; There is no configure step and no "check" target.
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, google-sparsehash, zlib, and
                  ;; the .git directory.
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"
                                           "-std=c++11"))))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-build-bundled-pigz
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The pigz directory was deleted by the source snippet, so the
             ;; Makefile must not try to build it.
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Hard-code the locations of the helper programs: kmc is
             ;; installed into this package's output, pigz comes from the
             ;; "pigz" input.
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("sparsehash" ,sparsehash)
       ("pigz" ,pigz)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "http://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized Bloom
filter. BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
|
|
|
|
;; Bowtie 2 package recipe.  The source snippet replaces the BUILD_HOST and
;; BUILD_TIME definitions in the Makefile with fixed values.  There is no
;; configure step, and the check phase runs the upstream Perl test script
;; against the freshly built binaries.
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.2.6")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1ssfvymxfrap6f9pf86s9bvsbqdgka4abr2r7j3mgr4w1l289m86"))
              (modules '((guix build utils)))
              (snippet
               '(substitute* "Makefile"
                  ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                  (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                  (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))))
    (build-system gnu-build-system)
    (inputs `(("perl" ,perl)
              ("perl-clone" ,perl-clone)
              ("perl-test-deep" ,perl-test-deep)
              ("perl-test-simple" ,perl-test-simple)
              ("python" ,python-2)
              ("tbb" ,tbb)))
    (arguments
     '(#:make-flags
       (list "allall"
             "WITH_TBB=1"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           (lambda _
             ;; 'system*' returns the exit status as an integer, and every
             ;; integer is true in Scheme.  Compare against zero so that a
             ;; failing test run actually fails the phase.
             (zero? (system* "perl"
                             "scripts/test/simple_tests.pl"
                             "--bowtie2=./bowtie2"
                             "--bowtie2-build=./bowtie2-build")))))))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
|
|
|
|
;; TopHat package recipe.  The bundled SeqAn and samtools sources are
;; deleted by the source snippet; the 'use-system-samtools' phase then
;; edits the Makefile template and the sources so that the "samtools" and
;; "seqan" inputs are used instead.
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
             version ".tar.gz"))
       (sha256
        (base32 "168zlzykq622zbgkh90a90f1bdgsxkscq2zxzbj8brq80hbjpyp7"))
       (patches (search-patches "tophat-build-with-later-seqan.patch"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled SeqAn and samtools
           (delete-file-recursively "src/SeqAn-1.3")
           (delete-file-recursively "src/samtools-0.1.18")
           #t))))
    (build-system gnu-build-system)
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("samtools" ,samtools-0.1)
       ("ncurses" ,ncurses)
       ("python" ,python-2)
       ("perl" ,perl)
       ("zlib" ,zlib)
       ("seqan" ,seqan)))
    (arguments
     '(#:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-samtools
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Strip every reference to the in-tree samtools build from the
             ;; Makefile template.
             (substitute* "src/Makefile.in"
               (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ lhs) lhs)
               (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
               (("SAMPROG = samtools_0\\.1\\.18") "")
               (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
               (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
             ;; Replace the bundled program's name with the samtools
             ;; executable found by 'which'.
             (substitute* '("src/common.cpp"
                            "src/tophat.py")
               (("samtools_0.1.18") (which "samtools")))
             ;; Include the headers under the "samtools/" prefix, for both
             ;; the quoted and the angle-bracket include forms.
             (substitute* '("src/common.h"
                            "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             (substitute* '("src/bwt_map.h"
                            "src/map2gtf.h"
                            "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             #t)))))
    (home-page "http://ccb.jhu.edu/software/tophat/index.shtml")
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    ;; TopHat is released under the Boost Software License, Version 1.0
    ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
|
|
|
|
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.12")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bio-bwa/bwa-"
                                  version ".tar.bz2"))
              (sha256
               (base32
                "1330dpqncv0px3pbhjzz1gwgg39kkcv2r9qp2xs0sixf8z8wl7bh"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         ;; Install the executable, the README and the manual page by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/bwa"))
                    (man (string-append out "/share/man/man1")))
               ;; 'install-file' creates the target directory when needed,
               ;; so no separate 'mkdir-p' calls are required.
               (install-file "bwa" bin)
               (install-file "README.md" doc)
               (install-file "bwa.1" man)
               ;; Report success explicitly instead of relying on the
               ;; unspecified return value of 'install-file'.
               #t))))))
    (inputs `(("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
|
|
|
|
;; Variant of BWA; every field that is not overridden below is inherited
;; from the 'bwa' package.
(define-public bwa-pssm
  (package (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/pkerpedjiev/bwa-pssm/"
                                  "archive/" version ".tar.gz"))
              ;; The upstream archive is named after the version only; store
              ;; it under a name that also carries the package name.
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "02p7mpbs4mlxmn84g2x4ghak638vbj4lqix2ipx5g84pz9bhdavg"))))
    (build-system gnu-build-system)
    (inputs
     `(("gdsl" ,gdsl)
       ("zlib" ,zlib)
       ("perl" ,perl)))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
|
|
|
|
;; bx-python, built with Python 2.
(define-public python2-bx-python
  (package
    (name "python2-bx-python")
    (version "0.7.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/b/bx-python/bx-python-"
             version ".tar.gz"))
       (sha256
        (base32 "0ld49idhc5zjdvbhvjq1a2qmpjj7h5v58rqr25dzmfq7g34b50xh"))
       (modules '((guix build utils)))
       ;; remove dependency on outdated "distribute" module
       (snippet
        '(substitute* "setup.py"
           (("^from distribute_setup import use_setuptools") "")
           (("^use_setuptools\\(\\)") "")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       ;; tests fail because test data are not included
       #:tests? #f))
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (home-page "http://bitbucket.org/james_taylor/bx-python/")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
|
|
|
|
(define-public python-pysam
  (package
    (name "python-pysam")
    (version "0.8.4")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pysam" version))
              (sha256
               (base32
                "1slx5mb94mzm5qzk52q270sab0sar95j67w1g1k452nz3s9j7krh"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ; tests are excluded in the manifest
       #:phases
       (modify-phases %standard-phases
         ;; Export the linker and compiler flags for ncurses before the
         ;; 'build phase runs.
         (add-before 'build 'set-flags
           (lambda _
             (setenv "LDFLAGS" "-lncurses")
             (setenv "CFLAGS" "-D_CURSES_LIB=1")
             ;; Report success explicitly instead of relying on the
             ;; unspecified return value of 'setenv'.
             #t)))))
    (inputs
     `(("ncurses" ,ncurses)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-setuptools" ,python-setuptools)))
    (home-page "https://github.com/pysam-developers/pysam")
    (synopsis "Python bindings to the SAMtools C API")
    (description
     "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
also includes an interface for tabix.")
    (license license:expat)))
|
|
|
|
;; Python 2 variant of 'python-pysam'.
(define-public python2-pysam
  (package-with-python2 python-pysam))
|
|
|
|
(define-public python-twobitreader
  (package
    (name "python-twobitreader")
    (version "3.1.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "twobitreader" version))
       (sha256
        (base32 "0y408fp6psqzwxpcpqn0wp7fr41dwz8d54wpj6j261fj5q8vs169"))))
    (build-system python-build-system)
    ;; The Python 2 variant is defined separately as 'python2-twobitreader';
    ;; the reference is delayed because that variable is defined later.
    (properties `((python2-variant . ,(delay python2-twobitreader))))
    (native-inputs
     `(("python-sphinx" ,python-sphinx)))
    (home-page "https://github.com/benjschiller/twobitreader")
    (synopsis "Python library for reading .2bit files")
    (description
     "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
    (license license:artistic2.0)))
|
|
|
|
;; Python 2 variant of 'python-twobitreader', with setuptools added to the
;; native inputs.
(define-public python2-twobitreader
  (let ((parent (package-with-python2
                 (strip-python2-variant python-twobitreader))))
    (package
      (inherit parent)
      (native-inputs
       (cons (list "python2-setuptools" python2-setuptools)
             (package-native-inputs parent))))))
|
|
|
|
(define-public python-plastid
  (package
    (name "python-plastid")
    (version "0.4.5")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "plastid" version))
       (sha256
        (base32 "1nhxw8a5gn9as58i2ih52c5cjwj48ik418pzsjwph3s66mmy9yvq"))))
    (build-system python-build-system)
    ;; The Python 2 variant is defined separately as 'python2-plastid'; the
    ;; reference is delayed because that variable is defined later.
    (properties `((python2-variant . ,(delay python2-plastid))))
    ;; Some test files are not included.
    (arguments `(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)
       ("python-biopython" ,python-biopython)
       ("python-twobitreader" ,python-twobitreader)))
    (home-page "https://github.com/joshuagryphon/plastid")
    (synopsis "Python library for genomic analysis")
    (description
     "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
    (license license:bsd-3)))
|
|
|
|
;; Python 2 variant of 'python-plastid'.
(define-public python2-plastid
  (let ((parent (package-with-python2
                 (strip-python2-variant python-plastid))))
    (package
      (inherit parent)
      ;; setuptools is required at runtime
      (propagated-inputs
       (cons (list "python2-setuptools" python2-setuptools)
             (package-propagated-inputs parent))))))
|
|
|
|
(define-public cd-hit
  (package
    (name "cd-hit")
    (version "4.6.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/weizhongli/cdhit"
                           "/releases/download/V" version
                           "/cd-hit-v" version "-2016-0304.tar.gz"))
       (sha256
        (base32 "15db0hq38yyifwqx9b6l34z14jcq576dmjavhj8a426c18lvnhp3"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       ;; Executables are copied directly to the PREFIX.
       #:make-flags
       (list (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin"))
       #:phases
       (modify-phases %standard-phases
         ;; No "configure" script
         (delete 'configure)
         ;; Remove sources of non-determinism: the build date and time
         ;; macros would otherwise end up in the programs' output strings.
         (add-after 'unpack 'be-timeless
           (lambda _
             (substitute* "cdhit-utility.c++"
               ((" \\(built on \" __DATE__ \"\\)") ""))
             (substitute* "cdhit-common.c++"
               (("__DATE__") "\"0\"")
               (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
             #t))
         ;; The "install" target does not create the target directory
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (mkdir-p (string-append out "/bin")))
             #t)))))
    (inputs
     `(("perl" ,perl)))
    (home-page "http://weizhongli-lab.org/cd-hit/")
    (synopsis "Cluster and compare protein or nucleotide sequences")
    (description
     "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences. CD-HIT is designed to be fast and handle extremely large
databases.")
    ;; The manual says: "It can be copied under the GNU General Public License
    ;; version 2 (GPLv2)."
    (license license:gpl2)))
|
|
|
|
(define-public clipper
  (package
    (name "clipper")
    (version "0.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/YeoLab/clipper/archive/"
                           version ".tar.gz"))
       ;; Store the archive under a name that includes the package name.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1q7jpimsqln7ic44i8v2rx2haj5wvik8hc1s2syd31zcn0xk1iyq"))
       (modules '((guix build utils)))
       ;; remove unnecessary setup dependency
       (snippet
        '(substitute* "setup.py"
           (("setup_requires = .*") "")))))
    (build-system python-build-system)
    ;; only Python 2 is supported
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-mock" ,python2-mock)    ; for tests
       ("python-pytz" ,python2-pytz)    ; for tests
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("htseq" ,htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
|
|
|
|
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/codingquarry/CodingQuarry_v"
                    version ".tar.gz"))
              (sha256
               (base32
                "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the documentation, the "QuarryFiles" data directory and
         ;; the two programs by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/codingquarry")))
               (install-file "INSTRUCTIONS.pdf" doc)
               ;; The data directory goes to the top of the output, which is
               ;; where the QUARRY_PATH search path below looks for it.
               (copy-recursively "QuarryFiles"
                                 (string-append out "/QuarryFiles"))
               (install-file "CodingQuarry" bin)
               (install-file "CufflinksGTF_to_CodingQuarryGFF3.py" bin)
               ;; Report success explicitly instead of relying on the
               ;; unspecified return value of 'install-file'.
               #t))))))
    (inputs `(("openmpi" ,openmpi)))
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (license license:gpl3+)))
|
|
|
|
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://couger.oit.duke.edu/static/assets/COUGER"
                           version ".zip"))
       (sha256
        (base32 "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         ;; Copy the "src" directory and the "couger" script to the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (copy-recursively "src" (string-append out "/src"))
               ;; Add "src" directory to module lookup path.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 out "\")\nfrom argparse")))
               (mkdir bin)
               (copy-file "couger" (string-append bin "/couger")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'couger' runs with the correct PYTHONPATH.
             (let ((program     (string-append (assoc-ref outputs "out")
                                               "/bin/couger"))
                   (python-path (getenv "PYTHONPATH")))
               (wrap-program program
                 `("PYTHONPATH" ":" prefix (,python-path))))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r" ,r)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
|
|
|
|
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32 "02ibkx0m0iwz8nscg998bh41gg251y56cgh86bvyrii5m8kjgwqf"))))
    ;; No custom arguments: the build system's defaults are used as is.
    (build-system gnu-build-system)
    (inputs
     `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
|
|
|
|
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.2.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                                  version ".tar.gz"))
              (sha256
               (base32
                "07y179f63d7qnzdvkqcziwk9bs3k4zhp81q392fp1hwszjdvy22f"))
              ;; This patch has been sent upstream already and is available
              ;; for download from Sourceforge, but it has not been merged.
              (patches (search-patches "crossmap-allow-system-pysam.patch"))
              (modules '((guix build utils)))
              ;; remove bundled copy of pysam
              (snippet
               '(delete-file-recursively "lib/pysam"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Set the environment variable that asks for the system pysam,
         ;; since the bundled copy is deleted by the source snippet.
         (add-after 'unpack 'set-env
           (lambda _
             (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1")
             ;; Report success explicitly instead of relying on the
             ;; unspecified return value of 'setenv'.
             #t)))))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
|
|
|
|
(define-public cufflinks
  (package
    (name "cufflinks")
    (version "2.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://cole-trapnell-lab.github.io/"
                           "cufflinks/assets/downloads/cufflinks-"
                           version ".tar.gz"))
       (sha256
        (base32 "1bnm10p8m7zq4qiipjhjqb24csiqdm1pwc8c795z253r2xk6ncg8"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "--with-bam="
                            (assoc-ref %build-inputs "samtools")))
       #:make-flags
       (list
        ;; The includes for "eigen" are located in a subdirectory.
        (string-append "EIGEN_CPPFLAGS=-I"
                       (assoc-ref %build-inputs "eigen")
                       "/include/eigen3/")
        ;; Cufflinks must be linked with various boost libraries.
        "LDFLAGS=-lboost_system -lboost_serialization -lboost_thread")
       #:phases
       (modify-phases %standard-phases
         ;; Look for the samtools headers under "samtools/" instead of
         ;; "bam/".
         (add-after 'unpack 'fix-search-for-bam
           (lambda _
             (substitute* '("ax_bam.m4"
                            "configure"
                            "src/hits.h")
               (("<bam/sam\\.h>") "<samtools/sam.h>")
               (("<bam/bam\\.h>") "<samtools/bam.h>")
               (("<bam/version\\.hpp>") "<samtools/version.h>"))
             #t)))))
    (inputs
     `(("eigen" ,eigen)
       ("samtools" ,samtools-0.1)
       ("htslib" ,htslib)
       ("boost" ,boost)
       ("python" ,python-2)
       ("zlib" ,zlib)))
    (home-page "http://cole-trapnell-lab.github.io/cufflinks/")
    (synopsis "Transcriptome assembly and RNA-Seq expression analysis")
    (description
     "Cufflinks assembles RNA transcripts, estimates their abundances,
and tests for differential expression and regulation in RNA-Seq
samples. It accepts aligned RNA-Seq reads and assembles the
alignments into a parsimonious set of transcripts. Cufflinks then
estimates the relative abundances of these transcripts based on how
many reads support each one, taking into account biases in library
preparation protocols.")
    (license license:boost1.0)))
|
|
|
|
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.8")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/marcelm/cutadapt/archive/v"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "161bp87y6gd6r5bmvjpn2b1k942i3fizfpa139f0jn6jv1wcp5h5"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; tests must be run after install
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((python  (assoc-ref inputs "python"))
                    ;; The version is whatever follows the last dash of the
                    ;; "python" input's file name.  Split it on dots rather
                    ;; than slicing a fixed number of characters, so that
                    ;; components with several digits (e.g. "2.7.10") still
                    ;; give the right "MAJOR.MINOR" directory name.
                    (version (substring python
                                        (1+ (string-rindex python #\-))))
                    (parts   (string-split version #\.))
                    (major+minor (string-append (car parts) "."
                                                (cadr parts))))
               ;; Let the test suite import the freshly installed modules.
               (setenv "PYTHONPATH"
                       (string-append
                        (getenv "PYTHONPATH")
                        ":" (assoc-ref outputs "out")
                        "/lib/python" major+minor
                        "/site-packages"))
               (zero? (system* "nosetests" "-P" "tests"))))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)
       ("python-setuptools" ,python-setuptools)))
    (home-page "https://code.google.com/p/cutadapt/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
|
|
|
|
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/dpryan79/libBigWig/"
                           "archive/" version ".tar.gz"))
       ;; Store the archive under a name that includes the package name.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "098rjh35pi4a9q83n8wiwvyzykjqj6l8q189p1xgfw4ghywdlvw1"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       (list "CC=gcc"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Drop the "testRemote" test from the Makefile before the tests
         ;; are run.
         (add-before 'check 'disable-curl-test
           (lambda _
             (substitute* "Makefile"
               (("./test/testRemote.*") ""))
             #t))
         ;; This has been fixed with the upstream commit 4ff6959cd8a0, but
         ;; there has not yet been a release containing this change.
         (add-before 'install 'create-target-dirs
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               (for-each (lambda (subdirectory)
                           (mkdir-p (string-append prefix subdirectory)))
                         '("/lib" "/include"))
               #t))))))
    (native-inputs
     `(("doxygen" ,doxygen)))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
|
|
|
|
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.2.5")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyBigWig" version))
       (sha256
        (base32 "0yrpdxg3y0sny25x4w22lv1k47jzccqjmg7j4bp0hywklvp0hg7d"))
       (modules '((guix build utils)))
       ;; Delete bundled libBigWig sources
       (snippet
        '(delete-file-recursively "libBigWig"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Add "BigWig" to the libraries listed in setup.py, since the
         ;; bundled sources were deleted by the source snippet.
         (add-after 'unpack 'link-with-libBigWig
           (lambda _
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))
|
|
|
|
;; Python 2 variant of 'python-pybigwig'; its native inputs consist of
;; setuptools only.
(define-public python2-pybigwig
  (let ((parent (package-with-python2 python-pybigwig)))
    (package
      (inherit parent)
      (native-inputs
       (list (list "python-setuptools" python2-setuptools))))))
|
|
|
|
(define-public deeptools
  (package
    (name "deeptools")
    (version "2.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/fidelram/deepTools/"
                           "archive/" version ".tar.gz"))
       ;; Store the archive under a name that includes the package name.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1nmfin0zjdby3vay3r4flvz94dr6qjhj41ax4yz3vx13j6wz8izd"))))
    (build-system python-build-system)
    ;; Build with Python 2; all Python inputs below are the python2-
    ;; variants.
    (arguments
     `(#:python ,python-2))
    (native-inputs
     `(("python-mock" ,python2-mock)    ;for tests
       ("python-pytz" ,python2-pytz)    ;for tests
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-numpydoc" ,python2-numpydoc)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-bx-python" ,python2-bx-python)
       ("python-pysam" ,python2-pysam)
       ("python-pybigwig" ,python2-pybigwig)))
    (home-page "https://github.com/fidelram/deepTools")
    (synopsis "Tools for normalizing and visualizing deep-sequencing data")
    (description
     "DeepTools addresses the challenge of handling the large amounts of data
that are now routinely generated from DNA sequencing centers. To do so,
deepTools contains useful modules to process the mapped reads data to create
coverage files in standard bedGraph and bigWig file formats. By doing so,
deepTools allows the creation of normalized coverage files or the comparison
between two files (for example, treatment and control). Finally, using such
normalized and standardized files, multiple visualizations can be created to
identify enrichments with functional annotations of the genome.")
    (license license:gpl3+)))
|
|
|
|
(define-public diamond
  (package
    (name "diamond")
    (version "0.8.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/bbuchfink/diamond/archive/v"
                           version ".tar.gz"))
       ;; Store the archive under a name that includes the package name.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1g0j24qcx48mp04hbk0hpbvh0gbw5wmifpliyaq95zp4qwwcs5x4"))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Strip the "-march=native" compiler flag from the CMake
         ;; configuration.
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    ;; diamond fails to build on other platforms
    ;; https://github.com/bbuchfink/diamond/issues/18
    (supported-systems '("x86_64-linux"))
    (license (license:non-copyleft "file://src/COPYING"
                                   "See src/COPYING in the distribution."))))
|
|
|
|
(define-public edirect
  (package
    (name "edirect")
    (version "4.10")
    (source (origin
              (method url-fetch)
              (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/"
                                  "versions/2016-05-03/edirect.tar.gz"))
              (sha256
               (base32
                "15zsprak5yh8c1yrz4r1knmb5s8qcmdid4xdhkh3lqcv64l60hli"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         ;; Only the "edirect.pl" script is installed.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               ;; 'install-file' creates TARGET when needed.
               (install-file "edirect.pl" target)
               #t)))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'edirect.pl' finds all perl inputs at runtime.
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PERL5LIB")))
               (wrap-program (string-append out "/bin/edirect.pl")
                 `("PERL5LIB" ":" prefix (,path)))
               ;; Report success explicitly instead of relying on the
               ;; return value of 'wrap-program'.
               #t))))))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl" ,perl)))
    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
|
|
|
|
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://bio.math.berkeley.edu/eXpress/downloads/express-"
             version "/express-" version "-src.tgz"))
       (sha256
        (base32 "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Turn off static Boost libraries and point the CMake files at
         ;; the "bamtools" input instead of a copy inside the source tree.
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib/bamtools"))))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
|
|
|
|
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/dparks1134/ExpressBetaDiversity/archive/v"
             version ".tar.gz"))
       ;; Store the archive under a name that includes the package name.
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1djvdlmqvjf6h0zq7w36y8cl5cli6rgj86x65znl48agnwmzxfxr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The 'build and 'check phases run inside the "source"
         ;; subdirectory; the top-level directory is restored afterwards.
         (add-before 'build 'enter-source
           (lambda _ (chdir "source") #t))
         (replace 'check
           (lambda _
             (zero? (system* "../bin/ExpressBetaDiversity" "-u"))))
         (add-after 'check 'exit-source
           (lambda _ (chdir "..") #t))
         ;; Install the conversion script and the executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("scripts/convertToEBD.py"
                           "bin/ExpressBetaDiversity"))
               #t))))))
    (inputs
     `(("python" ,python-2)))
    (home-page "http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity")
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (license license:gpl3+)))
|
|
|
|
;; FastTree.  The source is a single C file, so there is nothing to unpack or
;; configure; the build phase compiles it twice, once plain and once with
;; OpenMP enabled.
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.microbesonline.org/fasttree/FastTree-"
                           version ".c"))
       (sha256
        (base32 "0dzqc9vr9iiiw21y159xfjl2z90vw0y7r4x6456pcaxiy5hd2wmi"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; COMPILE runs gcc on SOURCE, producing PROGRAM.  EXTRA-FLAGS
             ;; are placed before the flags common to both builds.
             (let ((compile
                    (lambda (program . extra-flags)
                      (zero?
                       (apply system* "gcc"
                              (append extra-flags
                                      (list "-O3"
                                            "-finline-functions"
                                            "-funroll-loops"
                                            "-Wall"
                                            "-o" program
                                            source
                                            "-lm")))))))
               (and (compile "FastTree")
                    (compile "FastTreeMP" "-DOPENMP" "-fopenmp")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p target)
               (for-each
                (lambda (program)
                  (copy-file program (string-append target "/" program)))
                '("FastTree" "FastTreeMP"))
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    (description
     "FastTree can handle alignments with up to a million of sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
|
|
|
|
;; FASTX-Toolkit.  Built from the release tarball with the unmodified
;; gnu-build-system phases.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/fastx_toolkit/releases/download/"
             version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32 "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
|
|
|
|
;; Flexbar.  The 'install phase is removed, and the check phase looks for the
;; program in the output's bin directory, which is also the directory passed
;; to CMake as FLEXBAR_BINARY_DIR.
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/flexbar/"
                           version "/flexbar_v" version "_src.tgz"))
       (sha256
        (base32 "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "-DFLEXBAR_BINARY_DIR="
                            (assoc-ref %outputs "out")
                            "/bin/"))
       #:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make the freshly built program visible to the test script.
             (setenv "PATH"
                     (string-append (assoc-ref outputs "out") "/bin:"
                                    (getenv "PATH")))
             ;; The test directory name embeds the version; derive it from
             ;; the package version so that it cannot go stale on upgrade.
             (chdir (string-append "../flexbar_v" ,version "_src/test"))
             (zero? (system* "bash" "flexbar_validate.sh"))))
         (delete 'install))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
|
|
|
|
;; FragGeneScan.  The scripts and the C program are patched so that the
;; training files are looked up under share/fraggenescan/train in the output,
;; and the test runs after installation against the installed driver script.
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.20")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/fraggenescan/"
                           "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "1zzigqmvqvjyqv4945kv6nc5ah2xxm1nxgrlsnbzav3f5c0n0pyj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (share (string-append prefix "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 ;; Refer to "rm" and "mv" by their absolute file names.
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 ;; This script and other programs expect the training files
                 ;; to be in the non-standard location bin/train/XXX.  Change
                 ;; this to be share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 share
                                 "train/\".$FGS_train_file;")))
               (substitute* "run_hmm.c"
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  (string-append " strcpy(train_dir, \"" share "/train/\");")))
               (substitute* "post_process.pl"
                 (("^my \\$dir = substr.*")
                  (string-append "my $dir = \"" share "\";"))))
             #t))
         (replace 'build
           (lambda _
             (and (zero? (system* "make" "clean"))
                  (zero? (system* "make" "fgs")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bin (string-append prefix "/bin/"))
                    (train (string-append prefix
                                          "/share/fraggenescan/train")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("run_FragGeneScan.pl"
                           "FragGeneScan"
                           "FGS_gff.py"
                           "post_process.pl"))
               (copy-recursively "train" train))))
         (delete 'check)
         (add-after 'install 'post-install-check
           ;; In lieu of 'make check', run one of the examples and check that
           ;; the output files get created.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((driver (string-append (assoc-ref outputs "out")
                                          "/bin/"
                                          "run_FragGeneScan.pl")))
               (and (zero? (system* driver
                                    "-genome=./example/NC_000913.fna"
                                    "-out=./test2"
                                    "-complete=1"
                                    "-train=complete"))
                    (file-exists? "test2.faa")
                    (file-exists? "test2.ffn")
                    (file-exists? "test2.gff")
                    (file-exists? "test2.out"))))))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)))           ;not compatible with python 3.
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
|
|
|
|
;; Fxtract.  UTIL-COMMIT pins the revision of the separate "util" repository,
;; which is fetched as a native input and copied into the source tree before
;; the build.
(define-public fxtract
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/ctSkennerton/fxtract/archive/"
               version ".tar.gz"))
         ;; Name the release tarball after this package and version.  It
         ;; previously reused the name of the "util" checkout, which
         ;; mislabelled the tarball as a directory checkout.
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "0275cfdhis8517hm01is62062swmi06fxzifq7mr3knbbxjlaiwj"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               ;; Replace the "util" directory of the unpacked source with
               ;; the pinned checkout.  'rmdir' only removes empty
               ;; directories, so "util" is presumably an empty placeholder
               ;; in the tarball.
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util.git")
                   (commit util-commit)))
             (file-name (string-append
                         "ctstennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      (license license:expat))))
|
|
|
|
;; GRIT.  Built with Python 2; the pre-generated C files are removed after
;; unpacking and setup.py is patched before the build.
(define-public grit
  (package
    (name "grit")
    (version "2.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nboley/grit/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Delete these C files to force fresh generation from pyx
             ;; sources.
             (for-each delete-file
                       '("grit/sparsify_support_fns.c"
                         "grit/call_peaks_support_fns.c"))
             (let ((numpy-include
                    (string-append
                     (assoc-ref inputs "python-numpy")
                     "/lib/python2.7/site-packages/numpy/core/include/")))
               (substitute* "setup.py"
                 (("Cython.Setup") "Cython.Build")
                 ;; Add numpy include path to fix compilation
                 (("pyx\", \\]")
                  (string-append "pyx\", ], include_dirs = ['"
                                 numpy-include
                                 "']"))))
             #t)))))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-setuptools" ,python2-setuptools)))
    (home-page "http://grit-bio.org")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
|
|
|
|
;; HISAT.  The source is a zip archive, so the Makefile is patched in a build
;; phase rather than in an origin snippet.  There is no configure script, and
;; the install phase copies the built programs by hand.
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
             version "-beta-source.zip"))
       (sha256
        (base32 "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX Cannot use snippet because zip files are not supported
             (substitute* "Makefile"
               (("^CC = .*$") "CC = gcc")
               (("^CPP = .*$") "CPP = g++")
               ;; replace BUILD_HOST and BUILD_TIME for deterministic build
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install into "bin"; the directory name was previously
             ;; misspelled as "bi", which kept the programs out of PATH.
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         (find-files
                          "."
                          "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$"))
               #t))))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
|
|
|
|
;; HMMER.  Built with the stock gnu-build-system phases; the download URL
;; embeds both a version prefix and the full version.
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.1b2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/hmmer"
                           (version-prefix version 1) "/"
                           version "/hmmer-" version ".tar.gz"))
       (sha256
        (base32 "0djmgc0pfli0jilfx8hql1axhwhqxqb8rxg2r5rg07aw73sfs5nx"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (home-page "http://hmmer.org/")
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    (license
     (list license:gpl3+
           ;; The bundled library 'easel' is distributed
           ;; under The Janelia Farm Software License.
           (license:non-copyleft
            "file://easel/LICENSE"
            "See easel/LICENSE in the distribution.")))))
|
|
|
|
;; HTSeq.  A Python 2 package fetched from PyPI and built with the stock
;; python-build-system phases.
(define-public htseq
  (package
    (name "htseq")
    (version "0.6.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/H/HTSeq/HTSeq-"
             version ".tar.gz"))
       (sha256
        (base32 "1i85ppf2j2lj12m0x690qq5nn17xxk23pbbx2c83r8ayb5wngzwv"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))             ; only Python 2 is supported
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-pysam" ,python2-pysam)))
    ;; Numpy needs to be propagated when htseq is used as a Python library.
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://www-huber.embl.de/users/anders/HTSeq/")
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (license license:gpl3+)))
|
|
|
|
;; HTSJDK.  Built with Ant; the "all" target writes the jars straight into
;; the directory given by the "dist" property, so no install phase is run.
(define-public java-htsjdk
  (package
    (name "java-htsjdk")
    (version "1.129")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/samtools/htsjdk/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0asdk9b8jx2ij7yd6apg9qx03li8q7z3ml0qy2r2qczkra79y6fw"))
       (modules '((guix build utils)))
       ;; remove build dependency on git
       (snippet
        '(substitute* "build.xml"
           (("failifexecutionfails=\"true\"")
            "failifexecutionfails=\"false\"")))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; test require Internet access
       #:build-target "all"
       #:make-flags
       (list (string-append "-Ddist="
                            (assoc-ref %outputs "out")
                            "/share/java/htsjdk/"))
       #:phases
       (modify-phases %standard-phases
         ;; The build phase also installs the jars
         (delete 'install))))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also an number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
|
|
|
|
;; HTSlib.  The only deviation from the stock phases is rewriting the
;; hard-coded /bin/bash in the test driver.
(define-public htslib
  (package
    (name "htslib")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; Point test.pl at the bash found in PATH.
             (let ((bash (which "bash")))
               (substitute* "test/test.pl"
                 (("/bin/bash") bash)))
             #t)))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://www.htslib.org")
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the bgzip, htsfile, and tabix utilities.")
    ;; Files under cram/ are released under the modified BSD license;
    ;; the rest is released under the Expat license
    (license (list license:expat license:bsd-3))))
|
|
|
|
;; IDR.  After installation the "idr" program is wrapped so that PYTHONPATH
;; is prefixed with the site-packages directories of scipy, numpy and
;; matplotlib.
(define-public idr
  (package
    (name "idr")
    (version "2.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/nboley/idr/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1k3x44biak00aiv3hpm1yd6nn4hhp7n0qnbs3zh2q9sw7qr1qj5r"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((python (assoc-ref inputs "python"))
                    ;; Of the last five characters of the python input's
                    ;; file name, keep the first three (presumably the
                    ;; "MAJOR.MINOR" part of a trailing "MAJOR.MINOR.PATCH").
                    (python-version
                     (string-take (string-take-right python 5) 3))
                    (site-packages
                     (lambda (input)
                       (string-append (assoc-ref inputs input)
                                      "/lib/python" python-version
                                      "/site-packages")))
                    (path
                     (string-join (map site-packages
                                       '("python-scipy"
                                         "python-numpy"
                                         "python-matplotlib"))
                                  ":"))
                    (program
                     (string-append (assoc-ref outputs "out") "/bin/idr")))
               (wrap-program program
                 `("PYTHONPATH" ":" prefix (,path))))
             #t)))))
    (inputs
     `(("python-scipy" ,python-scipy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-setuptools" ,python-setuptools)))
    (home-page "https://github.com/nboley/idr")
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (license license:gpl3+)))
|
|
|
|
;; Jellyfish.  The Ruby and Python bindings are each directed to their own
;; output through the corresponding configure flag.
(define-public jellyfish
  (package
    (name "jellyfish")
    (version "2.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32 "0a6xnynqy2ibfbfz86b9g2m2dgm7f1469pmymkpam333gi3p26nk"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for library
               "ruby"                   ;for Ruby bindings
               "python"))               ;for Python bindings
    (arguments
     `(#:configure-flags
       (list (string-append "--enable-ruby-binding="
                            (assoc-ref %outputs "ruby"))
             (string-append "--enable-python-binding="
                            (assoc-ref %outputs "python")))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-SHELL-variable
           (lambda _
             ;; generator_manager.hpp either uses /bin/sh or $SHELL
             ;; to run tests.
             (let ((bash (which "bash")))
               (setenv "SHELL" bash))
             #t)))))
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)))
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; The combined work is published under the GPLv3 or later. Individual
    ;; files such as lib/jsoncpp.cpp are released under the Expat license.
    (license (list license:gpl3+ license:expat))))
|
|
|
|
;; khmer.  Bundled zlib, bzip2 and seqan are removed after unpacking (seqan is
;; replaced by the headers from the "seqan" input), and the test suite runs
;; after installation instead of in the regular 'check phase.
(define-public khmer
  (package
    (name "khmer")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "khmer" version))
       (sha256
        (base32 "0wb05shqh77v00256qlm68vbbx3kl76fyzihszbz5nhanl4ni33a"))
       (patches (search-patches "khmer-use-libraries.patch"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-paths
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Delete bundled libraries.
             (for-each delete-file-recursively
                       '("third-party/zlib"
                         "third-party/bzip2"))
             ;; Replace bundled seqan.
             (let* ((bundled-seqan "third-party/seqan")
                    (bundled-include (string-append bundled-seqan
                                                    "/core/include"))
                    (system-seqan (string-append (assoc-ref inputs "seqan")
                                                 "/include/seqan")))
               (delete-file-recursively bundled-seqan)
               (copy-recursively system-seqan
                                 (string-append bundled-include "/seqan")))
             ;; We do not replace the bundled MurmurHash as the canonical
             ;; repository for this code 'SMHasher' is unsuitable for
             ;; providing a library.  See
             ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
             #t))
         (add-after 'unpack 'set-cc
           (lambda _
             (setenv "CC" "gcc")
             #t))
         ;; It is simpler to test after installation.
         (delete 'check)
         (add-after 'install 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (python (assoc-ref inputs "python"))
                    ;; Of the last five characters of the python input's
                    ;; file name, keep the first three.
                    (python-version
                     (string-take (string-take-right python 5) 3)))
               ;; Append the installed programs and modules to the search
               ;; paths used by the tests.
               (setenv "PATH"
                       (string-append (getenv "PATH") ":" out "/bin"))
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH")
                                      ":" out
                                      "/lib/python" python-version
                                      "/site-packages"))
               (with-directory-excursion "build"
                 (zero? (system* "nosetests" "khmer" "--attr"
                                 "!known_failing")))))))))
    (native-inputs
     `(("seqan" ,seqan)
       ("python-nose" ,python-nose)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("python-screed" ,python-screed)
       ("python-bz2file" ,python-bz2file)))
    (home-page "https://khmer.readthedocs.org/")
    (synopsis "K-mer counting, filtering and graph traversal library")
    (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells. Khmer can make de novo assemblies faster, and
sometimes better. Khmer can also identify and fix problems with shotgun
data.")
    (license license:bsd-3)))
|
|
|
|
;; MACS.  Fetched from PyPI under the name "MACS2" and built with Python 2;
;; tests are disabled.
(define-public macs
  (package
    (name "macs")
    (version "2.1.0.20151222")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "MACS2" version))
       (sha256
        (base32 "1r2hcz6irhcq7lwbafjks98jbn34hv05avgbdjnp6w6mlfjkf8x5"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ; no test target
       #:python ,python-2))             ; only compatible with Python 2.7
    (native-inputs
     `(("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://github.com/taoliu/MACS/")
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
|
|
|
|
;; MAFFT.  The build happens in the "core" subdirectory.  The Makefile is
;; patched to drop a few specialised programs and to keep man pages out of the
;; libexec directory, and external tools are referenced by absolute file name.
(define-public mafft
  (package
    (name "mafft")
    (version "7.299")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mafft.cbrc.jp/alignment/software/mafft-" version
             "-without-extensions-src.tgz"))
       (file-name (string-append name "-" version ".tgz"))
       (sha256
        (base32 "1pwwdy5a17ggx8h9v9y712ilswj27dc3d23r65l56jgjz67y5zc0"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       #:make-flags (let ((out (assoc-ref %outputs "out")))
                      (list (string-append "PREFIX=" out)
                            (string-append "BINDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS.  The program following
               ;; mafft-profile is "f2cl" (as in the replacement text); the
               ;; pattern must spell it in full or it never matches.
               (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               ;; Comment out the clean-up commands for the removed programs.
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               (("^rm -f mafft-distance mafft-distance.exe") "#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Replace references to perl, awk and grep with the absolute
             ;; file names found in PATH.
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure))))
    (inputs
     `(("perl" ,perl)
       ("gawk" ,gawk)
       ("grep" ,grep)))
    (propagated-inputs
     `(("coreutils" ,coreutils)))
    (home-page "http://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "http://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
|
|
|
|
;; MetaBAT.  Built with SCons: the headers and the SConstruct file are patched
;; to use the htslib and samtools inputs, and a single "scons ... install"
;; invocation in the build phase stands in for the check and install phases.
(define-public metabat
  (package
    (name "metabat")
    (version "0.26.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://bitbucket.org/berkeleylab/metabat/get/"
             version ".tar.bz2"))
       (file-name (string-append name "-" version ".tar.bz2"))
       (sha256
        (base32 "1vpfvgsn8wdsv1g7z73zxcncskx7dy7bw5msg1hhibk25ay11pyg"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-includes
           (lambda _
             ;; Include the headers under the directory names used by the
             ;; samtools and htslib inputs instead of "bam/".
             (substitute* "src/BamUtils.h"
               (("^#include \"bam/bam\\.h\"")
                "#include \"samtools/bam.h\"")
               (("^#include \"bam/sam\\.h\"")
                "#include \"samtools/sam.h\""))
             (substitute* "src/KseqReader.h"
               (("^#include \"bam/kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-after 'unpack 'fix-scons
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "SConstruct"
               ;; Keep the variable name "htslib_dir" intact; the
               ;; replacement previously misspelled it as "hitslib_dir",
               ;; which would define an unrelated variable.
               (("^htslib_dir = 'samtools'")
                (string-append "htslib_dir = '"
                               (assoc-ref inputs "htslib")
                               "'"))
               ;; NOTE(review): samtools_dir is pointed at the htslib input,
               ;; not the samtools one -- confirm this is intended.
               (("^samtools_dir = 'samtools'")
                (string-append "samtools_dir = '"
                               (assoc-ref inputs "htslib")
                               "'"))
               (("^findStaticOrShared\\('bam', hts_lib")
                (string-append "findStaticOrShared('bam', '"
                               (assoc-ref inputs "samtools")
                               "/lib'"))
               ;; Do not distribute README.
               (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") ""))
             #t))
         (delete 'configure)
         (replace 'build
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (mkdir out)
               (zero? (system* "scons"
                               (string-append "PREFIX=" out)
                               (string-append "BOOST_ROOT="
                                              (assoc-ref inputs "boost"))
                               "install")))))
         ;; Check and install are carried out during build phase.
         (delete 'check)
         (delete 'install))))
    (inputs
     `(("zlib" ,zlib)
       ("perl" ,perl)
       ("samtools" ,samtools)
       ("htslib" ,htslib)
       ("boost" ,boost)))
    (native-inputs
     `(("scons" ,scons)))
    (home-page "https://bitbucket.org/berkeleylab/metabat")
    (synopsis
     "Reconstruction of single genomes from complex microbial communities")
    (description
     "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
    (license (license:non-copyleft "file://license.txt"
                                   "See license.txt in the distribution."))))
|
|
|
|
;; MISO is fetched as the "misopy" tarball and built with Python 2.  Its
;; setup.py is patched by the origin snippet before building.
(define-public miso
  (package
    (name "miso")
    (version "0.5.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/m/misopy/misopy-"
             version ".tar.gz"))
       ;; `substitute*' in the snippet comes from this module.
       (modules '((guix build utils)))
       (snippet
        '(substitute* "setup.py"
           ;; Use setuptools, or else the executables are not
           ;; installed.
           (("distutils.core") "setuptools")
           ;; Use "gcc" instead of "cc" for compilation: the call is
           ;; inserted in front of the line that starts with "defines".
           (("^defines")
            "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines")))
       (sha256
        (base32
         "0x446867az8ir0z8c1vjqffkp0ma37wm4sylixnkhgawllzx8v5w"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; only Python 2 is supported
       #:tests? #f)) ; no "test" target
    (native-inputs
     `(("python-mock" ,python2-mock) ;for tests
       ("python-pytz" ,python2-pytz) ;for tests
       ("python-setuptools" ,python2-setuptools)))
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (home-page "http://genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
|
|
|
|
(define-public muscle
|
|
(package
|
|
(name "muscle")
|
|
(version "3.8.1551")
|
|
(source (origin
|
|
(method url-fetch/tarbomb)
|
|
(file-name (string-append name "-" version))
|
|
(uri (string-append
|
|
"http://www.drive5.com/muscle/muscle_src_"
|
|
version ".tar.gz"))
|
|
(sha256
|
|
(base32
|
|
"0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
|
|
(build-system gnu-build-system)
|
|
(arguments
|
|
`(#:make-flags (list "LDLIBS = -lm")
|
|
#:phases
|
|
(modify-phases %standard-phases
|
|
(delete 'configure)
|
|
(replace 'check
|
|
;; There are no tests, so just test if it runs.
|
|
(lambda _ (zero? (system* "./muscle" "-version"))))
|
|
(replace 'install
|
|
(lambda* (#:key outputs #:allow-other-keys)
|
|
(let* ((out (assoc-ref outputs "out"))
|
|
(bin (string-append out "/bin")))
|
|
(install-file "muscle" bin)))))))
|
|