You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
8023 lines
317 KiB
8023 lines
317 KiB
;;; GNU Guix --- Functional package management for GNU
|
|
;;; Copyright © 2014, 2015, 2016, 2017 Ricardo Wurmus <rekado@elephly.net>
|
|
;;; Copyright © 2015, 2016 Ben Woodcroft <donttrustben@gmail.com>
|
|
;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
|
|
;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
|
|
;;; Copyright © 2016 Roel Janssen <roel@gnu.org>
|
|
;;; Copyright © 2016 Efraim Flashner <efraim@flashner.co.il>
|
|
;;; Copyright © 2016 Marius Bakke <mbakke@fastmail.com>
|
|
;;; Copyright © 2016 Raoul Bonnal <ilpuccio.febo@gmail.com>
|
|
;;;
|
|
;;; This file is part of GNU Guix.
|
|
;;;
|
|
;;; GNU Guix is free software; you can redistribute it and/or modify it
|
|
;;; under the terms of the GNU General Public License as published by
|
|
;;; the Free Software Foundation; either version 3 of the License, or (at
|
|
;;; your option) any later version.
|
|
;;;
|
|
;;; GNU Guix is distributed in the hope that it will be useful, but
|
|
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
;;; GNU General Public License for more details.
|
|
;;;
|
|
;;; You should have received a copy of the GNU General Public License
|
|
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
(define-module (gnu packages bioinformatics)
|
|
#:use-module ((guix licenses) #:prefix license:)
|
|
#:use-module (guix packages)
|
|
#:use-module (guix utils)
|
|
#:use-module (guix download)
|
|
#:use-module (guix git-download)
|
|
#:use-module (guix hg-download)
|
|
#:use-module (guix build-system ant)
|
|
#:use-module (guix build-system gnu)
|
|
#:use-module (guix build-system cmake)
|
|
#:use-module (guix build-system perl)
|
|
#:use-module (guix build-system python)
|
|
#:use-module (guix build-system r)
|
|
#:use-module (guix build-system ruby)
|
|
#:use-module (guix build-system trivial)
|
|
#:use-module (gnu packages)
|
|
#:use-module (gnu packages autotools)
|
|
#:use-module (gnu packages algebra)
|
|
#:use-module (gnu packages base)
|
|
#:use-module (gnu packages bash)
|
|
#:use-module (gnu packages bison)
|
|
#:use-module (gnu packages boost)
|
|
#:use-module (gnu packages compression)
|
|
#:use-module (gnu packages cpio)
|
|
#:use-module (gnu packages curl)
|
|
#:use-module (gnu packages documentation)
|
|
#:use-module (gnu packages datastructures)
|
|
#:use-module (gnu packages file)
|
|
#:use-module (gnu packages flex)
|
|
#:use-module (gnu packages gawk)
|
|
#:use-module (gnu packages gcc)
|
|
#:use-module (gnu packages gd)
|
|
#:use-module (gnu packages gtk)
|
|
#:use-module (gnu packages glib)
|
|
#:use-module (gnu packages groff)
|
|
#:use-module (gnu packages guile)
|
|
#:use-module (gnu packages haskell)
|
|
#:use-module (gnu packages image)
|
|
#:use-module (gnu packages imagemagick)
|
|
#:use-module (gnu packages java)
|
|
#:use-module (gnu packages linux)
|
|
#:use-module (gnu packages logging)
|
|
#:use-module (gnu packages machine-learning)
|
|
#:use-module (gnu packages man)
|
|
#:use-module (gnu packages maths)
|
|
#:use-module (gnu packages mpi)
|
|
#:use-module (gnu packages ncurses)
|
|
#:use-module (gnu packages pcre)
|
|
#:use-module (gnu packages parallel)
|
|
#:use-module (gnu packages pdf)
|
|
#:use-module (gnu packages perl)
|
|
#:use-module (gnu packages pkg-config)
|
|
#:use-module (gnu packages popt)
|
|
#:use-module (gnu packages protobuf)
|
|
#:use-module (gnu packages python)
|
|
#:use-module (gnu packages readline)
|
|
#:use-module (gnu packages ruby)
|
|
#:use-module (gnu packages serialization)
|
|
#:use-module (gnu packages statistics)
|
|
#:use-module (gnu packages tbb)
|
|
#:use-module (gnu packages tex)
|
|
#:use-module (gnu packages texinfo)
|
|
#:use-module (gnu packages textutils)
|
|
#:use-module (gnu packages time)
|
|
#:use-module (gnu packages tls)
|
|
#:use-module (gnu packages vim)
|
|
#:use-module (gnu packages web)
|
|
#:use-module (gnu packages xml)
|
|
#:use-module (gnu packages xorg)
|
|
#:use-module (gnu packages zip)
|
|
#:use-module (srfi srfi-1))
|
|
|
|
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.38")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
             version ".tgz"))
       (sha256
        (base32 "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         ;; No 'configure' step is run; the versioned C source file is
         ;; compiled directly with gcc in the replaced 'build' phase.
         (delete 'configure)
         (replace 'build
           (lambda _
             (zero? (system* "gcc" "-O3" "-ffast-math" "-finline-functions"
                             "-o" "aragorn"
                             ;; The source file name embeds the version.
                             ,(string-append "aragorn" version ".c")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; 'install-file' creates the target directory when needed
               ;; and keeps the base name of the installed file.
               (install-file "aragorn" (string-append out "/bin"))
               (install-file "aragorn.1"
                             (string-append out "/share/man/man1")))
             #t)))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
|
|
|
|
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source (origin
              (method url-fetch)
              ;; BamM is not available on pypi.
              (uri (string-append
                    "https://github.com/Ecogenomics/BamM/archive/"
                    version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1f35yxp4pc8aadsvbpg6r4kg2jh4fkjci0iby4iyljm6980sac0s"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "c/htslib-1.3.1")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; BamM is Python 2 only.
       ;; Do not use bundled libhts.  Do use the bundled libcfu because it has
       ;; been modified from its original form.
       #:configure-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib" (string-append htslib "/lib")
               "--with-libhts-inc" (string-append htslib "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'autogen
           (lambda _
             (with-directory-excursion "c"
               (let ((sh (which "sh")))
                 ;; Use autogen so that 'configure' works.
                 (substitute* "autogen.sh" (("/bin/sh") sh))
                 (setenv "CONFIG_SHELL" sh)
                 (substitute* "configure" (("/bin/sh") sh))
                 (zero? (system* "./autogen.sh"))))))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         (add-after 'install 'wrap-executable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Record the build-time PATH in the wrapper of the 'bamm'
             ;; executable.
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PATH")))
               (wrap-program (string-append out "/bin/bamm")
                 `("PATH" ":" prefix (,path))))
             #t))
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key outputs #:allow-other-keys)
             (setenv "PATH"
                     (string-append (assoc-ref outputs "out")
                                    "/bin:"
                                    (getenv "PATH")))
             ;; The "major.minor" part of the site-packages directory is
             ;; computed from the python-2 package itself.  Slicing it out of
             ;; the store file name breaks as soon as the patch level has two
             ;; digits (e.g. "...-python-2.7.12" would give ".7.").
             (setenv "PYTHONPATH"
                     (string-append
                      (assoc-ref outputs "out")
                      "/lib/python"
                      ,(version-major+minor (package-version python-2))
                      "/site-packages:"
                      (getenv "PYTHONPATH")))
             ;; There are 2 errors printed, but they are safe to ignore:
             ;; 1) [E::hts_open_format] fail to open file ...
             ;; 2) samtools view: failed to open ...
             (zero? (system* "nosetests")))))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("zlib" ,zlib)
       ("python-nose" ,python2-nose)
       ("python-pysam" ,python2-pysam)))
    (inputs
     `(("htslib" ,htslib)
       ("samtools" ,samtools)
       ("bwa" ,bwa)
       ("grep" ,grep)
       ("sed" ,sed)
       ("coreutils" ,coreutils)))
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "http://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
|
|
|
|
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/pezmaster31/bamtools/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1brry29bw2xr2l9pqn240rkqwayg85b8qq78zk2zs6nlspk4d018"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Put the output's lib/bamtools directory on the RUNPATH via
             ;; the linker flags seen by the 'configure' phase.
             (let ((libdir (string-append (assoc-ref outputs "out")
                                          "/lib/bamtools")))
               (setenv "LDFLAGS"
                       (string-append "-Wl,-rpath=" libdir))))))))
    (inputs `(("zlib" ,zlib)))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
|
|
|
|
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/bcftools/releases/download/"
             version "/bcftools-" version ".tar.bz2"))
       (sha256
        (base32 "095ry68vmz9q5s1scjsa698dhgyvgw5aicz24c19iwfbai07mhqj"))
       (modules '((guix build utils)))
       (snippet
        ;; Delete bundled htslib.
        '(delete-file-recursively "htslib-1.3.1"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags
       ;; Every htslib-related make variable points into the "htslib" input,
       ;; so look its store directory up once.
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "USE_GPL=1"
               (string-append "prefix=" (assoc-ref %outputs "out"))
               (string-append "HTSDIR=" htslib "/include")
               (string-append "HTSLIB=" htslib "/lib/libhts.a")
               (string-append "BGZIP=" htslib "/bin/bgzip")
               (string-append "TABIX=" htslib "/bin/tabix")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-Makefile
           (lambda _
             (substitute* "Makefile"
               ;; Do not attempt to build htslib.
               (("^include \\$\\(HTSDIR\\)/htslib\\.mk") "")
               ;; Link against GSL cblas.
               (("-lcblas") "-lgslcblas"))
             #t))
         (delete 'configure)
         (add-before 'check 'patch-tests
           (lambda _
             ;; Replace the hard-coded /bin/bash of the test driver with the
             ;; bash found on PATH.
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t)))))
    (native-inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
|
|
|
|
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.14")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/bedops/bedops/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1kqbac547wyqma81cyky9n7mkgikjpsfd3nnmcm6hpqwanqgh10v"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Unpack the tarballs to benefit from shebang patching.  The
             ;; phase fails if any of the three extractions fails instead of
             ;; silently carrying on.
             (and (with-directory-excursion "third-party"
                    (and (zero? (system* "tar" "xvf" "jansson-2.6.tar.bz2"))
                         (zero? (system* "tar" "xvf" "zlib-1.2.7.tar.bz2"))
                         (zero? (system* "tar" "xvf" "bzip2-1.0.6.tar.bz2"))))
                  (begin
                    ;; Disable unpacking of tarballs in Makefile.
                    (substitute* "system.mk/Makefile.linux"
                      (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                      (("\\./configure") "CONFIG_SHELL=bash ./configure"))
                    (substitute* "third-party/zlib-1.2.7/Makefile.in"
                      (("^SHELL=.*$") "SHELL=bash\n"))
                    #t))))
         ;; There is no top-level 'configure' step to run.
         (delete 'configure))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
|
|
|
|
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0xvri5hnp2iim1cx6mcd5d9f102p5ql41x69rd6106x1c17pinqm"))))
    (build-system gnu-build-system)
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy everything found under the build tree's "bin" directory
             ;; into the output's bin directory.
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               (for-each (lambda (program)
                           (install-file program bin))
                         (find-files "bin" ".*")))
             #t)))))
    (native-inputs
     `(("python" ,python-2)))
    (inputs
     `(("samtools" ,samtools)
       ("zlib" ,zlib)))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:gpl2)))
|
|
|
|
;; Later releases of bedtools produce files with more columns than
|
|
;; what Ribotaper expects.
|
|
(define-public bedtools-2.18
  ;; Variant of 'bedtools' that differs only in version and source; all other
  ;; fields are inherited.
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "05vrnr8yp7swfagshzpgqmzk1blnwnq8pq5pckzi1m26w98d63vf"))))))
|
|
|
|
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                           "files/RiboTaper/RiboTaper_Version_"
                           version ".tar.gz"))
       (sha256
        (base32 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (inputs
     ;; Note the pinned tool versions: bedtools 2.18 and the samtools 0.1
     ;; series are used here rather than the default packages.
     `(("bedtools" ,bedtools-2.18)
       ("samtools" ,samtools-0.1)
       ("r" ,r)
       ("r-foreach" ,r-foreach)
       ("r-xnomial" ,r-xnomial)
       ("r-domc" ,r-domc)
       ("r-multitaper" ,r-multitaper)
       ("r-seqinr" ,r-seqinr)))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))
|
|
|
|
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/lh3/bioawk/archive/v"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1daizxsk17ahi9n58fj8vpgwyhzrzh54bzqhanjanp88kgrz7gjw"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (man1 (string-append out "/share/man/man1")))
               ;; The manual page is shipped as "awk.1"; install it under
               ;; the name of the program.
               (mkdir-p man1)
               (copy-file "awk.1" (string-append man1 "/bioawk.1"))
               (install-file "bioawk" (string-append out "/bin"))))))))
    (native-inputs
     `(("bison" ,bison)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
|
|
|
|
(define-public python2-pybedtools
  (package
    (name "python2-pybedtools")
    (version "0.6.9")
    (source (origin
              (method url-fetch)
              ;; Use 'pypi-uri' like the other PyPI-hosted packages in this
              ;; file instead of spelling out the download URL.
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "1ldzdxw1p4y3g2ignmggsdypvqkcwqwzhdha4rbgpih048z5p4an"))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2)) ; no Python 3 support
    (inputs
     `(("python-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-pyyaml" ,python2-pyyaml)
       ("python-nose" ,python2-nose)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
|
|
|
|
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.5")
    (source
     (origin
       (method url-fetch)
       ;; Use GitHub as source because PyPI distribution does not contain
       ;; test data: https://github.com/biocore/biom-format/issues/693
       (uri (string-append "https://github.com/biocore/biom-format/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "1n25w3p1rixbpac8iysmzcja6m4ip5r6sz19l8y6wlwi49hxn278"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-future" ,python-future)
       ("python-click" ,python-click)
       ("python-h5py" ,python-h5py)))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)
    ;; The Python 2 variant is a separately defined package; 'delay' defers
    ;; the reference to it.
    (properties
     `((python2-variant . ,(delay python2-biom-format))))))
|
|
|
|
(define-public python2-biom-format
  ;; Start from the automatically derived Python 2 package and prepend a
  ;; #:phases argument to the arguments it already carries.
  (let ((parent (package-with-python2
                 (strip-python2-variant python-biom-format))))
    (package
      (inherit parent)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Do not require the unmaintained pyqi library.
           (add-after 'unpack 'remove-pyqi
             (lambda _
               (substitute* "setup.py"
                 (("install_requires.append\\(\"pyqi\"\\)") "pass"))
               #t)))
         ,@(package-arguments parent))))))
|
|
|
|
(define-public bioperl-minimal
  (let* ((runtime-inputs
          `(("perl-module-build" ,perl-module-build)
            ("perl-data-stag" ,perl-data-stag)
            ("perl-libwww" ,perl-libwww)
            ("perl-uri" ,perl-uri)))
         ;; Package names of the de-duplicated transitive target inputs of
         ;; the inputs above.  The list is spliced into the build-side
         ;; 'wrap-programs' phase below.
         (transitive-input-names
          (map (lambda (entry)
                 (package-name (cadr entry)))
               (delete-duplicates
                (append-map (lambda (entry)
                              (package-transitive-target-inputs (cadr entry)))
                            runtime-inputs)))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method url-fetch)
         ;; Release tags use dashes where the version has dots.
         (uri (string-append "https://github.com/bioperl/bioperl-live/"
                             "archive/release-"
                             (string-map (lambda (c)
                                           (if (char=? c #\.) #\- c))
                                         version)
                             ".tar.gz"))
         (sha256
          (base32 "12phgpxwgkqflkwfb9dcqg7a31dpjlfhar8wcgv0aj5ln4akfz06"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime.  As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin/"))
                      (directories
                       (cons (string-append out "/lib/perl5/site_perl")
                             (map (lambda (label)
                                    (assoc-ref %build-inputs label))
                                  ',transitive-input-names)))
                      (path (string-join directories ":")))
                 (for-each (lambda (script)
                             (wrap-program script
                               `("PERL5LIB" ":" prefix (,path))))
                           (find-files bin "\\.pl$"))
                 #t))))))
      (inputs runtime-inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "http://search.cpan.org/dist/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license (package-license perl)))))
|
|
|
|
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.68")
    (source
     (origin
       (method url-fetch)
       ;; use PyPi rather than biopython.org to ease updating
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "07qc7nz0k77y8hf8s18rscvibvm91zw0kkq7ylrhisf8vp8hkp6i"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Some tests require a home directory to be set.
         (add-before 'check 'set-home
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "http://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license
     (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
|
|
|
|
;; Python 2 counterpart of 'python-biopython', derived automatically.
(define-public python2-biopython
  (package-with-python2 python-biopython))
|
|
|
|
;; An outdated version of biopython is required for seqmagick, see
|
|
;; https://github.com/fhcrc/seqmagick/issues/59
|
|
;; When that issue has been resolved this package should be removed.
|
|
(define python2-biopython-1.66
  ;; Not exported: only the version and source differ from
  ;; 'python2-biopython'; everything else is inherited.
  (package
    (inherit python2-biopython)
    (version "1.66")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "1gdv92593klimg22icf5j9by7xiq86jnwzkpz4abaa05ylkdf6hp"))))))
|
|
|
|
(define-public bpp-core
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582"))
    (package
      (name "bpp-core")
      ;; The version carries the first seven characters of the commit ID.
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-core")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f))
      (inputs
       ;; Compilation of bpp-phyl fails with GCC 4.9 so we compile all of the
       ;; bpp packages with GCC 5.
       `(("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (license license:cecill-c))))
|
|
|
|
(define-public bpp-phyl
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2"))
    (package
      (name "bpp-phyl")
      ;; The version carries the first seven characters of the commit ID.
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f
         ;; If out-of-source, test data is not copied into the build directory
         ;; so the tests fail.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ;; GCC 4.8 fails due to an 'internal compiler error', so we use a more
         ;; modern GCC.
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ phylogenetic Library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
      (license license:cecill-c))))
|
|
|
|
(define-public bpp-popgen
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f"))
    (package
      (name "bpp-popgen")
      ;; The version carries the first seven characters of the commit ID.
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ population genetics library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides population genetics-related modules.")
      (license license:cecill-c))))
|
|
|
|
(define-public bpp-seq
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33"))
    (package
      (name "bpp-seq")
      ;; The version carries the first seven characters of the commit ID.
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-seq")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f
         ;; If out-of-source, test data is not copied into the build directory
         ;; so the tests fail.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("gcc" ,gcc-5)))               ; Use GCC 5 as per 'bpp-core'.
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ sequence library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides sequence-related modules.")
      (license license:cecill-c))))
|
|
|
|
(define-public bppsuite
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
    (package
      (name "bppsuite")
      ;; The version carries the first seven characters of the commit ID.
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bppsuite")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f)) ; There are no tests.
      (native-inputs
       `(("groff" ,groff)
         ("man-db" ,man-db)
         ("texinfo" ,texinfo)))
      (inputs
       ;; Each input needs its own label; bpp-popgen used to be listed under
       ;; a second "bpp-phyl" label.
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("bpp-phyl" ,bpp-phyl)
         ("bpp-popgen" ,bpp-popgen)
         ("gcc" ,gcc-5)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bioinformatics tools written with the Bio++ libraries")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
      (license license:cecill-c))))
|
|
|
|
(define-public blast+
  (package
    (name "blast+")
    (version "2.4.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
                    version "/ncbi-blast-" version "+-src.tar.gz"))
              (sha256
               (base32
                "14n9jik6vhiwjd3m7bach4xj1pzfn0szbsbyfxybd9l9cc43b6mb"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Drop the bundled bzip2 and zlib sources together with the
                  ;; useless msbuild directory.
                  (for-each delete-file-recursively
                            '("c++/src/util/compress/bzip2"
                              "c++/src/util/compress/zlib"
                              "c++/src/build-system/project_tree_builder/msbuild"))
                  ;; Keep the compress Makefile from descending into the
                  ;; directories removed above.
                  (substitute* "c++/src/util/compress/Makefile.in"
                    (("bzip2 zlib api") "api"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There are three(!) tests for this massive library, and all fail with
       ;; "unparsable timing stats".
       ;; ERR [127] -- [util/regexp] test_pcre.sh (unparsable timing stats)
       ;; ERR [127] -- [serial/datatool] datatool.sh (unparsable timing stats)
       ;; ERR [127] -- [serial/datatool] datatool_xml.sh (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; All remaining work happens inside the "c++" subdirectory.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "c++") #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Return the replacement text for the tool named TOOL, or #f
             ;; (after printing a warning) when it cannot be found in PATH.
             (define (which* tool)
               (if (string=? tool "date")
                   ;; make call to "date" deterministic
                   "date -d @0"
                   (or (which tool)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 tool)
                         #f))))

             ;; Build-system files and check scripts that refer to tools by
             ;; absolute file name.
             (define files-with-tool-paths
               (append '("src/build-system/configure.ac"
                         "src/build-system/configure"
                         "scripts/common/impl/if_diff.sh"
                         "scripts/common/impl/run_with_lock.sh"
                         "src/build-system/Makefile.configurables.real"
                         "src/build-system/Makefile.in.top"
                         "src/build-system/Makefile.meta.gmake=no"
                         "src/build-system/Makefile.meta.in"
                         "src/build-system/Makefile.meta_l"
                         "src/build-system/Makefile.meta_p"
                         "src/build-system/Makefile.meta_r"
                         "src/build-system/Makefile.mk.in"
                         "src/build-system/Makefile.requirements"
                         "src/build-system/Makefile.rules_with_autodep.in")
                       (find-files "scripts/common/check" "\\.sh$")))

             ;; Rewrite hardcoded paths to various tools; leave the text
             ;; untouched when no replacement is available.
             (substitute* files-with-tool-paths
               (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" whole prefix tool)
                (or (which* tool) whole)))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; rewrite "/var/tmp" in check script
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; do not reset PATH
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (add-before 'configure 'set-HOME
           ;; $HOME needs to be set at some point during the configure phase
           (lambda _ (setenv "HOME" "/tmp") #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out         (assoc-ref outputs "out"))
                    (lib-dir     (string-append (assoc-ref outputs "lib")
                                                "/lib"))
                    (include-dir (string-append (assoc-ref outputs "include")
                                                "/include/ncbi-tools++"))
                    (flags
                     (list (string-append "--with-build-root="
                                          (getcwd) "/build")
                           (string-append "--prefix=" out)
                           (string-append "--libdir=" lib-dir)
                           (string-append "--includedir=" include-dir)
                           (string-append "--with-bz2="
                                          (assoc-ref inputs "bzip2"))
                           (string-append "--with-z="
                                          (assoc-ref inputs "zlib"))
                           ;; Each library is built twice by default, once
                           ;; with "-static" in its name, and again
                           ;; without.
                           "--without-static"
                           "--with-dll")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (zero? (apply system* "./configure.orig" flags))))))))
    (outputs '("out"                    ;  19 MB
               "lib"                    ; 203 MB
               "include"))              ;  32 MB
    (native-inputs
     `(("cpio" ,cpio)))
    (inputs
     `(("bzip2" ,bzip2)
       ("zlib" ,zlib)))
    (home-page "http://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; Most of the sources are in the public domain, with the following
    ;; exceptions:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
|
|
|
|
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, google-sparsehash, zlib, and
                  ;; the .git directory.
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"
                                           "-std=c++11"))))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled pigz sources are deleted by the source snippet, so the
         ;; Makefile must not try to build them.
         (add-after 'unpack 'do-not-build-bundled-pigz
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         ;; Point the program at the kmc binary installed by this package and
         ;; at the pigz binary from the inputs.
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         ;; Copy the two executables by hand in place of the standard
         ;; 'install' phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("sparsehash" ,sparsehash)
       ("pigz" ,pigz)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized bloom
filter. BLESS produces accurate correction results with much less memory
compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
|
|
|
|
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.2.9")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BenLangmead/bowtie2/archive/v"
                                  version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "1vp5db8i7is57iwjybcdg18f5ivyzlj5g1ix1nlvxainzivhz55g"))
              (modules '((guix build utils)))
              (snippet
               '(substitute* "Makefile"
                  ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                  (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                  (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))))
    (build-system gnu-build-system)
    (inputs `(("perl" ,perl)
              ("perl-clone" ,perl-clone)
              ("perl-test-deep" ,perl-test-deep)
              ("perl-test-simple" ,perl-test-simple)
              ("python" ,python-2)
              ("tbb" ,tbb)))
    (arguments
     '(#:make-flags
       (list "allall"
             "WITH_TBB=1"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         ;; Run upstream's Perl test script against the freshly built
         ;; binaries.  'system*' returns the exit status, and any integer
         ;; counts as true in Scheme, so the status must be compared with
         ;; zero for a failing test suite to fail the phase.
         (replace 'check
           (lambda _
             (zero? (system* "perl"
                             "scripts/test/simple_tests.pl"
                             "--bowtie2=./bowtie2"
                             "--bowtie2-build=./bowtie2-build"))))
         (delete 'configure))))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
|
|
|
|
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
                    version ".tar.gz"))
              (sha256
               (base32
                "168zlzykq622zbgkh90a90f1bdgsxkscq2zxzbj8brq80hbjpyp7"))
              (patches (search-patches "tophat-build-with-later-seqan.patch"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove bundled SeqAn and samtools
                  (delete-file-recursively "src/SeqAn-1.3")
                  (delete-file-recursively "src/samtools-0.1.18")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-samtools
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Remove every trace of the bundled samtools (deleted by the
             ;; source snippet) from the Makefile template.
             (substitute* "src/Makefile.in"
               (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ prefix) prefix)
               (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
               (("SAMPROG = samtools_0\\.1\\.18") "")
               (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
               (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
             ;; Call the samtools executable found in PATH instead of the
             ;; bundled one.  The dots are escaped so that only the literal
             ;; program name matches, as in the patterns above.
             (substitute* '("src/common.cpp"
                            "src/tophat.py")
               (("samtools_0\\.1\\.18") (which "samtools")))
             ;; Include the samtools headers from their "samtools/"
             ;; subdirectory.
             (substitute* '("src/common.h"
                            "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             (substitute* '("src/bwt_map.h"
                            "src/map2gtf.h"
                            "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("samtools" ,samtools-0.1)
       ("ncurses" ,ncurses)
       ("python" ,python-2)
       ("perl" ,perl)
       ("zlib" ,zlib)
       ("seqan" ,seqan)))
    (home-page "http://ccb.jhu.edu/software/tophat/index.shtml")
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    ;; TopHat is released under the Boost Software License, Version 1.0
    ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
|
|
|
|
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.12")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bio-bwa/bwa-"
                                  version ".tar.bz2"))
              (sha256
               (base32
                "1330dpqncv0px3pbhjzz1gwgg39kkcv2r9qp2xs0sixf8z8wl7bh"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Install the executable, the README and the manual page by hand
         ;; in place of the standard 'install' phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/bwa"))
                    (man (string-append out "/share/man/man1")))
               (install-file "bwa" bin)
               (install-file "README.md" doc)
               (install-file "bwa.1" man)
               ;; Report success explicitly instead of relying on the
               ;; unspecified return value of 'install-file'.
               #t)))
         ;; no "configure" script
         (delete 'configure))))
    (inputs `(("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
|
|
|
|
(define-public bwa-pssm
  ;; Every field not given below (notably 'arguments' and
  ;; 'supported-systems') is inherited from 'bwa'.
  (package (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/pkerpedjiev/bwa-pssm/"
                                  "archive/" version ".tar.gz"))
              (file-name (string-append name "-" version ".tar.gz"))
              (sha256
               (base32
                "02p7mpbs4mlxmn84g2x4ghak638vbj4lqix2ipx5g84pz9bhdavg"))))
    (build-system gnu-build-system)
    (inputs
     `(("gdsl" ,gdsl)
       ("zlib" ,zlib)
       ("perl" ,perl)))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
|
|
|
|
(define-public python2-bx-python
  (package
    (name "python2-bx-python")
    (version "0.7.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://pypi.python.org/packages/source/b/bx-python/bx-python-"
             version ".tar.gz"))
       (sha256
        (base32 "0ld49idhc5zjdvbhvjq1a2qmpjj7h5v58rqr25dzmfq7g34b50xh"))
       (modules '((guix build utils)))
       (snippet
        ;; Strip the two lines of "setup.py" that pull in the outdated
        ;; "distribute" module.
        '(substitute* "setup.py"
           (("^from distribute_setup import use_setuptools") "")
           (("^use_setuptools\\(\\)") "")))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       ;; The tests fail because the test data are not included.
       #:tests? #f))
    (native-inputs
     `(("python-nose" ,python2-nose)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("zlib" ,zlib)))
    (home-page "http://bitbucket.org/james_taylor/bx-python/")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
|
|
|
|
(define-public python-pysam
  (package
    (name "python-pysam")
    (version "0.9.1.4")
    (source
     (origin
       (method url-fetch)
       ;; Test data is missing on PyPi.
       (uri (string-append
             "https://github.com/pysam-developers/pysam/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0y41ssbg6nvn2jgcbnrvkzblpjcwszaiv1rgyd8dwzjkrbfsgsmc"))
       (modules '((guix build utils)))
       (snippet
        ;; Drop bundled htslib.  TODO: Also remove samtools and bcftools.
        '(delete-file-recursively "htslib"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Tell the build where to find the external htslib and set the
         ;; ncurses-related compiler and linker flags.
         (add-before 'build 'set-flags
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib (assoc-ref inputs "htslib")))
               (setenv "HTSLIB_MODE" "external")
               (setenv "HTSLIB_LIBRARY_DIR" (string-append htslib "/lib"))
               (setenv "HTSLIB_INCLUDE_DIR" (string-append htslib "/include"))
               (setenv "LDFLAGS" "-lncurses")
               (setenv "CFLAGS" "-D_CURSES_LIB=1")
               #t)))
         ;; The standard 'check' phase is removed and a new one of the same
         ;; name runs once the package has been installed.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((python (assoc-ref inputs "python"))
                    ;; The first three of the last five characters of the
                    ;; python input's file name; presumably the major.minor
                    ;; version -- this depends on the shape of the store
                    ;; file name.
                    (python-version
                     (string-take (string-take-right python 5) 3))
                    (site-packages
                     (string-append (assoc-ref outputs "out")
                                    "/lib/python" python-version
                                    "/site-packages")))
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH")
                                      ":" site-packages))
               ;; Step out of source dir so python does not import from CWD.
               (chdir "tests")
               (setenv "HOME" "/tmp")
               (and (zero? (system* "make" "-C" "pysam_data"))
                    (zero? (system* "make" "-C" "cbcf_data"))
                    (zero? (system* "nosetests" "-v")))))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ;; The remaining dependencies are for tests only.
       ("samtools" ,samtools)
       ("bcftools" ,bcftools)
       ("python-nose" ,python-nose)))
    (inputs
     `(("ncurses" ,ncurses)
       ("zlib" ,zlib)))
    (propagated-inputs
     `(("htslib" ,htslib)))             ; Included from installed header files.
    (home-page "https://github.com/pysam-developers/pysam")
    (synopsis "Python bindings to the SAMtools C API")
    (description
     "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
also includes an interface for tabix.")
    (license license:expat)))
|
|
|
|
;; Variant of 'python-pysam' produced by 'package-with-python2'.
(define-public python2-pysam
  (package-with-python2 python-pysam))
|
|
|
|
(define-public python-twobitreader
  (package
    (name "python-twobitreader")
    (version "3.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "twobitreader" version))
       (sha256
        (base32 "1q8wnj2kga9nz1lwc4w7qv52smfm536hp6mc8w6s53lhyj0mpi22"))))
    (build-system python-build-system)
    (arguments
     ;; The PyPi release does not ship the tests.
     ;; TODO Try building from the Git repo or asking the upstream maintainer
     ;; to distribute the tests on PyPi.
     '(#:tests? #f))
    (native-inputs
     `(("python-sphinx" ,python-sphinx)))
    (home-page "https://github.com/benjschiller/twobitreader")
    (synopsis "Python library for reading .2bit files")
    (description
     "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
    (license license:artistic2.0)))
|
|
|
|
;; Variant of 'python-twobitreader' produced by 'package-with-python2'.
(define-public python2-twobitreader
  (package-with-python2 python-twobitreader))
|
|
|
|
(define-public python-plastid
  (package
    (name "python-plastid")
    (version "0.4.6")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "plastid" version))
       (sha256
        (base32 "1sqkz5d3b9kf688mp7k771c87ins42j7j0whmkb49cb3fsg8s8lj"))))
    (build-system python-build-system)
    ;; Tests are disabled because some test files are not included.
    (arguments `(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)
       ("python-biopython" ,python-biopython)
       ("python-twobitreader" ,python-twobitreader)
       ("python-termcolor" ,python-termcolor)))
    (home-page "https://github.com/joshuagryphon/plastid")
    (synopsis "Python library for genomic analysis")
    (description
     "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
    (license license:bsd-3)))
|
|
|
|
;; Variant of 'python-plastid' produced by 'package-with-python2'.
(define-public python2-plastid
  (package-with-python2 python-plastid))
|
|
|
|
(define-public cd-hit
  (package
    (name "cd-hit")
    (version "4.6.6")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/weizhongli/cdhit/releases/download/V"
             version "/cd-hit-v" version "-2016-0711.tar.gz"))
       (sha256
        (base32 "1w8hd4fszgg29nqiz569fldwy012la77nljcmlhglgicws56z54p"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags
       ;; The Makefile copies executables straight into PREFIX, so PREFIX is
       ;; set to the "bin" directory of the output.
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "PREFIX=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         ;; Remove sources of non-determinism: the build date and time that
         ;; would otherwise be embedded in the programs.
         (add-after 'unpack 'be-timeless
           (lambda _
             (substitute* "cdhit-utility.c++"
               ((" \\(built on \" __DATE__ \"\\)") ""))
             (substitute* "cdhit-common.c++"
               (("__DATE__") "\"0\"")
               (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
             #t))
         ;; The "install" target does not create the target directory
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bin)
               #t)))
         ;; No "configure" script
         (delete 'configure))))
    (inputs
     `(("perl" ,perl)))
    (home-page "http://weizhongli-lab.org/cd-hit/")
    (synopsis "Cluster and compare protein or nucleotide sequences")
    (description
     "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences. CD-HIT is designed to be fast and handle extremely large
databases.")
    ;; The manual says: "It can be copied under the GNU General Public License
    ;; version 2 (GPLv2)."
    (license license:gpl2)))
|
|
|
|
(define-public clipper
  (package
    (name "clipper")
    (version "1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/YeoLab/clipper/archive/"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32 "0pflmsvhbf8izbgwhbhj1i7349sw1f55qpqj8ljmapp16hb0p0qi"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete the shared objects and the "dist/" directory shipped in
           ;; the archive.
           (for-each delete-file
                     '("clipper/src/peaks.so"
                       "clipper/src/readsToWiggle.so"))
           (delete-file-recursively "dist/")
           ;; remove unnecessary setup dependency
           (substitute* "setup.py"
             (("setup_requires = .*") ""))
           #t))))
    (build-system python-build-system)
    (arguments `(#:python ,python-2))   ; only Python 2 is supported
    (native-inputs
     `(("python-mock" ,python2-mock)    ; for tests
       ("python-pytz" ,python2-pytz)))  ; for tests
    (inputs
     `(("htseq" ,htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pandas" ,python2-pandas)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
|
|
|
|
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/codingquarry/CodingQuarry_v"
                    version ".tar.gz"))
              (sha256
               (base32
                "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the documentation, the "QuarryFiles" data directory, the
         ;; executable and the helper script by hand in place of the
         ;; standard 'install' phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/codingquarry")))
               (install-file "INSTRUCTIONS.pdf" doc)
               (copy-recursively "QuarryFiles"
                                 (string-append out "/QuarryFiles"))
               (install-file "CodingQuarry" bin)
               (install-file "CufflinksGTF_to_CodingQuarryGFF3.py" bin)
               ;; Report success explicitly instead of relying on the
               ;; unspecified return value of 'install-file'.
               #t))))))
    (inputs `(("openmpi" ,openmpi)))
    ;; QUARRY_PATH is the search path for the "QuarryFiles" directory
    ;; installed above.
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (license license:gpl3+)))
|
|
|
|
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32 "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Neither a 'configure' nor a 'build' step is run; the files are
         ;; only copied into place.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (copy-recursively "src" (string-append out "/src"))
               (mkdir bin)
               ;; Append the output directory, which now holds "src", to the
               ;; module lookup path of the script.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 out "\")\nfrom argparse")))
               (copy-file "couger" (string-append bin "/couger")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'couger' runs with the correct PYTHONPATH, i.e. the
             ;; one in effect at build time.
             (let ((program     (string-append (assoc-ref outputs "out")
                                               "/bin/couger"))
                   (python-path (getenv "PYTHONPATH")))
               (wrap-program program
                 `("PYTHONPATH" ":" prefix (,python-path))))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r" ,r)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
|
|
|
|
(define-public clustal-omega
  ;; Built with the default phases of the GNU build system; argtable is the
  ;; only declared input.
  (package
    (name "clustal-omega")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32 "02ibkx0m0iwz8nscg998bh41gg251y56cgh86bvyrii5m8kjgwqf"))))
    (build-system gnu-build-system)
    (inputs `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
|
|
|
|
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.2.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/crossmap/CrossMap-"
                                  version ".tar.gz"))
              (sha256
               (base32
                "07y179f63d7qnzdvkqcziwk9bs3k4zhp81q392fp1hwszjdvy22f"))
              ;; This patch has been sent upstream already and is available
              ;; for download from Sourceforge, but it has not been merged.
              (patches (search-patches "crossmap-allow-system-pysam.patch"))
              (modules '((guix build utils)))
              ;; remove bundled copy of pysam
              (snippet
               '(delete-file-recursively "lib/pysam"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Set CROSSMAP_USE_SYSTEM_PYSAM now that the bundled pysam has been
         ;; removed by the source snippet; presumably this is the switch
         ;; introduced by the patch above.  Return #t explicitly rather than
         ;; the unspecified value of 'setenv'.
         (add-after 'unpack 'set-env
           (lambda _
             (setenv "CROSSMAP_USE_SYSTEM_PYSAM" "1")
             #t)))))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python2-cython)
       ("python-nose" ,python2-nose)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
|
|
|
|
(define-public cufflinks
  (package
    (name "cufflinks")
    (version "2.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://cole-trapnell-lab.github.io/"
                           "cufflinks/assets/downloads/cufflinks-"
                           version ".tar.gz"))
       (sha256
        (base32 "1bnm10p8m7zq4qiipjhjqb24csiqdm1pwc8c795z253r2xk6ncg8"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       ;; Point the configure script at the samtools input.
       (let ((samtools (assoc-ref %build-inputs "samtools")))
         (list (string-append "--with-bam=" samtools)))
       #:make-flags
       (let ((eigen (assoc-ref %build-inputs "eigen")))
         (list
          ;; The includes for "eigen" are located in a subdirectory.
          (string-append "EIGEN_CPPFLAGS=-I" eigen "/include/eigen3/")
          ;; Cufflinks must be linked with various boost libraries.
          (string-append "LDFLAGS="
                         (string-join '("-lboost_system"
                                        "-lboost_serialization"
                                        "-lboost_thread")))))
       #:phases
       (modify-phases %standard-phases
         ;; Look for the BAM headers under "samtools/" rather than "bam/".
         (add-after 'unpack 'fix-search-for-bam
           (lambda _
             (substitute* '("ax_bam.m4"
                            "configure"
                            "src/hits.h")
               (("<bam/sam\\.h>") "<samtools/sam.h>")
               (("<bam/bam\\.h>") "<samtools/bam.h>")
               (("<bam/version\\.hpp>") "<samtools/version.h>"))
             #t)))))
    (inputs
     `(("eigen" ,eigen)
       ("samtools" ,samtools-0.1)
       ("htslib" ,htslib)
       ("boost" ,boost)
       ("python" ,python-2)
       ("zlib" ,zlib)))
    (home-page "http://cole-trapnell-lab.github.io/cufflinks/")
    (synopsis "Transcriptome assembly and RNA-Seq expression analysis")
    (description
     "Cufflinks assembles RNA transcripts, estimates their abundances,
and tests for differential expression and regulation in RNA-Seq
samples. It accepts aligned RNA-Seq reads and assembles the
alignments into a parsimonious set of transcripts. Cufflinks then
estimates the relative abundances of these transcripts based on how
many reads support each one, taking into account biases in library
preparation protocols.")
    (license license:boost1.0)))
|
|
|
|
;; Cutadapt 1.12, a Python package whose test suite is run against the
;; installed files rather than the build tree.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "1.12")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/marcelm/cutadapt/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "19smhh6444ikn4jlmyhvffw4m5aw7yg07rqsk7arg8dkwyga1i4v"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The stock 'check phase is dropped and re-added after 'install,
         ;; because the tests must be run after installation.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((python (assoc-ref inputs "python"))
                    ;; Keep the first three of the last five characters of
                    ;; the python store file name.
                    ;; NOTE(review): presumably this turns a trailing
                    ;; "X.Y.Z" version into "X.Y" -- confirm that it still
                    ;; holds for longer version strings.
                    (major+minor (string-take (string-take-right python 5)
                                              3)))
               ;; Append the freshly installed site-packages directory to
               ;; the existing PYTHONPATH.
               (setenv "PYTHONPATH"
                       (string-append
                        (getenv "PYTHONPATH")
                        ":" (assoc-ref outputs "out")
                        "/lib/python"
                        major+minor
                        "/site-packages"))
               (zero? (system* "nosetests" "-P" "tests"))))))))
    (inputs
     `(("python-xopen" ,python-xopen)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (home-page "https://cutadapt.readthedocs.io/en/stable/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
|
|
|
|
;; libBigWig 0.1.4.  There is no configure script; the Makefile is driven
;; directly through make flags.
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/dpryan79/libBigWig/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "098rjh35pi4a9q83n8wiwvyzykjqj6l8q189p1xgfw4ghywdlvw1"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags
       (list "CC=gcc"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Strip the testRemote invocation from the Makefile before the
         ;; tests are run.
         (add-before 'check 'disable-curl-test
           (lambda _
             (substitute* "Makefile"
               (("./test/testRemote.*") ""))
             #t))
         ;; Create the installation directories up front.  This has been
         ;; fixed with the upstream commit 4ff6959cd8a0, but there has not
         ;; yet been a release containing this change.
         (add-before 'install 'create-target-dirs
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               (for-each (lambda (sub-directory)
                           (mkdir-p (string-append prefix sub-directory)))
                         '("/lib" "/include"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (native-inputs
     `(("doxygen" ,doxygen)))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
|
|
|
|
;; pyBigWig 0.2.5, linked against the separately packaged libbigwig
;; instead of the copy shipped in the source archive.
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.2.5")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyBigWig" version))
       (sha256
        (base32
         "0yrpdxg3y0sny25x4w22lv1k47jzccqjmg7j4bp0hywklvp0hg7d"))
       (modules '((guix build utils)))
       ;; Get rid of the bundled libBigWig sources.
       (snippet
        '(begin
           (delete-file-recursively "libBigWig")))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Prepend "BigWig" to the library list in setup.py.
         (add-after 'unpack 'link-with-libBigWig
           (lambda _
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))
|
|
|
|
;; Variant of python-pybigwig produced by 'package-with-python2'.
(define-public python2-pybigwig
  (package-with-python2 python-pybigwig))
|
|
|
|
;; DendroPy 4.2.0 from PyPI.  The Python 2 variant is defined separately
;; and referenced lazily through the 'python2-variant' property.
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "DendroPy" version))
       (sha256
        (base32
         "15c7s3d5gf19ljsxvq5advaa752wfi7pwrdjyhzmg85hccyvp47p"))))
    (build-system python-build-system)
    (properties `((python2-variant . ,(delay python2-dendropy))))
    (home-page "http://packages.python.org/DendroPy/")
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (license license:bsd-3)))
|
|
|
|
;; Python 2 variant of python-dendropy.  It runs the tests through
;; "nosetests", which requires python2-nose as an extra native input.
(define-public python2-dendropy
  (let ((py2-base (package-with-python2
                   (strip-python2-variant python-dendropy))))
    (package
      (inherit py2-base)
      (native-inputs
       `(("python2-nose" ,python2-nose)
         ,@(package-native-inputs py2-base)))
      (arguments
       `(#:python ,python-2
         #:phases
         (modify-phases %standard-phases
           ;; There is currently a test failure that only happens on some
           ;; systems, and only using "setup.py test", so call nosetests
           ;; directly instead.
           (replace 'check
             (lambda _
               (zero? (system* "nosetests"))))))))))
|
|
|
|
|
|
;; deepTools 2.1.1, built with Python 2; all Python inputs are the
;; python2-* variants.
(define-public deeptools
  (package
    (name "deeptools")
    (version "2.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/fidelram/deepTools/"
                           "archive/" version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1nmfin0zjdby3vay3r4flvz94dr6qjhj41ax4yz3vx13j6wz8izd"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-numpydoc" ,python2-numpydoc)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-bx-python" ,python2-bx-python)
       ("python-pysam" ,python2-pysam)
       ("python-pybigwig" ,python2-pybigwig)))
    ;; Both native inputs are only needed by the test suite.
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-pytz" ,python2-pytz)))
    (home-page "https://github.com/fidelram/deepTools")
    (synopsis "Tools for normalizing and visualizing deep-sequencing data")
    (description
     "DeepTools addresses the challenge of handling the large amounts of data
that are now routinely generated from DNA sequencing centers. To do so,
deepTools contains useful modules to process the mapped reads data to create
coverage files in standard bedGraph and bigWig file formats. By doing so,
deepTools allows the creation of normalized coverage files or the comparison
between two files (for example, treatment and control). Finally, using such
normalized and standardized files, multiple visualizations can be created to
identify enrichments with functional annotations of the genome.")
    (license license:gpl3+)))
|
|
|
|
;; DIAMOND 0.8.31, built with CMake and restricted to x86_64-linux.
(define-public diamond
  (package
    (name "diamond")
    (version "0.8.31")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/bbuchfink/diamond/archive/v"
             version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0nh79f4rpgq8vmlga743r7vd0z0ik6spy34f7vfq0v9lcmvfr7xq"))))
    (build-system cmake-build-system)
    (arguments
     ;; Tests are disabled because there is no "check" target.
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Drop the "-march=native" flag from CMakeLists.txt.
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    ;; diamond fails to build on other platforms
    ;; https://github.com/bbuchfink/diamond/issues/18
    (supported-systems '("x86_64-linux"))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (license (license:non-copyleft "file://src/COPYING"
                                   "See src/COPYING in the distribution."))))
|
|
|
|
;; Discrover 1.6.0, built with CMake.  texlive and imagemagick are
;; build-time (native) inputs only.
(define-public discrover
  (package
    (name "discrover")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/maaskola/discrover/archive/"
                           version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "0rah9ja4m0rl5mldd6vag9rwrivw1zrqxssfq8qx64m7961fp68k"))))
    (build-system cmake-build-system)
    ;; There are no tests.
    (arguments
     `(#:tests? #f))
    (inputs
     `(("boost" ,boost)
       ("cairo" ,cairo)))
    (native-inputs
     `(("texlive" ,texlive)
       ("imagemagick" ,imagemagick)))
    (home-page "http://dorina.mdc-berlin.de/public/rajewsky/discrover/")
    (synopsis "Discover discriminative nucleotide sequence motifs")
    (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
    (license license:gpl3+)))
|
|
|
|
;; EIGENSOFT, built from a pinned git commit.  The version string is made
;; of "6.1.2-", the packaging revision and the first nine characters of
;; the commit.
(define-public eigensoft
  (let ((revision "1")
        (commit "b14d1e202e21e532536ff8004f0419cd5e259dc7"))
    (package
      (name "eigensoft")
      (version (string-append "6.1.2-" revision "."
                              (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/DReichLab/EIG.git")
               (commit commit)))
         (file-name (string-append "eigensoft-" commit "-checkout"))
         (sha256
          (base32
           "0f5m6k2j5c16xc3xbywcs989xyc26ncy1zfzp9j9n55n9r4xcaiq"))
         (modules '((guix build utils)))
         ;; Throw away the pre-built binaries and leave an empty "bin"
         ;; directory in their place.
         (snippet
          '(begin
             (delete-file-recursively "bin")
             (mkdir "bin")
             #t))))
      (build-system gnu-build-system)
      (arguments
       ;; There are no tests.
       `(#:tests? #f
         #:make-flags '("CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; There is no configure script; this phase only enters the
           ;; sub-directory that holds the Makefile and patches it.
           (replace 'configure
             (lambda _
               (chdir "src")
               ;; The link flags in the Makefile are incomplete.
               (substitute* "Makefile"
                 (("-lgsl") "-lgsl -lm -llapack -llapacke -lpthread"))
               #t))
           ;; The provided install target only copies executables to the
           ;; "bin" directory in the build root, so copy them from there
           ;; to the output.
           (add-after 'install 'actually-install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((target (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (for-each (lambda (program)
                             (install-file program target))
                           (find-files "../bin" ".*"))
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("lapack" ,lapack)
         ("openblas" ,openblas)
         ("perl" ,perl)
         ("gfortran" ,gfortran "lib")))
      (home-page "https://github.com/DReichLab/EIG")
      (synopsis "Tools for population genetics")
      (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
      ;; The license of the eigensoft tools is Expat, but since it's
      ;; linking with the GNU Scientific Library (GSL) the effective
      ;; license is the GPL.
      (license license:gpl3+))))
|
|
|
|
;; Entrez Direct 4.10.  Nothing is configured or compiled: the single
;; script "edirect.pl" is installed and wrapped so it sees PERL5LIB.
(define-public edirect
  (package
    (name "edirect")
    (version "4.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/"
                           "versions/2016-05-03/edirect.tar.gz"))
       (sha256
        (base32
         "15zsprak5yh8c1yrz4r1knmb5s8qcmdid4xdhkh3lqcv64l60hli"))))
    (build-system perl-build-system)
    (arguments
     ;; Tests are disabled because there is no "check" target.
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         ;; Installation amounts to placing the script in "bin".
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "edirect.pl"
                           (string-append (assoc-ref outputs "out")
                                          "/bin"))
             #t))
         ;; Make sure 'edirect.pl' finds all perl inputs at runtime by
         ;; prefixing PERL5LIB with its build-time value.
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((script (string-append (assoc-ref outputs "out")
                                          "/bin/edirect.pl"))
                   (perl5lib (getenv "PERL5LIB")))
               (wrap-program script
                 (list "PERL5LIB" ":" 'prefix (list perl5lib)))))))))
    (inputs
     `(("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl" ,perl)))
    (home-page "http://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (license license:public-domain)))
|
|
|
|
;; Exonerate 2.4.0, built with the GNU build system against glib.
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
         "exonerate-" version ".tar.gz"))
       (sha256
        (base32
         "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    (arguments
     ;; Building in parallel fails on some machines.
     `(#:parallel-build? #f))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("glib" ,glib)))
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (synopsis "Generic tool for biological sequence alignment")
    ;; The description used to read "using a many alignment models"; the
    ;; stray article has been removed.
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (license license:gpl3)))
|
|
|
|
;; eXpress 1.5.1, built with CMake against the packaged boost and
;; bamtools rather than the paths expected inside the source tree.
(define-public express
  (package
    (name "express")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://bio.math.berkeley.edu/eXpress/downloads/express-"
         version "/express-" version "-src.tgz"))
       (sha256
        (base32
         "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
    (build-system cmake-build-system)
    (arguments
     ;; Tests are disabled because there is no "check" target.
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               ;; Switch off static boost libraries and point the include
               ;; directory at the bamtools input.
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               ;; Likewise for the library directory.
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib/bamtools")))
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
|
|
|
|
;; Express Beta Diversity 1.0.7.  The build and the check run inside the
;; "source" sub-directory; installation is done by hand from the top level.
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.7")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "https://github.com/dparks1134/ExpressBetaDiversity/archive/v"
         version ".tar.gz"))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1djvdlmqvjf6h0zq7w36y8cl5cli6rgj86x65znl48agnwmzxfxr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         ;; The check consists of running the freshly built binary with
         ;; the "-u" option.
         (replace 'check
           (lambda _
             (zero? (system* "../bin/ExpressBetaDiversity"
                             "-u"))))
         (add-after 'check 'exit-source
           (lambda _
             (chdir "..")
             #t))
         ;; Copy the conversion script and the binary into "bin".
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (for-each (lambda (program)
                           (install-file program target))
                         '("scripts/convertToEBD.py"
                           "bin/ExpressBetaDiversity"))
               #t))))))
    (inputs
     `(("python" ,python-2)))
    (home-page "http://kiwi.cs.dal.ca/Software/ExpressBetaDiversity")
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (license license:gpl3+)))
|
|
|
|
;; FastTree 2.1.9.  The source is a single C file, so it is neither
;; unpacked nor configured; it is compiled twice with gcc, once without
;; and once with OpenMP.
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.microbesonline.org/fasttree/FastTree-"
             version ".c"))
       (sha256
        (base32
         "0ljvvw8i1als1wbfzvrf15c3ii2vw9db20a259g6pzg34xyyb97k"))))
    (build-system gnu-build-system)
    (arguments
     ;; Tests are disabled because there is no "check" target.
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; Compile SOURCE into the executable OUTPUT.  LEADING-FLAGS
             ;; are placed before the optimisation flags shared by both
             ;; builds.  Return #t when gcc exits with status zero.
             (define (gcc-build output . leading-flags)
               (zero? (apply system* "gcc"
                             (append leading-flags
                                     (list "-O3"
                                           "-finline-functions"
                                           "-funroll-loops"
                                           "-Wall"
                                           "-o"
                                           output
                                           source
                                           "-lm")))))
             (and (gcc-build "FastTree")
                  (gcc-build "FastTreeMP" "-DOPENMP" "-fopenmp"))))
         ;; Copy both executables into "bin".
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (for-each (lambda (program)
                           (install-file program target))
                         '("FastTree" "FastTreeMP"))
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    (description
     "FastTree can handle alignments with up to a million of sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
|
|
|
|
;; FASTX-Toolkit 0.0.14, a plain GNU build that needs libgtextutils at
;; run time and pkg-config at build time.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "https://github.com/agordon/fastx_toolkit/releases/download/"
         version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32
         "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
|
|
|
|
;; Flexbar, built with CMake.  The 'install phase is removed and the
;; 'check phase is replaced by a run of the upstream validation script.
(define-public flexbar
  (package
    (name "flexbar")
    (version "2.5")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/flexbar/"
                       version "/flexbar_v" version "_src.tgz"))
       (sha256
        (base32
         "13jaykc3y1x8y5nn9j8ljnb79s5y51kyxz46hdmvvjj6qhyympmf"))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "-DFLEXBAR_BINARY_DIR="
                            (assoc-ref %outputs "out")
                            "/bin/"))
       #:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Put the output's "bin" directory first in PATH before
             ;; running the validation script.
             (setenv "PATH" (string-append
                             (assoc-ref outputs "out") "/bin:"
                             (getenv "PATH")))
             ;; The name of the unpacked source directory embeds the
             ;; version, so derive it from the 'version' field instead of
             ;; hard-coding "2.5".
             (chdir ,(string-append "../flexbar_v" version "_src/test"))
             (zero? (system* "bash" "flexbar_validate.sh"))))
         ;; NOTE(review): presumably no install step is needed because
         ;; FLEXBAR_BINARY_DIR already points at the output's "bin"
         ;; directory -- confirm against the upstream CMakeLists.txt.
         (delete 'install))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "http://flexbar.sourceforge.net")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:gpl3)))
|
|
|
|
;; FragGeneScan 1.20.  There is no configure script and no "make check";
;; the build, installation and a post-install smoke test are all custom
;; phases.
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.20")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/fraggenescan/"
                       "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "1zzigqmvqvjyqv4945kv6nc5ah2xxm1nxgrlsnbzav3f5c0n0pyj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((data-dir (string-append (assoc-ref outputs "out")
                                            "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 ;; Call "rm" and "mv" by the file names that 'which'
                 ;; finds for them.
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 ;; This script and other programs expect the training
                 ;; files to be in the non-standard location
                 ;; bin/train/XXX.  Change this to be
                 ;; share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 data-dir
                                 "train/\".$FGS_train_file;")))
               (substitute* "run_hmm.c"
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  (string-append " strcpy(train_dir, \"" data-dir "/train/\");")))
               (substitute* "post_process.pl"
                 (("^my \\$dir = substr.*")
                  (string-append "my $dir = \"" data-dir "\";"))))
             #t))
         ;; Clean first, then build the "fgs" target.
         (replace 'build
           (lambda _
             (and (zero? (system* "make" "clean"))
                  (zero? (system* "make" "fgs")))))
         ;; Install the programs into "bin" and the training files into
         ;; "share/fraggenescan/train".
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bin (string-append prefix "/bin/"))
                    (train (string-append prefix
                                          "/share/fraggenescan/train")))
               (for-each (lambda (program)
                           (install-file program bin))
                         '("run_FragGeneScan.pl"
                           "FragGeneScan"
                           "FGS_gff.py"
                           "post_process.pl"))
               (copy-recursively "train" train))))
         (delete 'check)
         ;; In lieu of 'make check', run one of the examples with the
         ;; installed script and check that the output files get created.
         (add-after 'install 'post-install-check
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((script (string-append (assoc-ref outputs "out")
                                          "/bin/"
                                          "run_FragGeneScan.pl")))
               (and (zero? (system* script
                                    "-genome=./example/NC_000913.fna"
                                    "-out=./test2"
                                    "-complete=1"
                                    "-train=complete"))
                    (file-exists? "test2.faa")
                    (file-exists? "test2.ffn")
                    (file-exists? "test2.gff")
                    (file-exists? "test2.out"))))))))
    (inputs
     `(("perl" ,perl)
       ;; Not compatible with python 3.
       ("python" ,python-2)))
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
|
|
|
|
;; fxtract 2.3.  The release tarball ships an empty "util" directory; the
;; "ctskennerton-util" git checkout pinned by UTIL-COMMIT is copied into
;; it before the build.
(define-public fxtract
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/ctSkennerton/fxtract/archive/"
               version ".tar.gz"))
         ;; Name the tarball after this package.  It was previously named
         ;; like the "util" checkout, which mislabelled it in the store.
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32
           "0275cfdhis8517hm01is62062swmi06fxzifq7mr3knbbxjlaiwj"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Replace the "util" directory with the contents of the
           ;; "ctskennerton-util" input.
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util")
                                 "util")
               #t))
           ;; Do not use make install as this requires additional
           ;; dependencies; install the single executable by hand.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util.git")
                   (commit util-commit)))
             ;; The store name now spells "ctskennerton" correctly.
             (file-name (string-append
                         "ctskennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      ;; 'util' requires SSE instructions.
      (supported-systems '("x86_64-linux"))
      (license license:expat))))
|
|
|
|
(define-public grit
|
|
(package
|
|
(name "grit")
|
|
(version "2.0.2")
|
|
(source (origin
|
|
(method url-fetch)
|
|
(uri (string-append
|
|
"https://github.com/nboley/grit/archive/"
|
|
version ".tar.gz"))
|
|
(file-name (string-append name "-" version ".tar.gz"))
|
|
(sha256
|
|
(base32
|
|
"157in84dj70wimbind3x7sy1whs3h57qfgcnj2s6lrd38fbrb7mj"))))
|
|
(build-system python-build-system)
|
|
(arguments
|
|
`(#:python ,python-2
|
|
#:phases
|
|
(alist-cons-after
|
|
'unpack 'generate-from-cython-sources
|
|
(lambda* (#:key inputs outputs #:allow-other-keys)
|
|
;; Delete these C files to force fresh generation from pyx sources.
|
|
(delete-file "grit/sparsify_support_fns.c")
|
|
(delete-file "grit/call_peaks_support_fns.c")
|
|
(substitute* "setup.py"
|
|
(("Cython.Setup") "Cython.Build")
|
|
;; Add numpy include path to fix compilation
|
|
(("pyx\", \\]")
|
|
(string-append "pyx\", ], include_dirs = ['"
|
|
(assoc-ref inputs "python-numpy")
|
|
"/lib/python2.7/site-packages/numpy/core/include/"
|
|
"']"))) #t)
|
|
%standard-phases)))
|
|
(inputs
|
|
`(("python-scipy" ,python2-scipy)
|
|
("python-numpy" ,python2-numpy)
|
|
("python-pysam" ,python2-pysam)
|
|
("python-networkx" ,python2-networkx) |