You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
16035 lines
635 KiB
16035 lines
635 KiB
;;; GNU Guix --- Functional package management for GNU
|
|
;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ricardo Wurmus <rekado@elephly.net>
|
|
;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
|
|
;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
|
|
;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
|
|
;;; Copyright © 2016, 2020 Roel Janssen <roel@gnu.org>
|
|
;;; Copyright © 2016, 2017, 2018, 2019, 2020 Efraim Flashner <efraim@flashner.co.il>
|
|
;;; Copyright © 2016, 2020 Marius Bakke <mbakke@fastmail.com>
|
|
;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
|
|
;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
|
|
;;; Copyright © 2017 Arun Isaac <arunisaac@systemreboot.net>
|
|
;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
|
|
;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com>
|
|
;;; Copyright © 2018, 2019 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
|
|
;;; Copyright © 2019 Maxim Cournoyer <maxim.cournoyer@gmail.com>
|
|
;;; Copyright © 2019 Brian Leung <bkleung89@gmail.com>
|
|
;;; Copyright © 2019 Brett Gilio <brettg@gnu.org>
|
|
;;; Copyright © 2020 Björn Höfling <bjoern.hoefling@bjoernhoefling.de>
|
|
;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
|
|
;;;
|
|
;;; This file is part of GNU Guix.
|
|
;;;
|
|
;;; GNU Guix is free software; you can redistribute it and/or modify it
|
|
;;; under the terms of the GNU General Public License as published by
|
|
;;; the Free Software Foundation; either version 3 of the License, or (at
|
|
;;; your option) any later version.
|
|
;;;
|
|
;;; GNU Guix is distributed in the hope that it will be useful, but
|
|
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
;;; GNU General Public License for more details.
|
|
;;;
|
|
;;; You should have received a copy of the GNU General Public License
|
|
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
(define-module (gnu packages bioinformatics)
|
|
#:use-module ((guix licenses) #:prefix license:)
|
|
#:use-module (guix packages)
|
|
#:use-module (guix utils)
|
|
#:use-module (guix download)
|
|
#:use-module (guix git-download)
|
|
#:use-module (guix hg-download)
|
|
#:use-module (guix build-system ant)
|
|
#:use-module (guix build-system gnu)
|
|
#:use-module (guix build-system cmake)
|
|
#:use-module (guix build-system go)
|
|
#:use-module (guix build-system haskell)
|
|
#:use-module (guix build-system meson)
|
|
#:use-module (guix build-system ocaml)
|
|
#:use-module (guix build-system perl)
|
|
#:use-module (guix build-system python)
|
|
#:use-module (guix build-system r)
|
|
#:use-module (guix build-system ruby)
|
|
#:use-module (guix build-system scons)
|
|
#:use-module (guix build-system trivial)
|
|
#:use-module (gnu packages)
|
|
#:use-module (gnu packages autotools)
|
|
#:use-module (gnu packages algebra)
|
|
#:use-module (gnu packages base)
|
|
#:use-module (gnu packages bash)
|
|
#:use-module (gnu packages bison)
|
|
#:use-module (gnu packages bioconductor)
|
|
#:use-module (gnu packages boost)
|
|
#:use-module (gnu packages check)
|
|
#:use-module (gnu packages code)
|
|
#:use-module (gnu packages compression)
|
|
#:use-module (gnu packages cpio)
|
|
#:use-module (gnu packages cran)
|
|
#:use-module (gnu packages curl)
|
|
#:use-module (gnu packages documentation)
|
|
#:use-module (gnu packages databases)
|
|
#:use-module (gnu packages datastructures)
|
|
#:use-module (gnu packages dlang)
|
|
#:use-module (gnu packages file)
|
|
#:use-module (gnu packages flex)
|
|
#:use-module (gnu packages gawk)
|
|
#:use-module (gnu packages gcc)
|
|
#:use-module (gnu packages gd)
|
|
#:use-module (gnu packages golang)
|
|
#:use-module (gnu packages glib)
|
|
#:use-module (gnu packages graph)
|
|
#:use-module (gnu packages graphviz)
|
|
#:use-module (gnu packages groff)
|
|
#:use-module (gnu packages gtk)
|
|
#:use-module (gnu packages guile)
|
|
#:use-module (gnu packages guile-xyz)
|
|
#:use-module (gnu packages haskell-check)
|
|
#:use-module (gnu packages haskell-web)
|
|
#:use-module (gnu packages haskell-xyz)
|
|
#:use-module (gnu packages image)
|
|
#:use-module (gnu packages imagemagick)
|
|
#:use-module (gnu packages java)
|
|
#:use-module (gnu packages java-compression)
|
|
#:use-module (gnu packages jemalloc)
|
|
#:use-module (gnu packages linux)
|
|
#:use-module (gnu packages lisp-xyz)
|
|
#:use-module (gnu packages logging)
|
|
#:use-module (gnu packages machine-learning)
|
|
#:use-module (gnu packages man)
|
|
#:use-module (gnu packages maths)
|
|
#:use-module (gnu packages mpi)
|
|
#:use-module (gnu packages ncurses)
|
|
#:use-module (gnu packages ocaml)
|
|
#:use-module (gnu packages pcre)
|
|
#:use-module (gnu packages parallel)
|
|
#:use-module (gnu packages pdf)
|
|
#:use-module (gnu packages perl)
|
|
#:use-module (gnu packages perl-check)
|
|
#:use-module (gnu packages pkg-config)
|
|
#:use-module (gnu packages popt)
|
|
#:use-module (gnu packages protobuf)
|
|
#:use-module (gnu packages python)
|
|
#:use-module (gnu packages python-compression)
|
|
#:use-module (gnu packages python-science)
|
|
#:use-module (gnu packages python-web)
|
|
#:use-module (gnu packages python-xyz)
|
|
#:use-module (gnu packages readline)
|
|
#:use-module (gnu packages ruby)
|
|
#:use-module (gnu packages serialization)
|
|
#:use-module (gnu packages shells)
|
|
#:use-module (gnu packages sphinx)
|
|
#:use-module (gnu packages statistics)
|
|
#:use-module (gnu packages swig)
|
|
#:use-module (gnu packages tbb)
|
|
#:use-module (gnu packages tex)
|
|
#:use-module (gnu packages texinfo)
|
|
#:use-module (gnu packages textutils)
|
|
#:use-module (gnu packages time)
|
|
#:use-module (gnu packages tls)
|
|
#:use-module (gnu packages vim)
|
|
#:use-module (gnu packages web)
|
|
#:use-module (gnu packages xml)
|
|
#:use-module (gnu packages xorg)
|
|
#:use-module (srfi srfi-1)
|
|
#:use-module (ice-9 match))
|
|
|
|
;; ARAGORN is compiled with one direct compiler invocation on its versioned C
;; source file, so the configure phase is dropped and the build and install
;; phases are spelled out by hand.
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.38")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
             version ".tgz"))
       (sha256
        (base32 "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Compile the single source file, whose name embeds the package
         ;; version, into the "aragorn" executable.
         (replace 'build
           (lambda _
             (let ((c-file (string-append "aragorn" ,version ".c")))
               (invoke "gcc" "-O3" "-ffast-math" "-finline-functions"
                       "-o" "aragorn" c-file))
             #t))
         ;; Copy the executable and its manual page into the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               (install-file "aragorn" (string-append prefix "/bin"))
               (install-file "aragorn.1"
                             (string-append prefix "/share/man/man1")))
             #t)))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
|
|
|
|
;; BamM: a Python 2 package wrapping a C library.  The C part is configured
;; against the packaged htslib (the bundled copy is removed in the source
;; snippet); the test suite is run against the installed package rather than
;; in the regular 'check phase.
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source (origin
              (method git-fetch)
              ;; BamM is not available on pypi.
              (uri (git-reference
                    (url "https://github.com/Ecogenomics/BamM.git")
                    (commit version)
                    (recursive? #t)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1p83ahi984ipslxlg4yqy1gdnya9rkn1v71z8djgxkm9d2chw4c5"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "c/htslib-1.3.1")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; BamM is Python 2 only.
       ;; Do not use bundled libhts. Do use the bundled libcfu because it has
       ;; been modified from its original form.
       #:configure-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib" (string-append htslib "/lib")
               "--with-libhts-inc" (string-append htslib "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         ;; Regenerate the build machinery of the C sources in "c/".
         (add-after 'unpack 'autogen
           (lambda _
             (with-directory-excursion "c"
               (let ((sh (which "sh")))
                 (for-each make-file-writable (find-files "." ".*"))
                 ;; Use autogen so that 'configure' works.
                 (substitute* "autogen.sh" (("/bin/sh") sh))
                 (setenv "CONFIG_SHELL" sh)
                 (invoke "./autogen.sh")))
             #t))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         ;; Wrap the "bamm" executable so that it runs with the PATH that was
         ;; in effect at build time.
         (add-after 'install 'wrap-executable
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PATH")))
               (wrap-program (string-append out "/bin/bamm")
                 `("PATH" ":" prefix (,path))))
             #t))
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (setenv "PATH"
                     (string-append (assoc-ref outputs "out")
                                    "/bin:"
                                    (getenv "PATH")))
             ;; Prepend the installed site-packages directory to PYTHONPATH.
             ;; Do not derive the "X.Y" version by slicing a fixed number of
             ;; characters off the interpreter's store file name: that only
             ;; works for single-digit patch levels ("2.7.9" -> "2.7", but
             ;; "2.7.17" -> ".7.") and fails when PYTHONPATH is unset.
             (add-installed-pythonpath inputs outputs)
             ;; There are 2 errors printed, but they are safe to ignore:
             ;; 1) [E::hts_open_format] fail to open file ...
             ;; 2) samtools view: failed to open ...
             (invoke "nosetests")
             #t)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("zlib" ,zlib)
       ("python-nose" ,python2-nose)
       ("python-pysam" ,python2-pysam)))
    (inputs
     `(("htslib" ,htslib-1.3) ; At least one test fails on htslib-1.4+.
       ("samtools" ,samtools)
       ("bwa" ,bwa)
       ("grep" ,grep)
       ("sed" ,sed)
       ("coreutils" ,coreutils)))
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "https://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
|
|
|
|
;; BamTools, built with CMake.  A phase run before 'configure exports LDFLAGS
;; carrying an rpath entry for the package's own lib/bamtools directory.
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.5.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pezmaster31/bamtools.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0nfb2ypcx9959xnbz6wxh6py3xfizgmg8nrknxl95c507m9hmq8b"))))
    (build-system cmake-build-system)
    (inputs `(("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (rpath (string-append out "/lib/bamtools")))
               (setenv "LDFLAGS" (string-append "-Wl,-rpath=" rpath)))
             #t)))))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
|
|
|
|
;; BCFtools, built against the packaged htslib and with GSL support enabled.
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.9")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/samtools/bcftools/"
                                  "releases/download/"
                                  version "/bcftools-" version ".tar.bz2"))
              (sha256
               (base32
                "1j3h638i8kgihzyrlnpj82xg1b23sijibys9hvwari3fy7kd0dkg"))
              (modules '((guix build utils)))
              (snippet '(begin
                          ;; Delete bundled htslib.
                          (delete-file-recursively "htslib-1.9")
                          #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       (list "--enable-libgsl")
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         ;; The test driver hard-codes /bin/bash; point it at the bash found
         ;; in PATH instead.
         (add-before 'check 'patch-tests
           (lambda _
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t)))))
    (native-inputs
     `(("perl" ,perl)))                 ; runs test/test.pl
    (inputs
     ;; The bundled htslib is deleted in the source snippet, so the packaged
     ;; htslib is the library the programs are built against.  It therefore
     ;; has to be a regular input: as a native input, a cross-build would
     ;; pick up the library for the build machine.
     `(("gsl" ,gsl)
       ("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
|
|
|
|
;; BEDOPS, built from its Makefiles (no configure step) together with the
;; third-party library tarballs shipped in its source tree.
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.35")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bedops/bedops.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0mmgsgwz5r9w76hzgxkxc9s9lkdhhaf7vr6i02b09vbswvs1fyqx"))))
    (build-system gnu-build-system)
    (arguments
     '(#:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2. We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Extract the tarballs now so that their contents benefit from
             ;; shebang patching.
             (with-directory-excursion "third-party"
               (for-each (lambda (tarball)
                           (invoke "tar" "xvf" tarball))
                         '("jansson-2.6.tar.bz2"
                           "zlib-1.2.7.tar.bz2"
                           "bzip2-1.0.6.tar.bz2")))
             ;; The Makefile must not unpack them a second time, and the
             ;; nested configure runs need a usable shell.
             (substitute* "system.mk/Makefile.linux"
               (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
               (("\\./configure") "CONFIG_SHELL=bash ./configure"))
             (substitute* "third-party/zlib-1.2.7/Makefile.in"
               (("^SHELL=.*$") "SHELL=bash\n"))
             #t))
         (delete 'configure))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
|
|
|
|
;; bedtools, built straight from its Makefile: there is no configure phase and
;; the installation prefix is handed to make via the "prefix" variable.
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.29.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
                           "download/v" version "/"
                           "bedtools-" version ".tar.gz"))
       (sha256
        (base32 "0m3hk6548846w83a9s5drsczvy67n2azx41kj71n03klb2gbzwg3"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("python" ,python-wrapper)))
    (inputs
     `(("samtools" ,samtools)
       ("zlib" ,zlib)))
    (arguments
     '(#:make-flags
       (list (string-append "prefix=" (assoc-ref %outputs "out")))
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:expat)))
|
|
|
|
;; Ribotaper expects fewer output columns than later bedtools releases
;; produce, hence this older variant.  Everything not overridden below is
;; inherited from BEDTOOLS.
(define-public bedtools-2.18
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/"
                           "releases/download/v" version
                           "/bedtools-" version ".tar.gz"))
       (sha256
        (base32 "11rvca19ncg03kxd0wzlfx5ws7r3nisd0z8s9j9n182d8ksp2pxz"))))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install every file found under "bin" into the output's bin
         ;; directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (destination (string-append out "/bin/"))
                    (programs (find-files "bin" ".*")))
               (for-each (lambda (program)
                           (install-file program destination))
                         programs))
             #t)))))))
|
|
|
|
;; pbbam, built with Meson.  Tests are switched off (see the TODO below), but
;; tests/meson.build is still patched to locate gtest_main through the
;; compiler instead of pkg-config.
(define-public pbbam
  (package
    (name "pbbam")
    (version "0.23.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/pbbam.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0h9gkrpf2lrxklxp72xfl5bi3h5zcm5hprrya9gf0hr3xwlbpp0x"))))
    (build-system meson-build-system)
    (arguments
     `(;; TODO: tests/pbbam_test cannot be linked
       ;; ld: tests/59830eb@@pbbam_test@exe/src_test_Accuracy.cpp.o:
       ;;   undefined reference to symbol '_ZTIN7testing4TestE'
       ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
       ;;   error adding symbols: DSO missing from command line
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'find-googletest
           (lambda* (#:key inputs #:allow-other-keys)
             ;; gtest_main comes without a pkg-config file, so the
             ;; dependency() lookup fails; ask the C++ compiler object to
             ;; find the library instead.
             (let ((googletest (assoc-ref inputs "googletest")))
               (substitute* "tests/meson.build"
                 (("pbbam_gtest_dep = dependency\\('gtest_main'.*")
                  (format #f "cpp = meson.get_compiler('cpp')
pbbam_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n"
                          googletest))))
             #t)))))
    ;; These libraries are listed as "Required" in the pkg-config file.
    (propagated-inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (inputs
     `(("boost" ,boost)
       ("samtools" ,samtools)))
    (native-inputs
     `(("googletest" ,googletest)
       ("pkg-config" ,pkg-config)
       ("python" ,python-wrapper)))     ; for tests
    (home-page "https://github.com/PacificBiosciences/pbbam")
    (synopsis "Work with PacBio BAM files")
    (description
     "The pbbam software package provides components to create, query, and
edit PacBio BAM files and associated indices. These components include a core
C++ library, bindings for additional languages, and command-line utilities.
This library is not intended to be used as a general-purpose BAM utility - all
input and output BAMs must adhere to the PacBio BAM format specification.
Non-PacBio BAMs will cause exceptions to be thrown.")
    (license license:bsd-3)))
|
|
|
|
;; blasr_libcpp, built with Meson.  Two phases patch the Meson files right
;; after unpacking: one adds the HDF5 libraries to the dependency list, the
;; other locates gtest_main without pkg-config.  Tests are switched off (see
;; the TODO below).
(define-public blasr-libcpp
  (package
    (name "blasr-libcpp")
    (version "5.3.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/blasr_libcpp.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0cn5l42zyq67sj0g2imqkhayz2iqvv0a1pgpbmlq0qynjmsrbfd2"))))
    (build-system meson-build-system)
    (arguments
     `(;; TODO: unittest/libblasr_unittest cannot be linked
       ;; ld: unittest/df08227@@libblasr_unittest@exe/alignment_utils_FileUtils_gtest.cpp.o:
       ;;   undefined reference to symbol
       ;;   '_ZN7testing8internal9DeathTest6CreateEPKcPKNS0_2REES3_iPPS1_'
       ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
       ;;   error adding symbols: DSO missing from command line
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         ;; Put find_library() calls for hdf5 and hdf5_cpp at the front of
         ;; the libblasr_deps list in meson.build.
         (add-after 'unpack 'link-with-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((hdf5-dir (assoc-ref inputs "hdf5")))
               (substitute* "meson.build"
                 (("libblasr_deps = \\[" list-opening)
                  (string-append
                   list-opening
                   (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                           hdf5-dir hdf5-dir)))))
             #t))
         (add-after 'unpack 'find-googletest
           (lambda* (#:key inputs #:allow-other-keys)
             ;; gtest_main comes without a pkg-config file, so the
             ;; dependency() lookup fails; ask the C++ compiler object to
             ;; find the library instead.
             (let ((googletest (assoc-ref inputs "googletest")))
               (substitute* "unittest/meson.build"
                 (("libblasr_gtest_dep = dependency\\('gtest_main'.*")
                  (format #f "cpp = meson.get_compiler('cpp')
libblasr_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n"
                          googletest))))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("hdf5" ,hdf5)
       ("pbbam" ,pbbam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("googletest" ,googletest)
       ("pkg-config" ,pkg-config)))
    (home-page "https://github.com/PacificBiosciences/blasr_libcpp")
    (synopsis "Library for analyzing PacBio genomic sequences")
    (description
     "This package provides three libraries used by applications for analyzing
PacBio genomic sequences. This library contains three sub-libraries: pbdata,
hdf and alignment.")
    (license license:bsd-3)))
|
|
|
|
;; BLASR, built with Meson against blasr-libcpp and pbbam.  meson.build is
;; patched so that the HDF5 libraries are part of the dependency list.
(define-public blasr
  (package
    (name "blasr")
    (version "5.3.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/blasr.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1skgy2mvz8gsgfh1gc2nfgwvpyzb1hpmp2cf2773h5wsj8nw22kl"))))
    (build-system meson-build-system)
    (arguments
     `(;; Tests require "cram" executable, which is not packaged.
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         ;; Put find_library() calls for hdf5 and hdf5_cpp at the front of
         ;; the blasr_deps list in meson.build.
         (add-after 'unpack 'link-with-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((hdf5-dir (assoc-ref inputs "hdf5")))
               (substitute* "meson.build"
                 (("blasr_deps = \\[" list-opening)
                  (string-append
                   list-opening
                   (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                           hdf5-dir hdf5-dir)))))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("blasr-libcpp" ,blasr-libcpp)
       ("hdf5" ,hdf5)
       ("pbbam" ,pbbam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/PacificBiosciences/blasr")
    (synopsis "PacBio long read aligner")
    (description
     "Blasr is a genomic sequence aligner for processing PacBio long reads.")
    (license license:bsd-3)))
|
|
|
|
;; RiboTaper: a set of shell scripts.  After installation each entry-point
;; script is wrapped so that R_LIBS_SITE is set to the value it had at build
;; time.
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                           "files/RiboTaper/RiboTaper_Version_"
                           version ".tar.gz"))
       (sha256
        (base32 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/"))
                   (r-libs (getenv "R_LIBS_SITE")))
               (for-each
                (lambda (script)
                  ;; "=" makes the wrapper set the variable outright rather
                  ;; than extend an existing value.
                  (wrap-program (string-append bin script)
                    `("R_LIBS_SITE" ":" = (,r-libs))))
                '("create_annotations_files.bash"
                  "create_metaplots.bash"
                  "Ribotaper_ORF_find.sh"
                  "Ribotaper.sh")))
             #t)))))
    (inputs
     `(("bedtools" ,bedtools-2.18)
       ("samtools" ,samtools-0.1)
       ("r-minimal" ,r-minimal)
       ("r-foreach" ,r-foreach)
       ("r-xnomial" ,r-xnomial)
       ("r-domc" ,r-domc)
       ("r-multitaper" ,r-multitaper)
       ("r-seqinr" ,r-seqinr)))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))
|
|
|
|
;; RiboDiff, a Python 2 package.  setup.py is patched so that scripts/TE.py
;; is declared as an installable script.
(define-public ribodiff
  (package
    (name "ribodiff")
    (version "0.2.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ratschlab/RiboDiff.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0x75nlp7qnmm64jasbi6l21f2cy99r2cjyl6b4hr8zf2bq22drnz"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Generate an installable executable script wrapper: right after
         ;; the "packages=" line of setup.py, add a "scripts=" line carrying
         ;; the same leading text.
         (add-after 'unpack 'patch-setup.py
           (lambda _
             (substitute* "setup.py"
               (("^(.*)packages=.*" whole-line leading)
                (string-append whole-line "\n"
                               leading "scripts=['scripts/TE.py'],\n")))
             #t)))))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-scipy" ,python2-scipy)
       ("python-statsmodels" ,python2-statsmodels)))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)))
    (home-page "https://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
    (synopsis "Detect translation efficiency changes from ribosome footprints")
    (description "RiboDiff is a statistical tool that detects the protein
translational efficiency change from Ribo-Seq (ribosome footprinting) and
RNA-Seq data. It uses a generalized linear model to detect genes showing
difference in translational profile taking mRNA abundance into account. It
facilitates us to decipher the translational regulation that behave
independently with transcriptional regulation.")
    (license license:gpl3+)))
|
|
|
|
;; Bioawk, built from its Makefile (no configure step) and installed by hand:
;; the awk manual page is installed under the name bioawk.1 next to the
;; "bioawk" executable.
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lh3/bioawk.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1pxc3zdnirxbf9a0az698hd8xdik7qkhypm7v6hn922x8y9qmspm"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bison" ,bison)))
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (man1 (string-append prefix "/share/man/man1")))
               ;; copy-file needs the target directory to exist.
               (mkdir-p man1)
               (copy-file "awk.1" (string-append man1 "/bioawk.1"))
               (install-file "bioawk" (string-append prefix "/bin")))
             #t)))))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
|
|
|
|
;; pybedtools: Python bindings around the BEDtools programs.  The Cython
;; extensions are regenerated from their .pyx sources, a handful of tests are
;; disabled, and the test suite is run from a copy outside the source tree.
(define-public python-pybedtools
  (package
    (name "python-pybedtools")
    (version "0.8.1")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "14w5i40gi25clrr7h4wa2pcpnyipya8hrqi7nq77553zc5wf0df0"))))
    (build-system python-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)           ; scandir
                  (srfi srfi-1)         ; find, filter
                  (srfi srfi-26)        ; cut
                  (guix build utils)
                  (guix build python-build-system))
       ;; See https://github.com/daler/pybedtools/issues/192
       #:phases
       (modify-phases %standard-phases
         ;; Rename the affected test functions so that pytest no longer
         ;; collects them.
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             (substitute* "pybedtools/test/test_scripts.py"
               ;; This test freezes.
               (("def test_intron_exon_reads")
                "def _do_not_test_intron_exon_reads")
               ;; This test fails in the Python 2 build.
               (("def test_venn_mpl")
                "def _do_not_test_venn_mpl"))
             (substitute* "pybedtools/test/test_helpers.py"
               ;; Requires internet access.
               (("def test_chromsizes")
                "def _do_not_test_chromsizes")
               ;; Broken as a result of the workaround used in the check phase
               ;; (see: https://github.com/daler/pybedtools/issues/192).
               (("def test_getting_example_beds")
                "def _do_not_test_getting_example_beds"))
             ;; This issue still occurs on python2
             (substitute* "pybedtools/test/test_issues.py"
               (("def test_issue_303")
                "def _test_issue_303"))
             #t))
         ;; TODO: Remove phase after it's part of PYTHON-BUILD-SYSTEM.
         ;; Force the Cythonization of C++ files to guard against compilation
         ;; problems.
         (add-after 'unpack 'remove-cython-generated-files
           (lambda _
             ;; CYTHON-SOURCES holds the .pyx file names without their
             ;; extension; any C/C++ file with the same stem is deleted.
             (let ((cython-sources (map (cut string-drop-right <> 4)
                                        (find-files "." "\\.pyx$")))
                   (c/c++-files (find-files "." "\\.(c|cpp|cxx)$")))
               (define (strip-extension filename)
                 (string-take filename (string-index-right filename #\.)))
               (define (cythonized? c/c++-file)
                 (member (strip-extension c/c++-file) cython-sources))
               (for-each delete-file (filter cythonized? c/c++-files))
               #t)))
         (add-after 'remove-cython-generated-files 'generate-cython-extensions
           (lambda _
             (invoke "python" "setup.py" "cythonize")))
         (replace 'check
           (lambda _
             ;; Locate the "lib*" and "scripts*" directories below ./build/
             ;; and expose them through PYTHONPATH and PATH respectively.
             (let* ((cwd (getcwd))
                    (build-root-directory (string-append cwd "/build/"))
                    (build (string-append
                            build-root-directory
                            (find (cut string-prefix? "lib" <>)
                                  (scandir build-root-directory))))
                    (scripts (string-append
                              build-root-directory
                              (find (cut string-prefix? "scripts" <>)
                                    (scandir build-root-directory)))))
               (setenv "PYTHONPATH"
                       (string-append build ":" (getenv "PYTHONPATH")))
               ;; Executable scripts such as 'intron_exon_reads.py' must be
               ;; available in the PATH.
               (setenv "PATH"
                       (string-append scripts ":" (getenv "PATH"))))
             ;; The tests need to be run from elsewhere...
             (mkdir-p "/tmp/test")
             (copy-recursively "pybedtools/test" "/tmp/test")
             (with-directory-excursion "/tmp/test"
               (invoke "pytest" "-v" "--doctest-modules")))))))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pysam" ,python-pysam)
       ("python-pyyaml" ,python-pyyaml)))
    (native-inputs
     `(("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-cython" ,python-cython)
       ("kentutils" ,kentutils) ; for bedGraphToBigWig
       ("python-six" ,python-six)
       ;; For the test suite.
       ("python-pytest" ,python-pytest)
       ("python-psutil" ,python-psutil)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
|
|
|
|
;; Python 2 variant of python-pybedtools.  It inherits everything from the
;; converted package and adds python2-pathlib in front of the inherited
;; native inputs.
(define-public python2-pybedtools
  (let ((base (package-with-python2 python-pybedtools)))
    (package
      (inherit base)
      (native-inputs
       (cons `("python2-pathlib" ,python2-pathlib)
             (package-native-inputs base))))))
|
|
|
|
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.7")
    (source
     (origin
       (method git-fetch)
       ;; Use GitHub as source because PyPI distribution does not contain
       ;; test data: https://github.com/biocore/biom-format/issues/693
       (uri (git-reference
             (url "https://github.com/biocore/biom-format.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1rna16lyk5aqhnv0dp77wwaplias93f1vw28ad3jmyw6hwkai05v"))
       (modules '((guix build utils)))
       (snippet '(begin
                   ;; Delete generated C files.  The regexp is anchored so
                   ;; that only file names ending in ".c" are removed, not
                   ;; every file whose name merely contains ".c".
                   (for-each delete-file (find-files "." "\\.c$"))
                   #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The generated C files are deleted by the source snippet, so set
         ;; USE_CYTHON in the build environment.
         (add-after 'unpack 'use-cython
           (lambda _ (setenv "USE_CYTHON" "1") #t))
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             ;; Insert a skip decorator, at the same indentation, right
             ;; before the definition of each test to be skipped.
             (substitute* "biom/tests/test_cli/test_validate_table.py"
               (("^(.+)def test_invalid_hdf5" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m)))
             (substitute* "biom/tests/test_table.py"
               (("^(.+)def test_from_hdf5_issue_731" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m)))
             #t))
         ;; Installed gzip files must be writable before the
         ;; 'reset-gzip-timestamps phase runs.  Only names ending in ".gz"
         ;; are considered.
         (add-before 'reset-gzip-timestamps 'make-files-writable
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (for-each (lambda (file) (chmod file #o644))
                         (find-files out "\\.gz$"))
               #t))))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-flake8" ,python-flake8)
       ("python-future" ,python-future)
       ("python-click" ,python-click)
       ("python-h5py" ,python-h5py)
       ("python-pandas" ,python-pandas)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-pytest-cov" ,python-pytest-cov)
       ("python-nose" ,python-nose)))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)
    ;; The Python 2 variant is defined separately and looked up lazily.
    (properties `((python2-variant . ,(delay python2-biom-format))))))
|
|
|
|
;; Python 2 variant of python-biom-format.  The python2-variant property is
;; stripped before conversion, and an extra phase patches setup.py.
(define-public python2-biom-format
  (let* ((python3-base (strip-python2-variant python-biom-format))
         (base (package-with-python2 python3-base)))
    (package
      (inherit base)
      (arguments
       (substitute-keyword-arguments (package-arguments base)
         ((#:phases inherited-phases)
          `(modify-phases ,inherited-phases
             (add-after 'unpack 'remove-pyqi
               ;; Do not require the unmaintained pyqi library.
               (lambda _
                 (substitute* "setup.py"
                   (("install_requires.append\\(\"pyqi\"\\)") "pass"))
                 #t)))))))))
|
|
|
|
(define-public python-pairtools
  (package
    (name "python-pairtools")
    (version "0.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/mirnylab/pairtools")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0gr8y13q7sd6yai6df4aavl2470n1f9s3cib6r473z4hr8hcbwmc"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Replace the hard-coded /bin/bash with the bash found in PATH at
         ;; build time.
         (add-after 'unpack 'fix-references
           (lambda _
             (substitute* '("pairtools/pairtools_merge.py"
                            "pairtools/pairtools_sort.py")
               (("/bin/bash") (which "bash")))
             #t))
         ;; Run pytest from /tmp with the installed package on the Python
         ;; path.
         (replace 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (add-installed-pythonpath inputs outputs)
             (with-directory-excursion "/tmp"
               (invoke "pytest" "-v")))))))
    (inputs
     `(("python" ,python-wrapper)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)
       ("python-pytest" ,python-pytest)))
    (propagated-inputs
     `(("htslib" ,htslib)               ; for bgzip, looked up in PATH
       ("samtools" ,samtools)           ; looked up in PATH
       ("lz4" ,lz4)                     ; for lz4c
       ("python-click" ,python-click)
       ("python-numpy" ,python-numpy)))
    (home-page "https://github.com/mirnylab/pairtools")
    (synopsis "Process mapped Hi-C data")
    (description "Pairtools is a simple and fast command-line framework to
process sequencing data from a Hi-C experiment. Process pair-end sequence
alignments and perform the following operations:

@itemize
@item detect ligation junctions (a.k.a. Hi-C pairs) in aligned paired-end
sequences of Hi-C DNA molecules
@item sort @code{.pairs} files for downstream analyses
@item detect, tag and remove PCR/optical duplicates
@item generate extensive statistics of Hi-C datasets
@item select Hi-C pairs given flexibly defined criteria
@item restore @code{.sam} alignments from Hi-C pairs.
@end itemize
")
    (license license:expat)))
|
|
|
|
(define-public bioperl-minimal
  ;; PERL-INPUTS is the package's input list.  TRANSITIVE-INPUT-NAMES holds
  ;; the package names of all transitive target inputs of those inputs; the
  ;; 'wrap-programs phase looks these names up in %build-inputs.
  (let* ((perl-inputs `(("perl-module-build" ,perl-module-build)
                        ("perl-data-stag" ,perl-data-stag)
                        ("perl-libwww" ,perl-libwww)
                        ("perl-uri" ,perl-uri)))
         (transitive-input-names
          (map (compose package-name cadr)
               (delete-duplicates
                (append-map (compose package-transitive-target-inputs cadr)
                            perl-inputs)))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/bioperl/bioperl-live")
               ;; The tag is "release-" followed by the version with every
               ;; dot replaced by a dash.
               (commit (string-append "release-"
                                      (string-map (lambda (c)
                                                    (if (char=? c #\.)
                                                        #\- c))
                                                  version)))))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0wl8yvzcls59pwwk6m8ahy87pwg6nnibzy5cldbvmcwg2x2w7783"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime.  As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((output (assoc-ref outputs "out"))
                      (bindir (string-append output "/bin/"))
                      (site-perl (string-append output
                                                "/lib/perl5/site_perl"))
                      (input-dirs (map (lambda (name)
                                         (assoc-ref %build-inputs name))
                                       ',transitive-input-names))
                      (perl5lib (string-join (cons site-perl input-dirs)
                                             ":")))
                 (for-each (lambda (script)
                             (wrap-program script
                               `("PERL5LIB" ":" prefix (,perl5lib))))
                           (find-files bindir "\\.pl$"))
                 #t))))))
      (inputs perl-inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "https://metacpan.org/release/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license license:perl-license))))
|
|
|
|
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.70")
    (source
     (origin
       (method url-fetch)
       ;; use PyPi rather than biopython.org to ease updating
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32
         "0nz4n9d2y2dg849gn1z0vjlkwcpzzkzy3fij7x94a6ixy2c54z2a"))))
    (build-system python-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Some tests require a home directory to be set.
         (add-before 'check 'set-home
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
|
|
|
|
;; Python 2 variant of python-biopython, obtained with package-with-python2.
(define-public python2-biopython
  (package-with-python2 python-biopython))
|
|
|
|
(define-public python-fastalite
  (package
    (name "python-fastalite")
    (version "0.3")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "fastalite" version))
              (sha256
               (base32
                "1qli6pxp77i9xn2wfciq2zaxhl82bdxb33cpzqzj1z25yd036wqj"))))
    (build-system python-build-system)
    ;; Test data is not distributed.
    (arguments
     '(#:tests? #f))
    (home-page "https://github.com/nhoffman/fastalite")
    (synopsis "Simplest possible FASTA parser")
    (description "This library implements a FASTA and a FASTQ parser without
relying on a complex dependency tree.")
    (license license:expat)))
|
|
|
|
;; Python 2 variant of python-fastalite, obtained with package-with-python2.
(define-public python2-fastalite
  (package-with-python2 python-fastalite))
|
|
|
|
(define-public bpp-core
  ;; Built from a pinned commit of the git repository: the last release was in
  ;; 2014 and cloning is the recommended way to install from source.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582")
        (revision "1"))
    (package
      (name "bpp-core")
      ;; Yields "2.2.0-1." followed by the first seven characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-core")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (license license:cecill-c))))
|
|
|
|
(define-public bpp-phyl
  ;; Built from a pinned commit of the git repository: the last release was in
  ;; 2014 and cloning is the recommended way to install from source.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2")
        (revision "1"))
    (package
      (name "bpp-phyl")
      ;; Yields "2.2.0-1." followed by the first seven characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f
         ;; If out-of-source, test data is not copied into the build directory
         ;; so the tests fail.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ phylogenetic Library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
      (license license:cecill-c))))
|
|
|
|
(define-public bpp-popgen
  ;; Built from a pinned commit of the git repository: the last release was in
  ;; 2014 and cloning is the recommended way to install from source.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f")
        (revision "1"))
    (package
      (name "bpp-popgen")
      ;; Yields "2.2.0-1." followed by the first seven characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ population genetics library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides population genetics-related modules.")
      (license license:cecill-c))))
|
|
|
|
(define-public bpp-seq
  ;; Built from a pinned commit of the git repository: the last release was in
  ;; 2014 and cloning is the recommended way to install from source.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33")
        (revision "1"))
    (package
      (name "bpp-seq")
      ;; Yields "2.2.0-1." followed by the first seven characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-seq")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f
         ;; If out-of-source, test data is not copied into the build directory
         ;; so the tests fail.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ sequence library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides sequence-related modules.")
      (license license:cecill-c))))
|
|
|
|
(define-public bppsuite
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
    (package
      (name "bppsuite")
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bppsuite")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (native-inputs
       `(("groff" ,groff)
         ("man-db" ,man-db)
         ("texinfo" ,texinfo)))
      ;; Input labels must be unique: a lookup by label returns only the
      ;; first match, so each Bio++ library gets its own label.
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("bpp-phyl" ,bpp-phyl)
         ("bpp-popgen" ,bpp-popgen)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bioinformatics tools written with the Bio++ libraries")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
      (license license:cecill-c))))
|
|
|
|
(define-public blast+
  (package
    (name "blast+")
    (version "2.7.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
             version "/ncbi-blast-" version "+-src.tar.gz"))
       (sha256
        (base32
         "1jlq0afxxgczpp35k6mxh8mn4jzq7vqcnaixk166sfj10wq8v9qh"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled bzip2, zlib and pcre.
           (delete-file-recursively "c++/src/util/compress/bzip2")
           (delete-file-recursively "c++/src/util/compress/zlib")
           (delete-file-recursively "c++/src/util/regexp")
           (substitute* "c++/src/util/compress/Makefile.in"
             (("bzip2 zlib api") "api"))
           ;; Remove useless msbuild directory
           (delete-file-recursively
            "c++/src/build-system/project_tree_builder/msbuild")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There are two(!) tests for this massive library, and both fail with
       ;; "unparsable timing stats".
       ;; ERR [127] --  [serial/datatool] datatool.sh     (unparsable timing stats)
       ;; ERR [127] --  [serial/datatool] datatool_xml.sh     (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-HOME
           ;; $HOME needs to be set at some point during the configure phase
           (lambda _
             (setenv "HOME" "/tmp")
             #t))
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "c++")
             #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Return the replacement text for COMMAND: a fixed invocation
             ;; for "date", otherwise the absolute path found by 'which'.
             ;; When nothing is found, print a warning and return #f.
             (define (resolve-command command)
               (if (string=? command "date")
                   ;; make call to "date" deterministic
                   "date -d @0"
                   (or (which command)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 command)
                         #f))))

             ;; Rewrite hardcoded paths to various tools
             (substitute* (append '("src/build-system/configure.ac"
                                    "src/build-system/configure"
                                    "src/build-system/helpers/run_with_lock.c"
                                    "scripts/common/impl/if_diff.sh"
                                    "scripts/common/impl/run_with_lock.sh"
                                    "src/build-system/Makefile.configurables.real"
                                    "src/build-system/Makefile.in.top"
                                    "src/build-system/Makefile.meta.gmake=no"
                                    "src/build-system/Makefile.meta.in"
                                    "src/build-system/Makefile.meta_l"
                                    "src/build-system/Makefile.meta_p"
                                    "src/build-system/Makefile.meta_r"
                                    "src/build-system/Makefile.mk.in"
                                    "src/build-system/Makefile.requirements"
                                    "src/build-system/Makefile.rules_with_autodep.in")
                                  (find-files "scripts/common/check" "\\.sh$"))
               ;; Keep the original text when no replacement is available.
               (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" original directory command)
                (or (resolve-command command) original)))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; rewrite "/var/tmp" in check script
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; do not reset PATH
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out"))
                   (libdir (string-append (assoc-ref outputs "lib") "/lib"))
                   (includedir (string-append (assoc-ref outputs "include")
                                              "/include/ncbi-tools++")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure.orig"
                       (string-append "--with-build-root=" (getcwd) "/build")
                       (string-append "--prefix=" prefix)
                       (string-append "--libdir=" libdir)
                       (string-append "--includedir=" includedir)
                       (string-append "--with-bz2="
                                      (assoc-ref inputs "bzip2"))
                       (string-append "--with-z="
                                      (assoc-ref inputs "zlib"))
                       (string-append "--with-pcre="
                                      (assoc-ref inputs "pcre"))
                       ;; Each library is built twice by default, once
                       ;; with "-static" in its name, and again
                       ;; without.
                       "--without-static"
                       "--with-dll")
               #t))))))
    (outputs '("out"                    ;  21 MB
               "lib"                    ; 226 MB
               "include"))              ;  33 MB
    (inputs
     `(("bzip2" ,bzip2)
       ("lmdb" ,lmdb)
       ("zlib" ,zlib)
       ("pcre" ,pcre)
       ("perl" ,perl)
       ("python" ,python-wrapper)))
    (native-inputs
     `(("cpio" ,cpio)))
    (home-page "https://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; Most of the sources are in the public domain, with the following
    ;; exceptions:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
|
|
|
|
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, sparsehash, zlib, and .git
                  ;; directory
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       ;; Point the Makefile at the static zlib archive and pass the
       ;; libraries to link against.
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib:static")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"))))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled pigz directory is deleted by the source snippet, so
         ;; drop the Makefile command that would enter it.
         (add-after 'unpack 'do-not-build-bundled-pigz
           (lambda _
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         ;; Hard-code absolute file names: kmc is installed into this
         ;; package's own output, pigz comes from the "pigz" input.
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         ;; Copy the two executables into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("sparsehash" ,sparsehash)
       ("pigz" ,pigz)
       ("zlib:static" ,zlib "static")
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) uses a single minimum-sized bloom filter and is a
correction tool for genomic reads produced by @dfn{Next-generation
sequencing} (NGS). BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
|
|
|
|
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.3.4.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BenLangmead/bowtie2.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1zl3cf327y2p7p03cavymbh7b00djc7lncfaqih33n96iy9q8ibp"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; replace BUILD_HOST and BUILD_TIME for deterministic build
           (substitute* "Makefile"
             (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
             (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:make-flags
       (list "allall"
             "WITH_TBB=1"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Run the Perl test script against the freshly built executables
         ;; in the current directory.  'invoke' returns #t on success and
         ;; raises an error otherwise.
         (replace 'check
           (lambda _
             (invoke "perl"
                     "scripts/test/simple_tests.pl"
                     "--bowtie2=./bowtie2"
                     "--bowtie2-build=./bowtie2-build"))))))
    (native-inputs
     `(("perl" ,perl)
       ("perl-clone" ,perl-clone)
       ("perl-test-deep" ,perl-test-deep)
       ("perl-test-simple" ,perl-test-simple)))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)
       ("python" ,python-wrapper)))
    (supported-systems '("x86_64-linux"))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (license license:gpl3+)))
|
|
|
|
(define-public bowtie1
  (package
    (name "bowtie1")
    (version "1.2.3")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bowtie-bio/bowtie/"
                                  version "/bowtie-src-x86_64.zip"))
              (sha256
               (base32
                "0vmiqdhc9dzyfy9sh6vgi7k9xy2hiw8g87vbamnc6cgpm179zsa4"))
              (modules '((guix build utils)))
              ;; Like the other snippets in this file, end with an explicit
              ;; #t rather than relying on the value of 'substitute*'.
              (snippet
               '(begin
                  (substitute* "Makefile"
                    ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                    (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                    (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:make-flags
       (list "all"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "http://bowtie-bio.sourceforge.net/index.shtml")
    (synopsis "Fast aligner for short nucleotide sequence reads")
    (description
     "Bowtie is a fast, memory-efficient short read aligner. It aligns short
DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp
reads per hour. Bowtie indexes the genome with a Burrows-Wheeler index to
keep its memory footprint small: typically about 2.2 GB for the human
genome (2.9 GB for paired-end).")
    (license license:artistic2.0)))
|
|
|
|
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.1")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
                    version ".tar.gz"))
              (sha256
               (base32
                "19add02kv2xhd6ihd779dr7x35ggym3jqr0m5c4315i1yfb0p11p"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove bundled SeqAn and samtools
                  (delete-file-recursively "src/SeqAn-1.4.2")
                  (delete-file-recursively "src/samtools-0.1.18")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-samtools
           (lambda* (#:key inputs #:allow-other-keys)
             ;; The bundled samtools is deleted by the source snippet, so
             ;; remove the Makefile rules that refer to it.
             (substitute* "src/Makefile.in"
               (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ prefix) prefix)
               (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
               (("SAMPROG = samtools_0\\.1\\.18") "")
               (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
               (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
             ;; Refer to the samtools executable found in PATH at build
             ;; time.  The dots are escaped so that only the literal name
             ;; "samtools_0.1.18" is replaced.
             (substitute* '("src/common.cpp"
                            "src/tophat.py")
               (("samtools_0\\.1\\.18") (which "samtools")))
             ;; Include the samtools headers from the "samtools/"
             ;; subdirectory of the include path.
             (substitute* '("src/common.h"
                            "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             (substitute* '("src/bwt_map.h"
                            "src/map2gtf.h"
                            "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             #t)))))
    (native-inputs
     `(("gcc" ,gcc-5))) ;; doesn't build with later versions
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python-2)
       ("samtools" ,samtools-0.1)
       ("seqan" ,seqan-1)
       ("zlib" ,zlib)))
    (home-page "https://ccb.jhu.edu/software/tophat/index.shtml")
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    ;; TopHat is released under the Boost Software License, Version 1.0
    ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
|
|
|
|
;; BWA is built with plain "make": there is no configure script and no
;; "check" target, and the install phase copies the build products by hand.
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.17")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/lh3/bwa/releases/download/v"
             version "/bwa-" version ".tar.bz2"))
       (sha256
        (base32
         "1zfhv2zg9v1icdlq4p9ssc8k01mca5d1bd87w71py2swfi74s6yy"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               ;; Each entry pairs a file with its directory below the
               ;; output prefix.
               (for-each
                (lambda (entry)
                  (install-file (car entry)
                                (string-append prefix (cdr entry))))
                '(("bwa" . "/bin")
                  ("libbwa.a" . "/lib")
                  ("README.md" . "/share/doc/bwa")
                  ("bwa.1" . "/share/man/man1"))))
             #t)))))
    (inputs `(("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
|
|
|
|
;; BWA-PSSM inherits every field it does not override (notably the build
;; arguments) from the "bwa" package.
(define-public bwa-pssm
  (package
    (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pkerpedjiev/bwa-pssm.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "076c4q0cdqz8jgylb067y9zmvxglppnzi3qiscn0xiypgc6lgb5r"))))
    (build-system gnu-build-system)
    (inputs
     `(("gdsl" ,gdsl)
       ("zlib" ,zlib)
       ("perl" ,perl)))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
|
|
|
|
;; bwa-meth is a Python script that shells out to "bwa"; the custom phase
;; replaces the bare command name with the absolute file name found on PATH
;; at build time.
(define-public bwa-meth
  (package
    (name "bwa-meth")
    (version "0.2.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/brentp/bwa-meth.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "17j31i7zws5j7mhsq9x3qgkxly6mlmrgwhfq0qbflgxrmx04yaiz"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'keep-references-to-bwa
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "bwameth.py"
               ;; Only the "mem" and "index" invocations are rewritten; the
               ;; matched sub-command is kept as is.
               (("bwa (mem|index)" _ command)
                (string-append (which "bwa") " " command))
               ;; There's an ill-advised check for "samtools" on PATH.
               (("^checkX.*") ""))
             #t)))))
    (inputs
     `(("bwa" ,bwa)))
    (native-inputs
     `(("python-toolshed" ,python-toolshed)))
    (home-page "https://github.com/brentp/bwa-meth")
    (synopsis "Fast and accurate alignment of BS-Seq reads")
    (description
     "BWA-Meth works for single-end reads and for paired-end reads from the
directional protocol (most common). It uses the method employed by
methylcoder and Bismark of in silico conversion of all C's to T's in both
reference and reads. It recovers the original read (needed to tabulate
methylation) by attaching it as a comment which BWA appends as a tag to the
read. It performs favorably to existing aligners gauged by number of on and
off-target reads for a capture method that targets CpG-rich region.")
    (license license:expat)))
|
|
|
|
;; bx-python, fetched from PyPI.  The test suite is disabled.
(define-public python-bx-python
  (package
    (name "python-bx-python")
    (version "0.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "bx-python" version))
       (sha256
        (base32
         "11kksg2rbzihpmcid823xvg42xi88m7sz58rzk29abybkxy0rszs"))))
    (build-system python-build-system)
    ;; Tests fail because test data are not included
    (arguments '(#:tests? #f))
    (native-inputs
     `(("python-lzo" ,python-lzo)
       ("python-nose" ,python-nose)
       ("python-cython" ,python-cython)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-six" ,python-six)))
    (home-page "https://github.com/bxlab/bx-python")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
|
|
|
|
;; Python 2 variant of python-bx-python, derived with package-with-python2.
(define-public python2-bx-python (package-with-python2 python-bx-python))
|
|
|
|
;; pysam, fetched from Git.  The bundled htslib directory is deleted and the
;; build is pointed at the "htslib" input through the HTSLIB_* environment
;; variables.
(define-public python-pysam
  (package
    (name "python-pysam")
    (version "0.15.1")
    (source
     (origin
       (method git-fetch)
       ;; Test data is missing on PyPi.
       (uri (git-reference
             (url "https://github.com/pysam-developers/pysam.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1vj367w6xbn9bpmksm162l1aipf7cj97h1q83y7jcpm33ihwpf7x"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop bundled htslib. TODO: Also remove samtools
           ;; and bcftools.
           (delete-file-recursively "htslib")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (srfi srfi-26)
                  (guix build python-build-system)
                  (guix build utils))
       #:phases
       (modify-phases %standard-phases
         (add-before 'build 'set-flags
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib (assoc-ref inputs "htslib")))
               (setenv "HTSLIB_MODE" "external")
               (setenv "HTSLIB_LIBRARY_DIR" (string-append htslib "/lib"))
               (setenv "HTSLIB_INCLUDE_DIR"
                       (string-append htslib "/include")))
             (setenv "LDFLAGS" "-lncurses")
             (setenv "CFLAGS" "-D_CURSES_LIB=1")
             #t))
         (replace 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (for-each delete-file
                       '(;; This file contains tests that require a
                         ;; connection to the internet.
                         "tests/tabix_test.py"
                         ;; FIXME: This test fails
                         "tests/AlignmentFile_test.py"))
             ;; Add first subdirectory of "build" directory to PYTHONPATH.
             (let ((build-subdir
                    (car (scandir "build"
                                  (negate (cut string-prefix? "." <>))))))
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH")
                                      ":" (getcwd) "/build/"
                                      build-subdir)))
             ;; Step out of source dir so python does not import from CWD.
             (with-directory-excursion "tests"
               (setenv "HOME" "/tmp")
               (invoke "make" "-C" "pysam_data")
               (invoke "make" "-C" "cbcf_data")
               ;; Running nosetests without explicitly asking for a single
               ;; process leads to a crash.  Running with multiple processes
               ;; fails because the tests are not designed to run in parallel.

               ;; FIXME: tests keep timing out on some systems.
               (invoke "nosetests" "-v" "--processes" "1")))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ;; Dependencies below are for tests only.
       ("samtools" ,samtools)
       ("bcftools" ,bcftools)
       ("python-nose" ,python-nose)))
    (inputs
     `(("ncurses" ,ncurses)
       ("curl" ,curl)
       ("zlib" ,zlib)))
    (propagated-inputs
     `(("htslib" ,htslib)))             ; Included from installed header files.
    (home-page "https://github.com/pysam-developers/pysam")
    (synopsis "Python bindings to the SAMtools C API")
    (description
     "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
also includes an interface for tabix.")
    (license license:expat)))
|
|
|
|
;; Python 2 variant of python-pysam, derived with package-with-python2.
(define-public python2-pysam (package-with-python2 python-pysam))
|
|
|
|
;; twobitreader, fetched from Git.  The test suite is disabled.
(define-public python-twobitreader
  (package
    (name "python-twobitreader")
    (version "3.1.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/benjschiller/twobitreader")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1qbxvv1h58cismbk1anpjrkpghsaiy64a11ir3lhy6qch6xf8n62"))))
    (build-system python-build-system)
    (arguments
     ;; Tests are not included
     '(#:tests? #f))
    (native-inputs
     `(("python-sphinx" ,python-sphinx)))
    (home-page "https://github.com/benjschiller/twobitreader")
    (synopsis "Python library for reading .2bit files")
    (description
     "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
    (license license:artistic2.0)))
|
|
|
|
;; Python 2 variant of python-twobitreader, derived with
;; package-with-python2.
(define-public python2-twobitreader
  (package-with-python2 python-twobitreader))
|
|
|
|
;; plastid, fetched from PyPI.  The test suite is disabled.
(define-public python-plastid
  (package
    (name "python-plastid")
    (version "0.4.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "plastid" version))
       (sha256
        (base32
         "0l24dd3q66if8yj042m4s0g95n6acn7im1imqd3p6h8ns43kxhj8"))))
    (build-system python-build-system)
    (arguments
     ;; Some test files are not included.
     `(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)
       ("python-biopython" ,python-biopython)
       ("python-twobitreader" ,python-twobitreader)
       ("python-termcolor" ,python-termcolor)))
    (home-page "https://github.com/joshuagryphon/plastid")
    (synopsis "Python library for genomic analysis")
    (description
     "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
    (license license:bsd-3)))
|
|
|
|
;; Python 2 variant of python-plastid, derived with package-with-python2.
(define-public python2-plastid (package-with-python2 python-plastid))
|
|
|
|
;; TEtoolkit (TEtranscripts and TEcount), built with Python 2.  The scripts
;; call external programs by name; those names are replaced with absolute
;; file names at build time, and the installed executables are wrapped so
;; that they see the build-time value of R_LIBS_SITE.
(define-public tetoolkit
  (package
    (name "tetoolkit")
    (version "2.0.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/mhammell-laboratory/tetoolkit.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1yzi0kfpzip8zpjb82x1ik6h22yzfyjiz2dv85v6as2awwqvk807"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; not guaranteed to work with Python 3
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'make-writable
           (lambda _
             (for-each make-file-writable (find-files "."))
             #t))
         ;; This phase is anchored on 'make-writable rather than on 'unpack:
         ;; a second "add-after 'unpack" would be inserted directly after
         ;; 'unpack, i.e. *before* 'make-writable, so the files would be
         ;; patched before they are made writable.
         (add-after 'make-writable 'patch-invocations
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* '("bin/TEtranscripts"
                            "bin/TEcount")
               (("'sort ")
                (string-append "'" (which "sort") " "))
               (("'rm -f ")
                (string-append "'" (which "rm") " -f "))
               (("'Rscript'") (string-append "'" (which "Rscript") "'")))
             (substitute* "TEToolkit/IO/ReadInputs.py"
               (("BamToBED") (which "bamToBed")))
             (substitute* "TEToolkit/Normalization.py"
               (("\"Rscript\"")
                (string-append "\"" (which "Rscript") "\"")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure the executables find R packages.
             (let ((out (assoc-ref outputs "out")))
               (for-each
                (lambda (script)
                  (wrap-program (string-append out "/bin/" script)
                    `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
                '("TEtranscripts"
                  "TEcount")))
             #t)))))
    (inputs
     `(("coreutils" ,coreutils)
       ("bedtools" ,bedtools)
       ("python-argparse" ,python2-argparse)
       ("python-pysam" ,python2-pysam)
       ("r-minimal" ,r-minimal)
       ("r-deseq2" ,r-deseq2)))
    (home-page "https://github.com/mhammell-laboratory/tetoolkit")
    (synopsis "Transposable elements in differential enrichment analysis")
    (description
     "This is a package for including transposable elements in differential
enrichment analysis of sequencing datasets. TEtranscripts and TEcount take
RNA-seq (and similar data) and annotate reads to both genes and transposable
elements. TEtranscripts then performs differential analysis using DESeq2.
Note that TEtranscripts and TEcount rely on specially curated GTF files, which
are not included due to their size.")
    (license license:gpl3+)))
|
|
|
|
;; CD-HIT is built with plain "make" (no configure script, no tests).  The
;; build date and time are removed from the sources.
(define-public cd-hit
  (package
    (name "cd-hit")
    (version "4.6.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/weizhongli/cdhit"
                           "/releases/download/V" version
                           "/cd-hit-v" version
                           "-2017-0621-source.tar.gz"))
       (sha256
        (base32
         "1b4mwm2520ixjbw57sil20f9iixzw4bkdqqwgg1fc3pzm6rz4zmn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags
       ;; Executables are copied directly to the PREFIX.
       (list (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin")
             ;; Support longer sequences (e.g. Pacbio sequences)
             "MAX_SEQ=60000000")
       #:phases
       (modify-phases %standard-phases
         ;; No "configure" script
         (delete 'configure)
         ;; Remove sources of non-determinism
         (add-after 'unpack 'be-timeless
           (lambda _
             (substitute* "cdhit-utility.c++"
               ((" \\(built on \" __DATE__ \"\\)") ""))
             (substitute* "cdhit-common.c++"
               (("__DATE__") "\"0\"")
               (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
             #t))
         ;; The "install" target does not create the target directory.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p bindir))
             #t)))))
    (inputs
     `(("perl" ,perl)))
    (home-page "http://weizhongli-lab.org/cd-hit/")
    (synopsis "Cluster and compare protein or nucleotide sequences")
    (description
     "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences. CD-HIT is designed to be fast and handle extremely large
databases.")
    ;; The manual says: "It can be copied under the GNU General Public License
    ;; version 2 (GPLv2)."
    (license license:gpl2)))
|
|
|
|
;; CLIPper, built with Python 2.  A source snippet drops the "setup_requires"
;; line from setup.py, and a build phase deletes every line of
;; readsToWiggle.pyx that starts with "sc".
(define-public clipper
  (package
    (name "clipper")
    (version "1.2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/YeoLab/clipper.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0fja1rj84wp9vpj8rxpj3n8zqzcqq454m904yp9as1w4phccirjb"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove unnecessary setup dependency
           (substitute* "setup.py"
             (("setup_requires = .*") ""))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only Python 2 is supported
       #:phases
       (modify-phases %standard-phases
         ;; This is fixed in upstream commit
         ;; f6c2990198f906bf97730d95695b4bd5a6d01ddb.
         (add-after 'unpack 'fix-typo
           (lambda _
             (substitute* "clipper/src/readsToWiggle.pyx"
               (("^sc.*") ""))
             #t)))))
    (native-inputs
     `(("python-mock" ,python2-mock)    ; for tests
       ("python-nose" ,python2-nose)    ; for tests
       ("python-pytz" ,python2-pytz)))  ; for tests
    (inputs
     `(("htseq" ,python2-htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pandas" ,python2-pandas)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
|
|
|
|
;; CodingQuarry is built with plain "make" and installed by hand.  The
;; "QuarryFiles" directory is copied into the output and advertised through
;; the QUARRY_PATH search path.
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://sourceforge/codingquarry/CodingQuarry_v"
             version ".tar.gz"))
       (sha256
        (base32
         "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin"))
                    (docdir (string-append prefix
                                           "/share/doc/codingquarry")))
               (install-file "INSTRUCTIONS.pdf" docdir)
               (copy-recursively "QuarryFiles"
                                 (string-append prefix "/QuarryFiles"))
               (install-file "CodingQuarry" bindir)
               (install-file "CufflinksGTF_to_CodingQuarryGFF3.py" bindir))
             #t)))))
    (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
    (inputs `(("openmpi" ,openmpi)))
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (license license:gpl3+)))
|
|
|
|
;; COUGER is distributed as a zip archive; nothing is configured or built.
;; The "src" directory and the "couger" script are copied into the output,
;; and the script is wrapped with the build-time PYTHONPATH.
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32
         "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((output (assoc-ref outputs "out"))
                    (bindir (string-append output "/bin")))
               (copy-recursively "src" (string-append output "/src"))
               (mkdir bindir)
               ;; Add "src" directory to module lookup path.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 output "\")\nfrom argparse")))
               (install-file "couger" bindir))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'couger' runs with the correct PYTHONPATH.
             (let* ((output (assoc-ref outputs "out"))
                    (pythonpath (getenv "PYTHONPATH")))
               (wrap-program (string-append output "/bin/couger")
                 `("PYTHONPATH" ":" prefix (,pythonpath))))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r-minimal" ,r-minimal)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
|
|
|
|
;; Clustal Omega, built with the default GNU build system phases.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32
         "1vm30mzncwdv881vrcwg11vzvrsmwy4wg80j5i0lcfk6dlld50w6"))))
    (build-system gnu-build-system)
    (inputs `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
|
|
|
|
;; CrossMap, fetched from PyPI and built with the default Python build
;; system phases.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.3.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "CrossMap" version))
       (sha256
        (base32
         "1sb2f2qbxya4fzw3yjl09vbrs8vfmw22zrygrvz004sf9gb1vkan"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (inputs
     `(("python-bx-python" ,python-bx-python)
       ("python-numpy" ,python-numpy)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("zlib" ,zlib)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
|
|
|
|
;; dnaio, fetched from PyPI and built with the default Python build system
;; phases.
(define-public python-dnaio
  (package
    (name "python-dnaio")
    (version "0.3")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "dnaio" version))
              (sha256
               (base32
                "0f16m7hdlm0fz1n7y5asy0v9ghyrq17ni1p9iybq22ddzyd49r27"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-xopen" ,python-xopen)))
    (home-page "https://github.com/marcelm/dnaio/")
    (synopsis "Read FASTA and FASTQ files efficiently")
    (description
     "dnaio is a Python library for fast parsing of FASTQ and also FASTA
files. The code was previously part of the cutadapt tool.")
    (license license:expat)))
|
|
|
|
;; deeptoolsintervals, fetched from PyPI and built with the default Python
;; build system phases.
(define-public python-deeptoolsintervals
  (package
    (name "python-deeptoolsintervals")
    (version "0.1.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "deeptoolsintervals" version))
       (sha256
        (base32
         "1xnl80nblysj6dylj4683wgrfa425rkx4dp5k65hvwdns9pw753x"))))
    (build-system python-build-system)
    (inputs `(("zlib" ,zlib)))
    (home-page "https://github.com/deeptools/deeptools_intervals")
    (synopsis "Create GTF-based interval trees with associated meta-data")
    (description
     "This package provides a Python module creating/accessing GTF-based
interval trees with associated meta-data. It is primarily used by the
@code{deeptools} package.")
    (license license:expat)))
|
|
|
|
;; deepTools, fetched from PyPI.  All of its Python dependencies are
;; propagated.
(define-public python-deeptools
  (package
    (name "python-deeptools")
    (version "3.4.3")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "deepTools" version))
       (sha256
        (base32
         "1azgjniss5ff6a90nicdjkxyjwqmi3gzfn09gra42hwlz19hipxb"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-numpydoc" ,python-numpydoc)
       ("python-py2bit" ,python-py2bit)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-scipy" ,python-scipy)
       ("python-deeptoolsintervals" ,python-deeptoolsintervals)
       ("python-plotly" ,python-plotly)))
    (home-page "https://pypi.org/project/deepTools/")
    (synopsis "Useful tools for exploring deep sequencing data")
    (description "This package addresses the challenge of handling large amounts
of data that are now routinely generated from DNA sequencing centers.
@code{deepTools} contains useful modules to process the mapped reads data for
multiple quality checks, creating normalized coverage files in standard bedGraph
and bigWig file formats, that allow comparison between different files. Finally,
using such normalized and standardized files, deepTools can create many
publication-ready visualizations to identify enrichments and for functional
annotations of the genome.")
    ;; The file deeptools/cm.py is licensed under the BSD license. The
    ;; remainder of the code is licensed under the MIT license.
    (license (list license:bsd-3 license:expat))))
|
|
|
|
;; Cutadapt, fetched from PyPI and built with the default Python build
;; system phases.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "2.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "cutadapt" version))
       (sha256
        (base32
         "1vqmsfkm6llxzmsz9wcfcvzx9a9f8iabvwik2rbyn7nc4wm25z89"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-setuptools-scm" ,python-setuptools-scm)))
    (inputs
     `(("python-dnaio" ,python-dnaio)
       ("python-xopen" ,python-xopen)))
    (home-page "https://cutadapt.readthedocs.io/en/stable/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
|
|
|
|
;; libBigWig is built with plain "make" (no configure script); the compiler
;; and the installation prefix are passed as make flags.  Tests are disabled.
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.4.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dpryan79/libBigWig.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "09693dmf1scdac5pyq6qyn8b4mcipvnmc370k9a5z41z81m3dcsj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:tests? #f                      ; tests require access to the web
       #:make-flags
       (list "CC=gcc"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (native-inputs
     `(("doxygen" ,doxygen)
       ;; Needed for tests
       ("python" ,python-2)))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
|
|
|
|
;; pyBigWig, fetched from PyPI.  The bundled libBigWig sources are deleted
;; and setup.py is patched to add "BigWig" to its list of libraries.
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.3.17")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyBigWig" version))
       (sha256
        (base32
         "157x6v48y299zm382krf1dw08fdxg95im8lnabhp5vc94s04zxj1"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled libBigWig sources
           (delete-file-recursively "libBigWig")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-libBigWig
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Prepend "BigWig" to the "libs" list in setup.py.
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))
|
|
|
|
;; Python 2 variant of python-pybigwig, derived with package-with-python2.
(define-public python2-pybigwig (package-with-python2 python-pybigwig))
|
|
|
|
;; DendroPy, fetched from Git and built with the default Python build system
;; phases.
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.4.0")
    (source (origin
              (method git-fetch)
              ;; Source from GitHub so that tests are included.
              (uri (git-reference
                    (url "https://github.com/jeetsukumaran/DendroPy.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "097hfyv2kaf4x92i4rjx0paw2cncxap48qivv8zxng4z7nhid0x9"))))
    (build-system python-build-system)
    (home-page "https://dendropy.org/")
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (license license:bsd-3)))
|
|
|
|
;; Python 2 variant of python-dendropy.  On top of the arguments of the
;; package-with-python2 result, it renames one test so that it is not
;; collected.
(define-public python2-dendropy
  (let ((py2-variant (package-with-python2 python-dendropy)))
    (package
      (inherit py2-variant)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'remove-failing-test
             (lambda _
               ;; This test fails when the full test suite is run, as
               ;; documented at
               ;; https://github.com/jeetsukumaran/DendroPy/issues/74
               (substitute* "tests/test_dataio_nexml_reader_tree_list.py"
                 (("test_collection_comments_and_annotations")
                  "do_not_test_collection_comments_and_annotations"))
               #t)))
         ,@(package-arguments py2-variant))))))
|
|
|
|
;; py2bit 0.3.0, fetched from PyPI and built with python-build-system.
(define-public python-py2bit
  (package
    (name "python-py2bit")
    (version "0.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "py2bit" version))
       (sha256
        (base32
         "1vw2nvw1yrl7ikkqsqs1pg239yr5nspvd969r1x9arms1k25a1a5"))))
    (build-system python-build-system)
    (home-page "https://github.com/dpryan79/py2bit")
    (synopsis "Access 2bit files using lib2bit")
    (description
     "This package provides Python bindings for lib2bit to access 2bit files
with Python.")
    (license license:expat)))
|
|
|
|
(define-public deeptools
|
|
(package
|
|
(name "deeptools")
|
|
(version "3.1.3")
|
|
(source (origin
|
|
(method git-fetch)
|
|
(uri (git-reference
|
|
(url "https://github.com/deeptools/deepTools.git")
|
|
(commit version)))
|
|
(file-name (git-file-name name version))
|
|
(sha256
|
|
(base32
|
|
"1vggnf52g6q2vifdl4cyi7s2fnfqq0ky2zrkj5zv2qfzsc3p3siw"))))
|
|
(build-system python-build-system)
|
|
(arguments
|
|
`(#:phases
|
|
(modify-phases %standard-phases
|
|
;; This phase fails, but it's not needed.
|
|
(delete 'reset-gzip-timestamps))))
|
|
(inputs
|
|
`(("python-plotly" ,python-plotly)
|
|
("python-scipy" ,python-scipy)
|
|
("python-numpy" ,python-numpy)
|
|
("python-numpydoc" ,python-numpydoc)
|
|
("python-matplotlib" ,python-matplotlib)
|
|
("python-pysam" ,python-pysam)
|
|
("python-py2bit" ,python-py2bit)
|
|
("python-pybigwig" ,python-pybigwig)))
|
|
(native-inputs
|
|
`(("python-mock" ,python-mock) ;for tests
|
|
("python-nose" ,python-nose) ;for tests
|
|
("python-pytz" ,python-pytz))) ;for tests
|
|
(home-page "https://github.com/deeptools/deepTools")
|
|
(synopsis "Tools for normalizing and visualizing deep-sequencing data")
|
|
(description
|
|
"DeepTools addresses the challenge of handling the large amounts of data
|
|
that are now routinely generated from DNA sequencing centers. To do so,
|
|
deepTools contains useful modules to process the mapped reads data to create
|
|
coverage files in standard bedGraph and bigWig file formats. By doing so,
|
|
deepTools allows the creation of normalized coverage files or the comparison
|
|
between two files (for example, treatment and control). Finally, using such
|
|
normalized and standardized files, multiple visualizations can be created to
|
|
identify enrichments with functional annotations of the genome.")
|
|
|