You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

15771 lines
625 KiB

  1. ;;; GNU Guix --- Functional package management for GNU
  2. ;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ricardo Wurmus <rekado@elephly.net>
  3. ;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
  4. ;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
  5. ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
  6. ;;; Copyright © 2016, 2020 Roel Janssen <roel@gnu.org>
  7. ;;; Copyright © 2016, 2017, 2018, 2019, 2020 Efraim Flashner <efraim@flashner.co.il>
  8. ;;; Copyright © 2016, 2020 Marius Bakke <mbakke@fastmail.com>
  9. ;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
  10. ;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
  11. ;;; Copyright © 2017 Arun Isaac <arunisaac@systemreboot.net>
  12. ;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
  13. ;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com>
  14. ;;; Copyright © 2018, 2019 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
  15. ;;; Copyright © 2019 Maxim Cournoyer <maxim.cournoyer@gmail.com>
  16. ;;; Copyright © 2019 Brian Leung <bkleung89@gmail.com>
  17. ;;; Copyright © 2019 Brett Gilio <brettg@gnu.org>
  18. ;;; Copyright © 2020 Björn Höfling <bjoern.hoefling@bjoernhoefling.de>
  19. ;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
  20. ;;;
  21. ;;; This file is part of GNU Guix.
  22. ;;;
  23. ;;; GNU Guix is free software; you can redistribute it and/or modify it
  24. ;;; under the terms of the GNU General Public License as published by
  25. ;;; the Free Software Foundation; either version 3 of the License, or (at
  26. ;;; your option) any later version.
  27. ;;;
  28. ;;; GNU Guix is distributed in the hope that it will be useful, but
  29. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  30. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  31. ;;; GNU General Public License for more details.
  32. ;;;
  33. ;;; You should have received a copy of the GNU General Public License
  34. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
  35. (define-module (gnu packages bioinformatics)
  36. #:use-module ((guix licenses) #:prefix license:)
  37. #:use-module (guix packages)
  38. #:use-module (guix utils)
  39. #:use-module (guix download)
  40. #:use-module (guix git-download)
  41. #:use-module (guix hg-download)
  42. #:use-module (guix build-system ant)
  43. #:use-module (guix build-system gnu)
  44. #:use-module (guix build-system cmake)
  45. #:use-module (guix build-system go)
  46. #:use-module (guix build-system haskell)
  47. #:use-module (guix build-system meson)
  48. #:use-module (guix build-system ocaml)
  49. #:use-module (guix build-system perl)
  50. #:use-module (guix build-system python)
  51. #:use-module (guix build-system r)
  52. #:use-module (guix build-system ruby)
  53. #:use-module (guix build-system scons)
  54. #:use-module (guix build-system trivial)
  55. #:use-module (gnu packages)
  56. #:use-module (gnu packages autotools)
  57. #:use-module (gnu packages algebra)
  58. #:use-module (gnu packages base)
  59. #:use-module (gnu packages bash)
  60. #:use-module (gnu packages bison)
  61. #:use-module (gnu packages bioconductor)
  62. #:use-module (gnu packages boost)
  63. #:use-module (gnu packages check)
  64. #:use-module (gnu packages code)
  65. #:use-module (gnu packages compression)
  66. #:use-module (gnu packages cpio)
  67. #:use-module (gnu packages cran)
  68. #:use-module (gnu packages curl)
  69. #:use-module (gnu packages documentation)
  70. #:use-module (gnu packages databases)
  71. #:use-module (gnu packages datastructures)
  72. #:use-module (gnu packages dlang)
  73. #:use-module (gnu packages file)
  74. #:use-module (gnu packages flex)
  75. #:use-module (gnu packages gawk)
  76. #:use-module (gnu packages gcc)
  77. #:use-module (gnu packages gd)
  78. #:use-module (gnu packages golang)
  79. #:use-module (gnu packages glib)
  80. #:use-module (gnu packages graph)
  81. #:use-module (gnu packages groff)
  82. #:use-module (gnu packages gtk)
  83. #:use-module (gnu packages guile)
  84. #:use-module (gnu packages guile-xyz)
  85. #:use-module (gnu packages haskell-check)
  86. #:use-module (gnu packages haskell-web)
  87. #:use-module (gnu packages haskell-xyz)
  88. #:use-module (gnu packages image)
  89. #:use-module (gnu packages imagemagick)
  90. #:use-module (gnu packages java)
  91. #:use-module (gnu packages java-compression)
  92. #:use-module (gnu packages jemalloc)
  93. #:use-module (gnu packages linux)
  94. #:use-module (gnu packages lisp-xyz)
  95. #:use-module (gnu packages logging)
  96. #:use-module (gnu packages machine-learning)
  97. #:use-module (gnu packages man)
  98. #:use-module (gnu packages maths)
  99. #:use-module (gnu packages mpi)
  100. #:use-module (gnu packages ncurses)
  101. #:use-module (gnu packages ocaml)
  102. #:use-module (gnu packages pcre)
  103. #:use-module (gnu packages parallel)
  104. #:use-module (gnu packages pdf)
  105. #:use-module (gnu packages perl)
  106. #:use-module (gnu packages perl-check)
  107. #:use-module (gnu packages pkg-config)
  108. #:use-module (gnu packages popt)
  109. #:use-module (gnu packages protobuf)
  110. #:use-module (gnu packages python)
  111. #:use-module (gnu packages python-compression)
  112. #:use-module (gnu packages python-science)
  113. #:use-module (gnu packages python-web)
  114. #:use-module (gnu packages python-xyz)
  115. #:use-module (gnu packages readline)
  116. #:use-module (gnu packages ruby)
  117. #:use-module (gnu packages serialization)
  118. #:use-module (gnu packages shells)
  119. #:use-module (gnu packages sphinx)
  120. #:use-module (gnu packages statistics)
  121. #:use-module (gnu packages swig)
  122. #:use-module (gnu packages tbb)
  123. #:use-module (gnu packages tex)
  124. #:use-module (gnu packages texinfo)
  125. #:use-module (gnu packages textutils)
  126. #:use-module (gnu packages time)
  127. #:use-module (gnu packages tls)
  128. #:use-module (gnu packages vim)
  129. #:use-module (gnu packages web)
  130. #:use-module (gnu packages xml)
  131. #:use-module (gnu packages xorg)
  132. #:use-module (srfi srfi-1)
  133. #:use-module (ice-9 match))
  (define-public aragorn
    ;; ARAGORN ships as a single versioned C source file, so the configure
    ;; phase is dropped and the build and install phases are written by hand.
    (package
      (name "aragorn")
      (version "1.2.38")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
               version ".tgz"))
         (sha256
          (base32 "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                      ; there are no tests
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Compile the versioned C file straight into the "aragorn" binary.
           (replace 'build
             (lambda _
               (let ((c-file (string-append "aragorn" ,version ".c")))
                 (invoke "gcc" "-O3" "-ffast-math" "-finline-functions"
                         "-o" "aragorn" c-file))
               #t))
           ;; Install the binary and its manual page into the output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((prefix (assoc-ref outputs "out")))
                 (install-file "aragorn" (string-append prefix "/bin"))
                 (install-file "aragorn.1"
                               (string-append prefix "/share/man/man1")))
               #t)))))
      (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
      (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
      (description
       "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
      (license license:gpl2)))
  (define-public bamm
    ;; BamM is fetched from Git (with submodules), has its bundled htslib
    ;; removed, and is built with the Python 2 variant of the Python build
    ;; system.  The regular 'build' and 'check' phases are removed; the test
    ;; suite runs against the installed package in 'post-install-check'.
    (package
      (name "bamm")
      (version "1.7.3")
      (source (origin
                (method git-fetch)
                ;; BamM is not available on pypi.
                (uri (git-reference
                      (url "https://github.com/Ecogenomics/BamM.git")
                      (commit version)
                      (recursive? #t)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1p83ahi984ipslxlg4yqy1gdnya9rkn1v71z8djgxkm9d2chw4c5"))
                (modules '((guix build utils)))
                (snippet
                 `(begin
                    ;; Delete bundled htslib.
                    (delete-file-recursively "c/htslib-1.3.1")
                    #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2               ; BamM is Python 2 only.
         ;; Do not use bundled libhts.  Do use the bundled libcfu because it
         ;; has been modified from its original form.
         #:configure-flags
         (let ((htslib (assoc-ref %build-inputs "htslib")))
           (list "--with-libhts-lib" (string-append htslib "/lib")
                 "--with-libhts-inc" (string-append htslib "/include/htslib")))
         #:phases
         (modify-phases %standard-phases
           ;; Regenerate the autotools files of the C sources under "c/".
           (add-after 'unpack 'autogen
             (lambda _
               (with-directory-excursion "c"
                 (let ((sh (which "sh")))
                   (for-each make-file-writable (find-files "." ".*"))
                   ;; Use autogen so that 'configure' works.
                   (substitute* "autogen.sh" (("/bin/sh") sh))
                   (setenv "CONFIG_SHELL" sh)
                   (invoke "./autogen.sh")))
               #t))
           (delete 'build)
           ;; Run tests after installation so compilation only happens once.
           (delete 'check)
           ;; Wrap the "bamm" executable so that the build-time PATH is
           ;; prepended to PATH when it runs.
           (add-after 'install 'wrap-executable
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (path (getenv "PATH")))
                 (wrap-program (string-append out "/bin/bamm")
                   `("PATH" ":" prefix (,path))))
               #t))
           (add-after 'wrap-executable 'post-install-check
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (setenv "PATH"
                       (string-append (assoc-ref outputs "out")
                                      "/bin:"
                                      (getenv "PATH")))
               ;; Let the build system work out the site-packages directory
               ;; of the output.  Slicing a fixed number of characters off
               ;; the Python store file name only works when the version
               ;; string is exactly five characters long; with "2.7.17" it
               ;; produces ".7." and a non-existent PYTHONPATH entry.
               (add-installed-pythonpath inputs outputs)
               ;; There are 2 errors printed, but they are safe to ignore:
               ;; 1) [E::hts_open_format] fail to open file ...
               ;; 2) samtools view: failed to open ...
               (invoke "nosetests")
               #t)))))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)
         ("zlib" ,zlib)
         ("python-nose" ,python2-nose)
         ("python-pysam" ,python2-pysam)))
      (inputs
       `(("htslib" ,htslib-1.3)  ; At least one test fails on htslib-1.4+.
         ("samtools" ,samtools)
         ("bwa" ,bwa)
         ("grep" ,grep)
         ("sed" ,sed)
         ("coreutils" ,coreutils)))
      (propagated-inputs
       `(("python-numpy" ,python2-numpy)))
      (home-page "https://ecogenomics.github.io/BamM/")
      (synopsis "Metagenomics-focused BAM file manipulator")
      (description
       "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
      (license license:lgpl3+)))
  (define-public bamtools
    ;; BamTools is built with CMake; an extra phase sets LDFLAGS so that the
    ;; linker records the output's "lib/bamtools" directory as an rpath.
    (package
      (name "bamtools")
      (version "2.5.1")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/pezmaster31/bamtools.git")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0nfb2ypcx9959xnbz6wxh6py3xfizgmg8nrknxl95c507m9hmq8b"))))
      (build-system cmake-build-system)
      (arguments
       `(#:tests? #f                      ;no "check" target
         #:phases
         (modify-phases %standard-phases
           (add-before 'configure 'set-ldflags
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((prefix (assoc-ref outputs "out"))
                      (rpath-flag (string-append "-Wl,-rpath="
                                                 prefix "/lib/bamtools")))
                 (setenv "LDFLAGS" rpath-flag))
               #t)))))
      (inputs
       `(("zlib" ,zlib)))
      (home-page "https://github.com/pezmaster31/bamtools")
      (synopsis "C++ API and command-line toolkit for working with BAM data")
      (description
       "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
      (license license:expat)))
  (define-public bcftools
    ;; BCFtools release tarball with the bundled htslib removed.  It is
    ;; configured with GSL support, and its test driver is patched to use the
    ;; bash found in the build environment.
    (package
      (name "bcftools")
      (version "1.9")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/samtools/bcftools/"
                             "releases/download/"
                             version "/bcftools-" version ".tar.bz2"))
         (sha256
          (base32 "1j3h638i8kgihzyrlnpj82xg1b23sijibys9hvwari3fy7kd0dkg"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Delete bundled htslib.
             (delete-file-recursively "htslib-1.9")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:configure-flags (list "--enable-libgsl")
         #:test-target "test"
         #:phases
         (modify-phases %standard-phases
           ;; Replace the hard-coded /bin/bash in the test script.
           (add-before 'check 'patch-tests
             (lambda _
               (let ((bash (which "bash")))
                 (substitute* "test/test.pl"
                   (("/bin/bash") bash)))
               #t)))))
      (native-inputs
       `(("htslib" ,htslib)
         ("perl" ,perl)))
      (inputs
       `(("gsl" ,gsl)
         ("zlib" ,zlib)))
      (home-page "https://samtools.github.io/bcftools/")
      (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
      (description
       "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
      ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
      (license (list license:gpl3+ license:expat))))
  (define-public bedops
    ;; BEDOPS is built from Git with its in-tree copies of jansson, zlib and
    ;; bzip2; the tarballs are unpacked up front and the Makefile is told not
    ;; to unpack them again.
    (package
      (name "bedops")
      (version "2.4.35")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/bedops/bedops.git")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0mmgsgwz5r9w76hzgxkxc9s9lkdhhaf7vr6i02b09vbswvs1fyqx"))))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f
         #:make-flags (list (string-append "BINDIR=" %output "/bin"))
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'unpack-tarballs
             (lambda _
               ;; FIXME: Bedops includes tarballs of minimally patched
               ;; upstream libraries jansson, zlib, and bzip2.  We cannot just
               ;; use stock libraries because at least one of the libraries
               ;; (zlib) is patched to add a C++ function definition
               ;; (deflateInit2cpp).  Until the Bedops developers offer a way
               ;; to link against system libraries we have to build the
               ;; in-tree copies of these three libraries.
               ;;
               ;; See upstream discussion:
               ;; https://github.com/bedops/bedops/issues/124

               ;; Unpack the tarballs to benefit from shebang patching.
               (with-directory-excursion "third-party"
                 (for-each (lambda (tarball)
                             (invoke "tar" "xvf" tarball))
                           (list "jansson-2.6.tar.bz2"
                                 "zlib-1.2.7.tar.bz2"
                                 "bzip2-1.0.6.tar.bz2")))
               ;; Disable unpacking of tarballs in Makefile.
               (substitute* "system.mk/Makefile.linux"
                 (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                 (("\\./configure") "CONFIG_SHELL=bash ./configure"))
               (substitute* "third-party/zlib-1.2.7/Makefile.in"
                 (("^SHELL=.*$") "SHELL=bash\n"))
               #t))
           (delete 'configure))))
      (home-page "https://github.com/bedops/bedops")
      (synopsis "Tools for high-performance genomic feature operations")
      (description
       "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.
BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
      (license license:gpl2+)))
  (define-public bedtools
    ;; Current bedtools release.  There is no configure script, so the
    ;; installation prefix is handed to make directly and the 'configure'
    ;; phase is removed.
    (package
      (name "bedtools")
      (version "2.29.2")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/arq5x/bedtools2/"
                             "releases/download/v" version
                             "/bedtools-" version ".tar.gz"))
         (sha256
          (base32 "0m3hk6548846w83a9s5drsczvy67n2azx41kj71n03klb2gbzwg3"))))
      (build-system gnu-build-system)
      (arguments
       '(#:test-target "test"
         #:make-flags
         (let ((prefix (assoc-ref %outputs "out")))
           (list (string-append "prefix=" prefix)))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure))))
      (native-inputs
       `(("python" ,python-wrapper)))
      (inputs
       `(("samtools" ,samtools)
         ("zlib" ,zlib)))
      (home-page "https://github.com/arq5x/bedtools2")
      (synopsis "Tools for genome analysis and arithmetic")
      (description
       "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
      (license license:expat)))
  439. ;; Later releases of bedtools produce files with more columns than
  440. ;; what Ribotaper expects.
  (define-public bedtools-2.18
    ;; Older bedtools variant inheriting from `bedtools'.  Its 'install'
    ;; phase is replaced by one that copies every file found under "bin"
    ;; into the output's bin directory.
    (package
      (inherit bedtools)
      (name "bedtools")
      (version "2.18.0")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/arq5x/bedtools2/"
                             "releases/download/v" version
                             "/bedtools-" version ".tar.gz"))
         (sha256
          (base32 "11rvca19ncg03kxd0wzlfx5ws7r3nisd0z8s9j9n182d8ksp2pxz"))))
      (arguments
       '(#:test-target "test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (target (string-append out "/bin/")))
                 (for-each (lambda (program)
                             (install-file program target))
                           (find-files "bin" ".*")))
               #t)))))))
  465. ;; Needed for pybedtools.
  (define-public bedtools-2.26
    ;; Variant of `bedtools' pinned to release 2.26.0; only the version and
    ;; the source differ from the inherited package.
    (package
      (inherit bedtools)
      (name "bedtools")
      (version "2.26.0")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/arq5x/bedtools2/"
                             "releases/download/v" version
                             "/bedtools-" version ".tar.gz"))
         (sha256
          (base32 "0jhavwifnf7lmkb11h9y7dynr8d699h0rd2l52j1pfgircr2zwv5"))))))
  (define-public pbbam
    ;; PacBio BAM library built with Meson.  The test build file is patched
    ;; to locate gtest_main through the compiler instead of pkg-config, but
    ;; tests remain disabled (see the TODO below).
    (package
      (name "pbbam")
      (version "0.23.0")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/pbbam.git")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0h9gkrpf2lrxklxp72xfl5bi3h5zcm5hprrya9gf0hr3xwlbpp0x"))))
      (build-system meson-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'find-googletest
             (lambda* (#:key inputs #:allow-other-keys)
               ;; It doesn't find gtest_main because there's no pkg-config
               ;; file for it.  Find it another way.
               (let ((gtest-dir (assoc-ref inputs "googletest")))
                 (substitute* "tests/meson.build"
                   (("pbbam_gtest_dep = dependency\\('gtest_main'.*")
                    (format #f "cpp = meson.get_compiler('cpp')
pbbam_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n"
                            gtest-dir))))
               #t)))
         ;; TODO: tests/pbbam_test cannot be linked
         ;; ld: tests/59830eb@@pbbam_test@exe/src_test_Accuracy.cpp.o:
         ;;   undefined reference to symbol '_ZTIN7testing4TestE'
         ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
         ;;   error adding symbols: DSO missing from command line
         #:tests? #f
         #:configure-flags '("-Dtests=false")))
      ;; These libraries are listed as "Required" in the pkg-config file.
      (propagated-inputs
       `(("htslib" ,htslib)
         ("zlib" ,zlib)))
      (inputs
       `(("boost" ,boost)
         ("samtools" ,samtools)))
      (native-inputs
       `(("googletest" ,googletest)
         ("pkg-config" ,pkg-config)
         ("python" ,python-wrapper)))     ; for tests
      (home-page "https://github.com/PacificBiosciences/pbbam")
      (synopsis "Work with PacBio BAM files")
      (description
       "The pbbam software package provides components to create, query, and
edit PacBio BAM files and associated indices. These components include a core
C++ library, bindings for additional languages, and command-line utilities.
This library is not intended to be used as a general-purpose BAM utility - all
input and output BAMs must adhere to the PacBio BAM format specification.
Non-PacBio BAMs will cause exceptions to be thrown.")
      (license license:bsd-3)))
  (define-public blasr-libcpp
    ;; PacBio support libraries built with Meson.  Two post-unpack phases
    ;; patch the Meson files: one adds the HDF5 libraries to the dependency
    ;; list, the other locates gtest_main through the compiler.
    (package
      (name "blasr-libcpp")
      (version "5.3.3")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/blasr_libcpp.git")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0cn5l42zyq67sj0g2imqkhayz2iqvv0a1pgpbmlq0qynjmsrbfd2"))))
      (build-system meson-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'link-with-hdf5
             (lambda* (#:key inputs #:allow-other-keys)
               (let* ((hdf5 (assoc-ref inputs "hdf5"))
                      ;; Entries inserted right after the opening bracket of
                      ;; the dependency list.
                      (extra-deps
                       (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                               hdf5 hdf5)))
                 (substitute* "meson.build"
                   (("libblasr_deps = \\[" matched)
                    (string-append matched extra-deps))))
               #t))
           (add-after 'unpack 'find-googletest
             (lambda* (#:key inputs #:allow-other-keys)
               ;; It doesn't find gtest_main because there's no pkg-config
               ;; file for it.  Find it another way.
               (let ((gtest-dir (assoc-ref inputs "googletest")))
                 (substitute* "unittest/meson.build"
                   (("libblasr_gtest_dep = dependency\\('gtest_main'.*")
                    (format #f "cpp = meson.get_compiler('cpp')
libblasr_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n"
                            gtest-dir))))
               #t)))
         ;; TODO: unittest/libblasr_unittest cannot be linked
         ;; ld: unittest/df08227@@libblasr_unittest@exe/alignment_utils_FileUtils_gtest.cpp.o:
         ;;   undefined reference to symbol
         ;;   '_ZN7testing8internal9DeathTest6CreateEPKcPKNS0_2REES3_iPPS1_'
         ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
         ;;   error adding symbols: DSO missing from command line
         #:tests? #f
         #:configure-flags '("-Dtests=false")))
      (inputs
       `(("boost" ,boost)
         ("hdf5" ,hdf5)
         ("pbbam" ,pbbam)
         ("zlib" ,zlib)))
      (native-inputs
       `(("googletest" ,googletest)
         ("pkg-config" ,pkg-config)))
      (home-page "https://github.com/PacificBiosciences/blasr_libcpp")
      (synopsis "Library for analyzing PacBio genomic sequences")
      (description
       "This package provides three libraries used by applications for analyzing
PacBio genomic sequences. This library contains three sub-libraries: pbdata,
hdf and alignment.")
      (license license:bsd-3)))
  (define-public blasr
    ;; PacBio long read aligner built with Meson; the Meson file is patched
    ;; so that the HDF5 libraries are part of the dependency list.
    (package
      (name "blasr")
      (version "5.3.3")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/blasr.git")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1skgy2mvz8gsgfh1gc2nfgwvpyzb1hpmp2cf2773h5wsj8nw22kl"))))
      (build-system meson-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'link-with-hdf5
             (lambda* (#:key inputs #:allow-other-keys)
               (let* ((hdf5 (assoc-ref inputs "hdf5"))
                      ;; Entries inserted right after the opening bracket of
                      ;; the dependency list.
                      (extra-deps
                       (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                               hdf5 hdf5)))
                 (substitute* "meson.build"
                   (("blasr_deps = \\[" matched)
                    (string-append matched extra-deps))))
               #t)))
         ;; Tests require "cram" executable, which is not packaged.
         #:tests? #f
         #:configure-flags '("-Dtests=false")))
      (inputs
       `(("boost" ,boost)
         ("blasr-libcpp" ,blasr-libcpp)
         ("hdf5" ,hdf5)
         ("pbbam" ,pbbam)
         ("zlib" ,zlib)))
      (native-inputs
       `(("pkg-config" ,pkg-config)))
      (home-page "https://github.com/PacificBiosciences/blasr")
      (synopsis "PacBio long read aligner")
      (description
       "Blasr is a genomic sequence aligner for processing PacBio long reads.")
      (license license:bsd-3)))
  (define-public ribotaper
    ;; RiboTaper pipeline.  After installation each of its shell entry points
    ;; is wrapped so that R_LIBS_SITE is set to the value it had at build
    ;; time.
    (package
      (name "ribotaper")
      (version "1.3.1")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                             "files/RiboTaper/RiboTaper_Version_"
                             version ".tar.gz"))
         (sha256
          (base32 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-executables
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let ((bindir (string-append (assoc-ref outputs "out") "/bin/"))
                     (r-libs (getenv "R_LIBS_SITE")))
                 (for-each
                  (lambda (script)
                    ;; "=" makes the wrapper set the variable outright.
                    (wrap-program (string-append bindir script)
                      (list "R_LIBS_SITE" ":" '= (list r-libs))))
                  '("create_annotations_files.bash"
                    "create_metaplots.bash"
                    "Ribotaper_ORF_find.sh"
                    "Ribotaper.sh")))
               #t)))))
      (inputs
       `(("bedtools" ,bedtools-2.18)
         ("samtools" ,samtools-0.1)
         ("r-minimal" ,r-minimal)
         ("r-foreach" ,r-foreach)
         ("r-xnomial" ,r-xnomial)
         ("r-domc" ,r-domc)
         ("r-multitaper" ,r-multitaper)
         ("r-seqinr" ,r-seqinr)))
      (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
      (synopsis "Define translated ORFs using ribosome profiling data")
      (description
       "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
      (license license:gpl3+)))
  (define-public ribodiff
    ;; RiboDiff, built with Python 2.  setup.py is patched so that
    ;; scripts/TE.py is listed as a script in addition to the packages.
    (package
      (name "ribodiff")
      (version "0.2.2")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ratschlab/RiboDiff.git")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0x75nlp7qnmm64jasbi6l21f2cy99r2cjyl6b4hr8zf2bq22drnz"))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         #:phases
         (modify-phases %standard-phases
           ;; Generate an installable executable script wrapper.
           (add-after 'unpack 'patch-setup.py
             (lambda _
               (substitute* "setup.py"
                 ;; Keep the matched "packages=" line and append a "scripts="
                 ;; line that starts with the same leading text.
                 (("^(.*)packages=.*" matched lead)
                  (string-append matched "\n"
                                 lead "scripts=['scripts/TE.py'],\n")))
               #t)))))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-matplotlib" ,python2-matplotlib)
         ("python-scipy" ,python2-scipy)
         ("python-statsmodels" ,python2-statsmodels)))
      (native-inputs
       `(("python-mock" ,python2-mock)
         ("python-nose" ,python2-nose)))
      (home-page "https://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
      (synopsis "Detect translation efficiency changes from ribosome footprints")
      (description "RiboDiff is a statistical tool that detects the protein
translational efficiency change from Ribo-Seq (ribosome footprinting) and
RNA-Seq data. It uses a generalized linear model to detect genes showing
difference in translational profile taking mRNA abundance into account. It
facilitates us to decipher the translational regulation that behave
independently with transcriptional regulation.")
      (license license:gpl3+)))
  (define-public bioawk
    ;; Bioawk has no configure script and no tests.  The install phase is
    ;; written by hand and installs the "awk.1" manual page under the name
    ;; "bioawk.1".
    (package
      (name "bioawk")
      (version "1.0")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/lh3/bioawk.git")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1pxc3zdnirxbf9a0az698hd8xdik7qkhypm7v6hn922x8y9qmspm"))))
      (build-system gnu-build-system)
      (inputs
       `(("zlib" ,zlib)))
      (native-inputs
       `(("bison" ,bison)))
      (arguments
       `(#:tests? #f                      ; There are no tests to run.
         ;; Bison must generate files, before other targets can build.
         #:parallel-build? #f
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)            ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((prefix (assoc-ref outputs "out"))
                      (man1 (string-append prefix "/share/man/man1")))
                 ;; copy-file does not create the directory, so make it
                 ;; first.
                 (mkdir-p man1)
                 (copy-file "awk.1" (string-append man1 "/bioawk.1"))
                 (install-file "bioawk" (string-append prefix "/bin")))
               #t)))))
      (home-page "https://github.com/lh3/bioawk")
      (synopsis "AWK with bioinformatics extensions")
      (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
      (license license:x11)))
  769. (define-public python-pybedtools
  770. (package
  771. (name "python-pybedtools")
  772. (version "0.8.1")
  773. (source (origin
  774. (method url-fetch)
  775. (uri (pypi-uri "pybedtools" version))
  776. (sha256
  777. (base32
  778. "14w5i40gi25clrr7h4wa2pcpnyipya8hrqi7nq77553zc5wf0df0"))))
  779. (build-system python-build-system)
  780. (arguments
  781. `(#:modules ((ice-9 ftw)
  782. (srfi srfi-1)
  783. (srfi srfi-26)
  784. (guix build utils)
  785. (guix build python-build-system))
  786. ;; See https://github.com/daler/pybedtools/issues/192
  787. #:phases
  788. (modify-phases %standard-phases
  789. ;; See https://github.com/daler/pybedtools/issues/261
  790. (add-after 'unpack 'disable-broken-tests
  791. (lambda _
  792. ;; This test (pybedtools.test.test_scripts.test_venn_mpl) needs a
  793. ;; graphical environment.
  794. (substitute* "pybedtools/test/test_scripts.py"
  795. (("def test_venn_mpl")
  796. "def _do_not_test_venn_mpl"))
  797. (substitute* "pybedtools/test/test_helpers.py"
  798. ;; Requires internet access.
  799. (("def test_chromsizes")
  800. "def _do_not_test_chromsizes")
  801. ;; Broken as a result of the workaround used in the check phase
  802. ;; (see: https://github.com/daler/pybedtools/issues/192).
  803. (("def test_getting_example_beds")
  804. "def _do_not_test_getting_example_beds"))
  805. ;; This issue still occurs on python2
  806. (substitute* "pybedtools/test/test_issues.py"
  807. (("def test_issue_303")
  808. "def _test_issue_303"))
  809. #t))
  810. ;; TODO: Remove phase after it's part of PYTHON-BUILD-SYSTEM.
  811. ;; build system.
  812. ;; Force the Cythonization of C++ files to guard against compilation
  813. ;; problems.
  814. (add-after 'unpack 'remove-cython-generated-files
  815. (lambda _
  816. (let ((cython-sources (map (cut string-drop-right <> 4)
  817. (find-files "." "\\.pyx$")))
  818. (c/c++-files (find-files "." "\\.(c|cpp|cxx)$")))
  819. (define (strip-extension filename)
  820. (string-take filename (string-index-right filename #\.)))
  821. (define (cythonized? c/c++-file)
  822. (member (strip-extension c/c++-file) cython-sources))
  823. (for-each delete-file (filter cythonized? c/c++-files))
  824. #t)))
  825. (add-after 'remove-cython-generated-files 'generate-cython-extensions
  826. (lambda _
  827. (invoke "python" "setup.py" "cythonize")))
  828. (replace 'check
  829. (lambda _
  830. (let* ((cwd (getcwd))
  831. (build-root-directory (string-append cwd "/build/"))
  832. (build (string-append
  833. build-root-directory
  834. (find (cut string-prefix? "lib" <>)
  835. (scandir (string-append
  836. build-root-directory)))))
  837. (scripts (string-append
  838. build-root-directory
  839. (find (cut string-prefix? "scripts" <>)
  840. (scandir build-root-directory)))))
  841. (setenv "PYTHONPATH"
  842. (string-append build ":" (getenv "PYTHONPATH")))
  843. ;; Executable scripts such as 'intron_exon_reads.py' must be
  844. ;; available in the PATH.
  845. (setenv "PATH"
  846. (string-append scripts ":" (getenv "PATH"))))
  847. ;; The tests need to be run from elsewhere...
  848. (mkdir-p "/tmp/test")
  849. (copy-recursively "pybedtools/test" "/tmp/test")
  850. (with-directory-excursion "/tmp/test"
  851. (invoke "pytest")))))))
  852. (propagated-inputs
  853. `(("bedtools" ,bedtools)
  854. ("samtools" ,samtools)
  855. ("python-matplotlib" ,python-matplotlib)
  856. ("python-pysam" ,python-pysam)
  857. ("python-pyyaml" ,python-pyyaml)))
  858. (native-inputs
  859. `(("python-numpy" ,python-numpy)
  860. ("python-pandas" ,python-pandas)
  861. ("python-cython" ,python-cython)
  862. ("kentutils" ,kentutils) ; for bedGraphToBigWig
  863. ("python-six" ,python-six)
  864. ;; For the test suite.
  865. ("python-pytest" ,python-pytest)
  866. ("python-psutil" ,python-psutil)))
  867. (home-page "https://pythonhosted.org/pybedtools/")
  868. (synopsis "Python wrapper for BEDtools programs")
  869. (description
  870. "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
  871. which are widely used for genomic interval manipulation or \"genome algebra\".
  872. pybedtools extends BEDTools by offering feature-level manipulations from with
  873. Python.")
  874. (license license:gpl2+)))
  875. (define-public python2-pybedtools
  876. (let ((pybedtools (package-with-python2 python-pybedtools)))
  877. (package
  878. (inherit pybedtools)
  879. (native-inputs
  880. `(("python2-pathlib" ,python2-pathlib)
  881. ,@(package-native-inputs pybedtools))))))
  882. (define-public python-biom-format
  883. (package
  884. (name "python-biom-format")
  885. (version "2.1.7")
  886. (source
  887. (origin
  888. (method git-fetch)
  889. ;; Use GitHub as source because PyPI distribution does not contain
  890. ;; test data: https://github.com/biocore/biom-format/issues/693
  891. (uri (git-reference
  892. (url "https://github.com/biocore/biom-format.git")
  893. (commit version)))
  894. (file-name (git-file-name name version))
  895. (sha256
  896. (base32
  897. "1rna16lyk5aqhnv0dp77wwaplias93f1vw28ad3jmyw6hwkai05v"))
  898. (modules '((guix build utils)))
  899. (snippet '(begin
  900. ;; Delete generated C files.
  901. (for-each delete-file (find-files "." "\\.c"))
  902. #t))))
  903. (build-system python-build-system)
  904. (arguments
  905. `(#:phases
  906. (modify-phases %standard-phases
  907. (add-after 'unpack 'use-cython
  908. (lambda _ (setenv "USE_CYTHON" "1") #t))
  909. (add-after 'unpack 'disable-broken-tests
  910. (lambda _
  911. (substitute* "biom/tests/test_cli/test_validate_table.py"
  912. (("^(.+)def test_invalid_hdf5" m indent)
  913. (string-append indent
  914. "@npt.dec.skipif(True, msg='Guix')\n"
  915. m)))
  916. (substitute* "biom/tests/test_table.py"
  917. (("^(.+)def test_from_hdf5_issue_731" m indent)
  918. (string-append indent
  919. "@npt.dec.skipif(True, msg='Guix')\n"
  920. m)))
  921. #t))
  922. (add-before 'reset-gzip-timestamps 'make-files-writable
  923. (lambda* (#:key outputs #:allow-other-keys)
  924. (let ((out (assoc-ref outputs "out")))
  925. (for-each (lambda (file) (chmod file #o644))
  926. (find-files out "\\.gz"))
  927. #t))))))
  928. (propagated-inputs
  929. `(("python-numpy" ,python-numpy)
  930. ("python-scipy" ,python-scipy)
  931. ("python-flake8" ,python-flake8)
  932. ("python-future" ,python-future)
  933. ("python-click" ,python-click)
  934. ("python-h5py" ,python-h5py)
  935. ("python-pandas" ,python-pandas)))
  936. (native-inputs
  937. `(("python-cython" ,python-cython)
  938. ("python-pytest" ,python-pytest)
  939. ("python-pytest-cov" ,python-pytest-cov)
  940. ("python-nose" ,python-nose)))
  941. (home-page "http://www.biom-format.org")
  942. (synopsis "Biological Observation Matrix (BIOM) format utilities")
  943. (description
  944. "The BIOM file format is designed to be a general-use format for
  945. representing counts of observations e.g. operational taxonomic units, KEGG
  946. orthology groups or lipid types, in one or more biological samples
  947. e.g. microbiome samples, genomes, metagenomes.")
  948. (license license:bsd-3)
  949. (properties `((python2-variant . ,(delay python2-biom-format))))))
  950. (define-public python2-biom-format
  951. (let ((base (package-with-python2 (strip-python2-variant python-biom-format))))
  952. (package
  953. (inherit base)
  954. (arguments
  955. (substitute-keyword-arguments (package-arguments base)
  956. ((#:phases phases)
  957. `(modify-phases ,phases
  958. ;; Do not require the unmaintained pyqi library.
  959. (add-after 'unpack 'remove-pyqi
  960. (lambda _
  961. (substitute* "setup.py"
  962. (("install_requires.append\\(\"pyqi\"\\)") "pass"))
  963. #t)))))))))
  964. (define-public bioperl-minimal
  965. (let* ((inputs `(("perl-module-build" ,perl-module-build)
  966. ("perl-data-stag" ,perl-data-stag)
  967. ("perl-libwww" ,perl-libwww)
  968. ("perl-uri" ,perl-uri)))
  969. (transitive-inputs
  970. (map (compose package-name cadr)
  971. (delete-duplicates
  972. (concatenate
  973. (map (compose package-transitive-target-inputs cadr) inputs))))))
  974. (package
  975. (name "bioperl-minimal")
  976. (version "1.7.0")
  977. (source
  978. (origin
  979. (method git-fetch)
  980. (uri (git-reference
  981. (url "https://github.com/bioperl/bioperl-live")
  982. (commit (string-append "release-"
  983. (string-map (lambda (c)
  984. (if (char=? c #\.)
  985. #\- c)) version)))))
  986. (file-name (git-file-name name version))
  987. (sha256
  988. (base32
  989. "0wl8yvzcls59pwwk6m8ahy87pwg6nnibzy5cldbvmcwg2x2w7783"))))
  990. (build-system perl-build-system)
  991. (arguments
  992. `(#:phases
  993. (modify-phases %standard-phases
  994. (add-after
  995. 'install 'wrap-programs
  996. (lambda* (#:key outputs #:allow-other-keys)
  997. ;; Make sure all executables in "bin" find the required Perl
  998. ;; modules at runtime. As the PERL5LIB variable contains also
  999. ;; the paths of native inputs, we pick the transitive target
  1000. ;; inputs from %build-inputs.
  1001. (let* ((out (assoc-ref outputs "out"))
  1002. (bin (string-append out "/bin/"))
  1003. (path (string-join
  1004. (cons (string-append out "/lib/perl5/site_perl")
  1005. (map (lambda (name)
  1006. (assoc-ref %build-inputs name))
  1007. ',transitive-inputs))
  1008. ":")))
  1009. (for-each (lambda (file)
  1010. (wrap-program file
  1011. `("PERL5LIB" ":" prefix (,path))))
  1012. (find-files bin "\\.pl$"))
  1013. #t))))))
  1014. (inputs inputs)
  1015. (native-inputs
  1016. `(("perl-test-most" ,perl-test-most)))
  1017. (home-page "https://metacpan.org/release/BioPerl")
  1018. (synopsis "Bioinformatics toolkit")
  1019. (description
  1020. "BioPerl is the product of a community effort to produce Perl code which
  1021. is useful in biology. Examples include Sequence objects, Alignment objects
  1022. and database searching objects. These objects not only do what they are
  1023. advertised to do in the documentation, but they also interact - Alignment
  1024. objects are made from the Sequence objects, Sequence objects have access to
  1025. Annotation and SeqFeature objects and databases, Blast objects can be
  1026. converted to Alignment objects, and so on. This means that the objects
  1027. provide a coordinated and extensible framework to do computational biology.")
  1028. (license license:perl-license))))
  1029. (define-public python-biopython
  1030. (package
  1031. (name "python-biopython")
  1032. (version "1.70")
  1033. (source (origin
  1034. (method url-fetch)
  1035. ;; use PyPi rather than biopython.org to ease updating
  1036. (uri (pypi-uri "biopython" version))
  1037. (sha256
  1038. (base32
  1039. "0nz4n9d2y2dg849gn1z0vjlkwcpzzkzy3fij7x94a6ixy2c54z2a"))))
  1040. (build-system python-build-system)
  1041. (arguments
  1042. `(#:phases
  1043. (modify-phases %standard-phases
  1044. (add-before 'check 'set-home
  1045. ;; Some tests require a home directory to be set.
  1046. (lambda _ (setenv "HOME" "/tmp") #t)))))
  1047. (propagated-inputs
  1048. `(("python-numpy" ,python-numpy)))
  1049. (home-page "https://biopython.org/")
  1050. (synopsis "Tools for biological computation in Python")
  1051. (description
  1052. "Biopython is a set of tools for biological computation including parsers
  1053. for bioinformatics files into Python data structures; interfaces to common
  1054. bioinformatics programs; a standard sequence class and tools for performing
  1055. common operations on them; code to perform data classification; code for
  1056. dealing with alignments; code making it easy to split up parallelizable tasks
  1057. into separate processes; and more.")
  1058. (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
  1059. (define-public python2-biopython
  1060. (package-with-python2 python-biopython))
  1061. (define-public python-fastalite
  1062. (package
  1063. (name "python-fastalite")
  1064. (version "0.3")
  1065. (source
  1066. (origin
  1067. (method url-fetch)
  1068. (uri (pypi-uri "fastalite" version))
  1069. (sha256
  1070. (base32
  1071. "1qli6pxp77i9xn2wfciq2zaxhl82bdxb33cpzqzj1z25yd036wqj"))))
  1072. (build-system python-build-system)
  1073. (arguments
  1074. `(#:tests? #f)) ; Test data is not distributed.
  1075. (home-page "https://github.com/nhoffman/fastalite")
  1076. (synopsis "Simplest possible FASTA parser")
  1077. (description "This library implements a FASTA and a FASTQ parser without
  1078. relying on a complex dependency tree.")
  1079. (license license:expat)))
  1080. (define-public python2-fastalite
  1081. (package-with-python2 python-fastalite))
  1082. (define-public bpp-core
  1083. ;; The last release was in 2014 and the recommended way to install from source
  1084. ;; is to clone the git repository, so we do this.
  1085. ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  1086. (let ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582"))
  1087. (package
  1088. (name "bpp-core")
  1089. (version (string-append "2.2.0-1." (string-take commit 7)))
  1090. (source (origin
  1091. (method git-fetch)
  1092. (uri (git-reference
  1093. (url "http://biopp.univ-montp2.fr/git/bpp-core")
  1094. (commit commit)))
  1095. (file-name (string-append name "-" version "-checkout"))
  1096. (sha256
  1097. (base32
  1098. "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
  1099. (build-system cmake-build-system)
  1100. (arguments
  1101. `(#:parallel-build? #f))
  1102. (home-page "http://biopp.univ-montp2.fr")
  1103. (synopsis "C++ libraries for Bioinformatics")
  1104. (description
  1105. "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
  1106. analysis, phylogenetics, molecular evolution and population genetics. It is
  1107. Object Oriented and is designed to be both easy to use and computer efficient.
  1108. Bio++ intends to help programmers to write computer expensive programs, by
  1109. providing them a set of re-usable tools.")
  1110. (license license:cecill-c))))
  1111. (define-public bpp-phyl
  1112. ;; The last release was in 2014 and the recommended way to install from source
  1113. ;; is to clone the git repository, so we do this.
  1114. ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  1115. (let ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2"))
  1116. (package
  1117. (name "bpp-phyl")
  1118. (version (string-append "2.2.0-1." (string-take commit 7)))
  1119. (source (origin
  1120. (method git-fetch)
  1121. (uri (git-reference
  1122. (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
  1123. (commit commit)))
  1124. (file-name (string-append name "-" version "-checkout"))
  1125. (sha256
  1126. (base32
  1127. "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
  1128. (build-system cmake-build-system)
  1129. (arguments
  1130. `(#:parallel-build? #f
  1131. ;; If out-of-source, test data is not copied into the build directory
  1132. ;; so the tests fail.
  1133. #:out-of-source? #f))
  1134. (inputs
  1135. `(("bpp-core" ,bpp-core)
  1136. ("bpp-seq" ,bpp-seq)))
  1137. (home-page "http://biopp.univ-montp2.fr")
  1138. (synopsis "Bio++ phylogenetic Library")
  1139. (description
  1140. "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
  1141. analysis, phylogenetics, molecular evolution and population genetics. This
  1142. library provides phylogenetics-related modules.")
  1143. (license license:cecill-c))))
  1144. (define-public bpp-popgen
  1145. ;; The last release was in 2014 and the recommended way to install from source
  1146. ;; is to clone the git repository, so we do this.
  1147. ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  1148. (let ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f"))
  1149. (package
  1150. (name "bpp-popgen")
  1151. (version (string-append "2.2.0-1." (string-take commit 7)))
  1152. (source (origin
  1153. (method git-fetch)
  1154. (uri (git-reference
  1155. (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
  1156. (commit commit)))
  1157. (file-name (string-append name "-" version "-checkout"))
  1158. (sha256
  1159. (base32
  1160. "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
  1161. (build-system cmake-build-system)
  1162. (arguments
  1163. `(#:parallel-build? #f
  1164. #:tests? #f)) ; There are no tests.
  1165. (inputs
  1166. `(("bpp-core" ,bpp-core)
  1167. ("bpp-seq" ,bpp-seq)))
  1168. (home-page "http://biopp.univ-montp2.fr")
  1169. (synopsis "Bio++ population genetics library")
  1170. (description
  1171. "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
  1172. analysis, phylogenetics, molecular evolution and population genetics. This
  1173. library provides population genetics-related modules.")
  1174. (license license:cecill-c))))
  1175. (define-public bpp-seq
  1176. ;; The last release was in 2014 and the recommended way to install from source
  1177. ;; is to clone the git repository, so we do this.
  1178. ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  1179. (let ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33"))
  1180. (package
  1181. (name "bpp-seq")
  1182. (version (string-append "2.2.0-1." (string-take commit 7)))
  1183. (source (origin
  1184. (method git-fetch)
  1185. (uri (git-reference
  1186. (url "http://biopp.univ-montp2.fr/git/bpp-seq")
  1187. (commit commit)))
  1188. (file-name (string-append name "-" version "-checkout"))
  1189. (sha256
  1190. (base32
  1191. "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
  1192. (build-system cmake-build-system)
  1193. (arguments
  1194. `(#:parallel-build? #f
  1195. ;; If out-of-source, test data is not copied into the build directory
  1196. ;; so the tests fail.
  1197. #:out-of-source? #f))
  1198. (inputs
  1199. `(("bpp-core" ,bpp-core)))
  1200. (home-page "http://biopp.univ-montp2.fr")
  1201. (synopsis "Bio++ sequence library")
  1202. (description
  1203. "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
  1204. analysis, phylogenetics, molecular evolution and population genetics. This
  1205. library provides sequence-related modules.")
  1206. (license license:cecill-c))))
  1207. (define-public bppsuite
  1208. ;; The last release was in 2014 and the recommended way to install from source
  1209. ;; is to clone the git repository, so we do this.
  1210. ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  1211. (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
  1212. (package
  1213. (name "bppsuite")
  1214. (version (string-append "2.2.0-1." (string-take commit 7)))
  1215. (source (origin
  1216. (method git-fetch)
  1217. (uri (git-reference
  1218. (url "http://biopp.univ-montp2.fr/git/bppsuite")
  1219. (commit commit)))
  1220. (file-name (string-append name "-" version "-checkout"))
  1221. (sha256
  1222. (base32
  1223. "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
  1224. (build-system cmake-build-system)
  1225. (arguments
  1226. `(#:parallel-build? #f
  1227. #:tests? #f)) ; There are no tests.
  1228. (native-inputs
  1229. `(("groff" ,groff)
  1230. ("man-db" ,man-db)
  1231. ("texinfo" ,texinfo)))
  1232. (inputs
  1233. `(("bpp-core" ,bpp-core)
  1234. ("bpp-seq" ,bpp-seq)
  1235. ("bpp-phyl" ,bpp-phyl)
  1236. ("bpp-phyl" ,bpp-popgen)))
  1237. (home-page "http://biopp.univ-montp2.fr")
  1238. (synopsis "Bioinformatics tools written with the Bio++ libraries")
  1239. (description
  1240. "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
  1241. analysis, phylogenetics, molecular evolution and population genetics. This
  1242. package provides command line tools using the Bio++ library.")
  1243. (license license:cecill-c))))
  1244. (define-public blast+
  1245. (package
  1246. (name "blast+")
  1247. (version "2.7.1")
  1248. (source (origin
  1249. (method url-fetch)
  1250. (uri (string-append
  1251. "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
  1252. version "/ncbi-blast-" version "+-src.tar.gz"))
  1253. (sha256
  1254. (base32
  1255. "1jlq0afxxgczpp35k6mxh8mn4jzq7vqcnaixk166sfj10wq8v9qh"))
  1256. (modules '((guix build utils)))
  1257. (snippet
  1258. '(begin
  1259. ;; Remove bundled bzip2, zlib and pcre.
  1260. (delete-file-recursively "c++/src/util/compress/bzip2")
  1261. (delete-file-recursively "c++/src/util/compress/zlib")
  1262. (delete-file-recursively "c++/src/util/regexp")
  1263. (substitute* "c++/src/util/compress/Makefile.in"
  1264. (("bzip2 zlib api") "api"))
  1265. ;; Remove useless msbuild directory
  1266. (delete-file-recursively
  1267. "c++/src/build-system/project_tree_builder/msbuild")
  1268. #t))))
  1269. (build-system gnu-build-system)
  1270. (arguments
  1271. `(;; There are two(!) tests for this massive library, and both fail with
  1272. ;; "unparsable timing stats".
  1273. ;; ERR [127] -- [serial/datatool] datatool.sh (unparsable timing stats)
  1274. ;; ERR [127] -- [serial/datatool] datatool_xml.sh (unparsable timing stats)
  1275. #:tests? #f
  1276. #:out-of-source? #t
  1277. #:parallel-build? #f ; not supported
  1278. #:phases
  1279. (modify-phases %standard-phases
  1280. (add-before 'configure 'set-HOME
  1281. ;; $HOME needs to be set at some point during the configure phase
  1282. (lambda _ (setenv "HOME" "/tmp") #t))
  1283. (add-after 'unpack 'enter-dir
  1284. (lambda _ (chdir "c++") #t))
  1285. (add-after 'enter-dir 'fix-build-system
  1286. (lambda _
  1287. (define (which* cmd)
  1288. (cond ((string=? cmd "date")
  1289. ;; make call to "date" deterministic
  1290. "date -d @0")
  1291. ((which cmd)
  1292. => identity)
  1293. (else
  1294. (format (current-error-port)
  1295. "WARNING: Unable to find absolute path for ~s~%"
  1296. cmd)
  1297. #f)))
  1298. ;; Rewrite hardcoded paths to various tools
  1299. (substitute* (append '("src/build-system/configure.ac"
  1300. "src/build-system/configure"
  1301. "src/build-system/helpers/run_with_lock.c"
  1302. "scripts/common/impl/if_diff.sh"
  1303. "scripts/common/impl/run_with_lock.sh"
  1304. "src/build-system/Makefile.configurables.real"
  1305. "src/build-system/Makefile.in.top"
  1306. "src/build-system/Makefile.meta.gmake=no"
  1307. "src/build-system/Makefile.meta.in"
  1308. "src/build-system/Makefile.meta_l"
  1309. "src/build-system/Makefile.meta_p"
  1310. "src/build-system/Makefile.meta_r"
  1311. "src/build-system/Makefile.mk.in"
  1312. "src/build-system/Makefile.requirements"
  1313. "src/build-system/Makefile.rules_with_autodep.in")
  1314. (find-files "scripts/common/check" "\\.sh$"))
  1315. (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
  1316. (or (which* cmd) all)))
  1317. (substitute* (find-files "src/build-system" "^config.*")
  1318. (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
  1319. (("^PATH=.*") ""))
  1320. ;; rewrite "/var/tmp" in check script
  1321. (substitute* "scripts/common/check/check_make_unix.sh"
  1322. (("/var/tmp") "/tmp"))
  1323. ;; do not reset PATH
  1324. (substitute* (find-files "scripts/common/impl/" "\\.sh$")
  1325. (("^ *PATH=.*") "")
  1326. (("action=/bin/") "action=")
  1327. (("export PATH") ":"))
  1328. #t))
  1329. (replace 'configure
  1330. (lambda* (#:key inputs outputs #:allow-other-keys)
  1331. (let ((out (assoc-ref outputs "out"))
  1332. (lib (string-append (assoc-ref outputs "lib") "/lib"))
  1333. (include (string-append (assoc-ref outputs "include")
  1334. "/include/ncbi-tools++")))
  1335. ;; The 'configure' script doesn't recognize things like
  1336. ;; '--enable-fast-install'.
  1337. (invoke "./configure.orig"
  1338. (string-append "--with-build-root=" (getcwd) "/build")
  1339. (string-append "--prefix=" out)
  1340. (string-append "--libdir=" lib)
  1341. (string-append "--includedir=" include)
  1342. (string-append "--with-bz2="
  1343. (assoc-ref inputs "bzip2"))
  1344. (string-append "--with-z="
  1345. (assoc-ref inputs "zlib"))
  1346. (string-append "--with-pcre="
  1347. (assoc-ref inputs "pcre"))
  1348. ;; Each library is built twice by default, once
  1349. ;; with "-static" in its name, and again
  1350. ;; without.
  1351. "--without-static"
  1352. "--with-dll")
  1353. #t))))))
  1354. (outputs '("out" ; 21 MB
  1355. "lib" ; 226 MB
  1356. "include")) ; 33 MB
  1357. (inputs
  1358. `(("bzip2" ,bzip2)
  1359. ("lmdb" ,lmdb)
  1360. ("zlib" ,zlib)
  1361. ("pcre" ,pcre)
  1362. ("perl" ,perl)
  1363. ("python" ,python-wrapper)))
  1364. (native-inputs
  1365. `(("cpio" ,cpio)))
  1366. (home-page "http://blast.ncbi.nlm.nih.gov")
  1367. (synopsis "Basic local alignment search tool")
  1368. (description
  1369. "BLAST is a popular method of performing a DNA or protein sequence
  1370. similarity search, using heuristics to produce results quickly. It also
  1371. calculates an “expect value” that estimates how many matches would have
  1372. occurred at a given score by chance, which can aid a user in judging how much
  1373. confidence to have in an alignment.")
  1374. ;; Most of the sources are in the public domain, with the following
  1375. ;; exceptions:
  1376. ;; * Expat:
  1377. ;; * ./c++/include/util/bitset/
  1378. ;; * ./c++/src/html/ncbi_menu*.js
  1379. ;; * Boost license:
  1380. ;; * ./c++/include/util/impl/floating_point_comparison.hpp
  1381. ;; * LGPL 2+:
  1382. ;; * ./c++/include/dbapi/driver/odbc/unix_odbc/
  1383. ;; * ASL 2.0:
  1384. ;; * ./c++/src/corelib/teamcity_*
  1385. (license (list license:public-domain
  1386. license:expat
  1387. license:boost1.0
  1388. license:lgpl2.0+
  1389. license:asl2.0))))
  1390. (define-public bless
  1391. (package
  1392. (name "bless")
  1393. (version "1p02")
  1394. (source (origin
  1395. (method url-fetch)
  1396. (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
  1397. version ".tgz"))
  1398. (sha256
  1399. (base32
  1400. "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
  1401. (modules '((guix build utils)))
  1402. (snippet
  1403. `(begin
  1404. ;; Remove bundled boost, pigz, zlib, and .git directory
  1405. ;; FIXME: also remove bundled sources for murmurhash3 and
  1406. ;; kmc once packaged.
  1407. (delete-file-recursively "boost")
  1408. (delete-file-recursively "pigz")
  1409. (delete-file-recursively "google-sparsehash")
  1410. (delete-file-recursively "zlib")
  1411. (delete-file-recursively ".git")
  1412. #t))))
  1413. (build-system gnu-build-system)
  1414. (arguments
  1415. '(#:tests? #f ;no "check" target
  1416. #:make-flags
  1417. (list (string-append "ZLIB="
  1418. (assoc-ref %build-inputs "zlib:static")
  1419. "/lib/libz.a")
  1420. (string-append "LDFLAGS="
  1421. (string-join '("-lboost_filesystem"
  1422. "-lboost_system"
  1423. "-lboost_iostreams"
  1424. "-lz"
  1425. "-fopenmp"))))
  1426. #:phases
  1427. (modify-phases %standard-phases
  1428. (add-after 'unpack 'do-not-build-bundled-pigz
  1429. (lambda* (#:key inputs outputs #:allow-other-keys)
  1430. (substitute* "Makefile"
  1431. (("cd pigz/pigz-2.3.3; make") ""))
  1432. #t))
  1433. (add-after 'unpack 'patch-paths-to-executables
  1434. (lambda* (#:key inputs outputs #:allow-other-keys)
  1435. (substitute* "parse_args.cpp"
  1436. (("kmc_binary = .*")
  1437. (string-append "kmc_binary = \""
  1438. (assoc-ref outputs "out")
  1439. "/bin/kmc\";"))
  1440. (("pigz_binary = .*")
  1441. (string-append "pigz_binary = \""
  1442. (assoc-ref inputs "pigz")
  1443. "/bin/pigz\";")))
  1444. #t))
  1445. (replace 'install
  1446. (lambda* (#:key outputs #:allow-other-keys)
  1447. (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
  1448. (for-each (lambda (file)
  1449. (install-file file bin))
  1450. '("bless" "kmc/bin/kmc"))
  1451. #t)))
  1452. (delete 'configure))))
  1453. (native-inputs
  1454. `(("perl" ,perl)))
  1455. (inputs
  1456. `(("openmpi" ,openmpi)
  1457. ("boost" ,boost)
  1458. ("sparsehash" ,sparsehash)
  1459. ("pigz" ,pigz)
  1460. ("zlib:static" ,zlib "static")
  1461. ("zlib" ,zlib)))
  1462. (supported-systems '("x86_64-linux"))
  1463. (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
  1464. (synopsis "Bloom-filter-based error correction tool for NGS reads")
  1465. (description
  1466. "@dfn{Bloom-filter-based error correction solution for high-throughput
  1467. sequencing reads} (BLESS) uses a single minimum-sized bloom filter is a
  1468. correction tool for genomic reads produced by @dfn{Next-generation
  1469. sequencing} (NGS). BLESS produces accurate correction results with much less
  1470. memory compared with previous solutions and is also able to tolerate a higher
  1471. false-positive rate. BLESS can extend reads like DNA assemblers to correct
  1472. errors at the end of reads.")
  1473. (license license:gpl3+)))
  1474. (define-public bowtie
  1475. (package
  1476. (name "bowtie")
  1477. (version "2.3.4.3")
  1478. (source (origin
  1479. (method git-fetch)
  1480. (uri (git-reference
  1481. (url "https://github.com/BenLangmead/bowtie2.git")
  1482. (commit (string-append "v" version))))
  1483. (file-name (git-file-name name version))
  1484. (sha256
  1485. (base32
  1486. "1zl3cf327y2p7p03cavymbh7b00djc7lncfaqih33n96iy9q8ibp"))
  1487. (modules '((guix build utils)))
  1488. (snippet
  1489. '(begin
  1490. (substitute* "Makefile"
  1491. ;; replace BUILD_HOST and BUILD_TIME for deterministic build
  1492. (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
  1493. (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
  1494. #t))))
  1495. (build-system gnu-build-system)
  1496. (arguments
  1497. '(#:make-flags
  1498. (list "allall"
  1499. "WITH_TBB=1"
  1500. (string-append "prefix=" (assoc-ref %outputs "out")))
  1501. #:phases
  1502. (modify-phases %standard-phases
  1503. (delete 'configure)
  1504. (replace 'check
  1505. (lambda _
  1506. (invoke "perl"
  1507. "scripts/test/simple_tests.pl"
  1508. "--bowtie2=./bowtie2"
  1509. "--bowtie2-build=./bowtie2-build")
  1510. #t)))))
  1511. (inputs
  1512. `(("tbb" ,tbb)
  1513. ("zlib" ,zlib)
  1514. ("python" ,python-wrapper)))
  1515. (native-inputs
  1516. `(("perl" ,perl)
  1517. ("perl-clone" ,perl-clone)
  1518. ("perl-test-deep" ,perl-test-deep)
  1519. ("perl-test-simple" ,perl-test-simple)))
  1520. (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
  1521. (synopsis "Fast and sensitive nucleotide sequence read aligner")
  1522. (description
  1523. "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
  1524. reads to long reference sequences. It is particularly good at aligning reads
  1525. of about 50 up to 100s or 1,000s of characters, and particularly good at
  1526. aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
  1527. genome with an FM Index to keep its memory footprint small: for the human
  1528. genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
  1529. gapped, local, and paired-end alignment modes.")
  1530. (supported-systems '("x86_64-linux"))
  1531. (license license:gpl3+)))
  1532. (define-public bowtie1
  1533. (package
  1534. (name "bowtie1")
  1535. (version "1.2.3")
  1536. (source (origin
  1537. (method url-fetch)
  1538. (uri (string-append "mirror://sourceforge/bowtie-bio/bowtie/"
  1539. version "/bowtie-src-x86_64.zip"))
  1540. (sha256
  1541. (base32
  1542. "0vmiqdhc9dzyfy9sh6vgi7k9xy2hiw8g87vbamnc6cgpm179zsa4"))
  1543. (modules '((guix build utils)))
  1544. (snippet
  1545. '(substitute* "Makefile"
  1546. ;; replace BUILD_HOST and BUILD_TIME for deterministic build
  1547. (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
  1548. (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))))
  1549. (build-system gnu-build-system)
  1550. (arguments
  1551. '(#:tests? #f ; no "check" target
  1552. #:make-flags
  1553. (list "all"
  1554. (string-append "prefix=" (assoc-ref %outputs "out")))
  1555. #:phases
  1556. (modify-phases %standard-phases
  1557. (delete 'configure))))
  1558. (inputs
  1559. `(("tbb" ,tbb)
  1560. ("zlib" ,zlib)))
  1561. (supported-systems '("x86_64-linux"))
  1562. (home-page "http://bowtie-bio.sourceforge.net/index.shtml")
  1563. (synopsis "Fast aligner for short nucleotide sequence reads")
  1564. (description
  1565. "Bowtie is a fast, memory-efficient short read aligner. It aligns short
  1566. DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp
  1567. reads per hour. Bowtie indexes the genome with a Burrows-Wheeler index to
  1568. keep its memory footprint small: typically about 2.2 GB for the human
  1569. genome (2.9 GB for paired-end).")
  1570. (license license:artistic2.0)))
  1571. (define-public tophat
  1572. (package
  1573. (name "tophat")
  1574. (version "2.1.1")
  1575. (source (origin
  1576. (method url-fetch)
  1577. (uri (string-append
  1578. "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
  1579. version ".tar.gz"))
  1580. (sha256
  1581. (base32
  1582. "19add02kv2xhd6ihd779dr7x35ggym3jqr0m5c4315i1yfb0p11p"))
  1583. (modules '((guix build utils)))
  1584. (snippet
  1585. '(begin
  1586. ;; Remove bundled SeqAn and samtools
  1587. (delete-file-recursively "src/SeqAn-1.4.2")
  1588. (delete-file-recursively "src/samtools-0.1.18")
  1589. #t))))
  1590. (build-system gnu-build-system)
  1591. (arguments
  1592. '(#:parallel-build? #f ; not supported
  1593. #:phases
  1594. (modify-phases %standard-phases
  1595. (add-after 'unpack 'use-system-samtools
  1596. (lambda* (#:key inputs #:allow-other-keys)
  1597. (substitute* "src/Makefile.in"
  1598. (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ prefix) prefix)
  1599. (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
  1600. (("SAMPROG = samtools_0\\.1\\.18") "")
  1601. (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
  1602. (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
  1603. (substitute* '("src/common.cpp"
  1604. "src/tophat.py")
  1605. (("samtools_0.1.18") (which "samtools")))
  1606. (substitute* '("src/common.h"
  1607. "src/bam2fastx.cpp")
  1608. (("#include \"bam.h\"") "#include <samtools/bam.h>")
  1609. (("#include \"sam.h\"") "#include <samtools/sam.h>"))
  1610. (substitute* '("src/bwt_map.h"
  1611. "src/map2gtf.h"
  1612. "src/align_status.h")
  1613. (("#include <bam.h>") "#include <samtools/bam.h>")
  1614. (("#include <sam.h>") "#include <samtools/sam.h>"))
  1615. #t)))))
  1616. (native-inputs
  1617. `(("gcc" ,gcc-5))) ;; doesn't build with later versions
  1618. (inputs
  1619. `(("boost" ,boost)
  1620. ("bowtie" ,bowtie)
  1621. ("ncurses" ,ncurses)
  1622. ("perl" ,perl)
  1623. ("python" ,python-2)
  1624. ("samtools" ,samtools-0.1)
  1625. ("seqan" ,seqan-1)
  1626. ("zlib" ,zlib)))
  1627. (home-page "https://ccb.jhu.edu/software/tophat/index.shtml")
  1628. (synopsis "Spliced read mapper for RNA-Seq data")
  1629. (description
  1630. "TopHat is a fast splice junction mapper for nucleotide sequence
  1631. reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
  1632. mammalian-sized genomes using the ultra high-throughput short read
  1633. aligner Bowtie, and then analyzes the mapping results to identify
  1634. splice junctions between exons.")
  1635. ;; TopHat is released under the Boost Software License, Version 1.0
  1636. ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
  1637. (license license:boost1.0)))
  1638. (define-public bwa
  1639. (package
  1640. (name "bwa")
  1641. (version "0.7.17")
  1642. (source (origin
  1643. (method url-fetch)
  1644. (uri (string-append
  1645. "https://github.com/lh3/bwa/releases/download/v"
  1646. version "/bwa-" version ".tar.bz2"))
  1647. (sha256
  1648. (base32
  1649. "1zfhv2zg9v1icdlq4p9ssc8k01mca5d1bd87w71py2swfi74s6yy"))))
  1650. (build-system gnu-build-system)
  1651. (arguments
  1652. '(#:tests? #f ;no "check" target
  1653. #:phases
  1654. (modify-phases %standard-phases
  1655. (replace 'install
  1656. (lambda* (#:key outputs #:allow-other-keys)
  1657. (let* ((out (assoc-ref outputs "out"))
  1658. (bin (string-append out "/bin"))
  1659. (lib (string-append out "/lib"))
  1660. (doc (string-append out "/share/doc/bwa"))
  1661. (man (string-append out "/share/man/man1")))
  1662. (install-file "bwa" bin)
  1663. (install-file "libbwa.a" lib)
  1664. (install-file "README.md" doc)
  1665. (install-file "bwa.1" man))
  1666. #t))
  1667. ;; no "configure" script
  1668. (delete 'configure))))
  1669. (inputs `(("zlib" ,zlib)))
  1670. ;; Non-portable SSE instructions are used so building fails on platforms
  1671. ;; other than x86_64.
  1672. (supported-systems '("x86_64-linux"))
  1673. (home-page "http://bio-bwa.sourceforge.net/")
  1674. (synopsis "Burrows-Wheeler sequence aligner")
  1675. (description
  1676. "BWA is a software package for mapping low-divergent sequences against a
  1677. large reference genome, such as the human genome. It consists of three
  1678. algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
  1679. designed for Illumina sequence reads up to 100bp, while the rest two for
  1680. longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
  1681. features such as long-read support and split alignment, but BWA-MEM, which is
  1682. the latest, is generally recommended for high-quality queries as it is faster
  1683. and more accurate. BWA-MEM also has better performance than BWA-backtrack for
  1684. 70-100bp Illumina reads.")
  1685. (license license:gpl3+)))
  1686. (define-public bwa-pssm
  1687. (package (inherit bwa)
  1688. (name "bwa-pssm")
  1689. (version "0.5.11")
  1690. (source (origin
  1691. (method git-fetch)
  1692. (uri (git-reference
  1693. (url "https://github.com/pkerpedjiev/bwa-pssm.git")
  1694. (commit version)))
  1695. (file-name (git-file-name name version))
  1696. (sha256
  1697. (base32
  1698. "076c4q0cdqz8jgylb067y9zmvxglppnzi3qiscn0xiypgc6lgb5r"))))
  1699. (build-system gnu-build-system)
  1700. (inputs
  1701. `(("gdsl" ,gdsl)
  1702. ("zlib" ,zlib)
  1703. ("perl" ,perl)))
  1704. (home-page "http://bwa-pssm.binf.ku.dk/")
  1705. (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
  1706. (description
  1707. "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
  1708. the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
  1709. existing aligners it is fast and sensitive. Unlike most other aligners,
  1710. however, it is also adaptible in the sense that one can direct the alignment
  1711. based on known biases within the data set. It is coded as a modification of
  1712. the original BWA alignment program and shares the genome index structure as
  1713. well as many of the command line options.")
  1714. (license license:gpl3+)))
  1715. (define-public bwa-meth
  1716. (package
  1717. (name "bwa-meth")
  1718. (version "0.2.2")
  1719. (source (origin
  1720. (method git-fetch)
  1721. (uri (git-reference
  1722. (url "https://github.com/brentp/bwa-meth.git")
  1723. (commit (string-append "v" version))))
  1724. (file-name (git-file-name name version))
  1725. (sha256
  1726. (base32
  1727. "17j31i7zws5j7mhsq9x3qgkxly6mlmrgwhfq0qbflgxrmx04yaiz"))))
  1728. (build-system python-build-system)
  1729. (arguments
  1730. `(#:phases
  1731. (modify-phases %standard-phases
  1732. (add-after 'unpack 'keep-references-to-bwa
  1733. (lambda* (#:key inputs #:allow-other-keys)
  1734. (substitute* "bwameth.py"
  1735. (("bwa (mem|index)" _ command)
  1736. (string-append (which "bwa") " " command))
  1737. ;; There's an ill-advised check for "samtools" on PATH.
  1738. (("^checkX.*") ""))
  1739. #t)))))
  1740. (inputs
  1741. `(("bwa" ,bwa)))
  1742. (native-inputs
  1743. `(("python-toolshed" ,python-toolshed)))
  1744. (home-page "https://github.com/brentp/bwa-meth")
  1745. (synopsis "Fast and accurante alignment of BS-Seq reads")
  1746. (description
  1747. "BWA-Meth works for single-end reads and for paired-end reads from the
  1748. directional protocol (most common). It uses the method employed by
  1749. methylcoder and Bismark of in silico conversion of all C's to T's in both
  1750. reference and reads. It recovers the original read (needed to tabulate
  1751. methylation) by attaching it as a comment which BWA appends as a tag to the
  1752. read. It performs favorably to existing aligners gauged by number of on and
  1753. off-target reads for a capture method that targets CpG-rich region.")
  1754. (license license:expat)))
  1755. (define-public python-bx-python
  1756. (package
  1757. (name "python-bx-python")
  1758. (version "0.8.2")
  1759. (source (origin
  1760. (method url-fetch)
  1761. (uri (pypi-uri "bx-python" version))
  1762. (sha256
  1763. (base32
  1764. "11kksg2rbzihpmcid823xvg42xi88m7sz58rzk29abybkxy0rszs"))))
  1765. (build-system python-build-system)
  1766. ;; Tests fail because test data are not included
  1767. (arguments '(#:tests? #f))
  1768. (propagated-inputs
  1769. `(("python-numpy" ,python-numpy)
  1770. ("python-six" ,python-six)))
  1771. (inputs
  1772. `(("zlib" ,zlib)))
  1773. (native-inputs
  1774. `(("python-lzo" ,python-lzo)
  1775. ("python-nose" ,python-nose)
  1776. ("python-cython" ,python-cython)))
  1777. (home-page "https://github.com/bxlab/bx-python")
  1778. (synopsis "Tools for manipulating biological data")
  1779. (description
  1780. "bx-python provides tools for manipulating biological data, particularly
  1781. multiple sequence alignments.")
  1782. (license license:expat)))
  1783. (define-public python2-bx-python
  1784. (package-with-python2 python-bx-python))
  1785. (define-public python-pysam
  1786. (package
  1787. (name "python-pysam")
  1788. (version "0.15.1")
  1789. (source (origin
  1790. (method git-fetch)
  1791. ;; Test data is missing on PyPi.
  1792. (uri (git-reference
  1793. (url "https://github.com/pysam-developers/pysam.git")
  1794. (commit (string-append "v" version))))
  1795. (file-name (git-file-name name version))
  1796. (sha256
  1797. (base32
  1798. "1vj367w6xbn9bpmksm162l1aipf7cj97h1q83y7jcpm33ihwpf7x"))
  1799. (modules '((guix build utils)))
  1800. (snippet '(begin
  1801. ;; Drop bundled htslib. TODO: Also remove samtools
  1802. ;; and bcftools.
  1803. (delete-file-recursively "htslib")
  1804. #t))))
  1805. (build-system python-build-system)
  1806. (arguments
  1807. `(#:modules ((ice-9 ftw)
  1808. (srfi srfi-26)
  1809. (guix build python-build-system)
  1810. (guix build utils))
  1811. #:phases
  1812. (modify-phases %standard-phases
  1813. (add-before 'build 'set-flags
  1814. (lambda* (#:key inputs #:allow-other-keys)
  1815. (setenv "HTSLIB_MODE" "external")
  1816. (setenv "HTSLIB_LIBRARY_DIR"
  1817. (string-append (assoc-ref inputs "htslib") "/lib"))
  1818. (setenv "HTSLIB_INCLUDE_DIR"
  1819. (string-append (assoc-ref inputs "htslib") "/include"))
  1820. (setenv "LDFLAGS" "-lncurses")
  1821. (setenv "CFLAGS" "-D_CURSES_LIB=1")
  1822. #t))
  1823. (replace 'check
  1824. (lambda* (#:key inputs outputs #:allow-other-keys)
  1825. ;; This file contains tests that require a connection to the
  1826. ;; internet.
  1827. (delete-file "tests/tabix_test.py")
  1828. ;; FIXME: This test fails
  1829. (delete-file "tests/AlignmentFile_test.py")
  1830. ;; Add first subdirectory of "build" directory to PYTHONPATH.
  1831. (setenv "PYTHONPATH"
  1832. (string-append
  1833. (getenv "PYTHONPATH")
  1834. ":" (getcwd) "/build/"
  1835. (car (scandir "build"
  1836. (negate (cut string-prefix? "." <>))))))
  1837. ;; Step out of source dir so python does not import from CWD.
  1838. (with-directory-excursion "tests"
  1839. (setenv "HOME" "/tmp")
  1840. (invoke "make" "-C" "pysam_data")
  1841. (invoke "make" "-C" "cbcf_data")
  1842. ;; Running nosetests without explicitly asking for a single
  1843. ;; process leads to a crash. Running with multiple processes
  1844. ;; fails because the tests are not designed to run in parallel.
  1845. ;; FIXME: tests keep timing out on some systems.
  1846. (invoke "nosetests" "-v" "--processes" "1")))))))
  1847. (propagated-inputs
  1848. `(("htslib" ,htslib))) ; Included from installed header files.
  1849. (inputs
  1850. `(("ncurses" ,ncurses)
  1851. ("curl" ,curl)
  1852. ("zlib" ,zlib)))
  1853. (native-inputs
  1854. `(("python-cython" ,python-cython)
  1855. ;; Dependencies below are are for tests only.
  1856. ("samtools" ,samtools)
  1857. ("bcftools" ,bcftools)
  1858. ("python-nose" ,python-nose)))
  1859. (home-page "https://github.com/pysam-developers/pysam")
  1860. (synopsis "Python bindings to the SAMtools C API")
  1861. (description
  1862. "Pysam is a Python module for reading and manipulating files in the
  1863. SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
  1864. also includes an interface for tabix.")
  1865. (license license:expat)))
  1866. (define-public python2-pysam
  1867. (package-with-python2 python-pysam))
  1868. (define-public python-twobitreader
  1869. (package
  1870. (name "python-twobitreader")
  1871. (version "3.1.6")
  1872. (source (origin
  1873. (method git-fetch)
  1874. (uri (git-reference
  1875. (url "https://github.com/benjschiller/twobitreader")
  1876. (commit version)))
  1877. (file-name (git-file-name name version))
  1878. (sha256
  1879. (base32
  1880. "1qbxvv1h58cismbk1anpjrkpghsaiy64a11ir3lhy6qch6xf8n62"))))
  1881. (build-system python-build-system)
  1882. ;; Tests are not included
  1883. (arguments '(#:tests? #f))
  1884. (native-inputs
  1885. `(("python-sphinx" ,python-sphinx)))
  1886. (home-page "https://github.com/benjschiller/twobitreader")
  1887. (synopsis "Python library for reading .2bit files")
  1888. (description
  1889. "twobitreader is a Python library for reading .2bit files as used by the
  1890. UCSC genome browser.")
  1891. (license license:artistic2.0)))
  1892. (define-public python2-twobitreader
  1893. (package-with-python2 python-twobitreader))
  1894. (define-public python-plastid
  1895. (package
  1896. (name "python-plastid")
  1897. (version "0.4.8")
  1898. (source (origin
  1899. (method url-fetch)
  1900. (uri (pypi-uri "plastid" version))
  1901. (sha256
  1902. (base32
  1903. "0l24dd3q66if8yj042m4s0g95n6acn7im1imqd3p6h8ns43kxhj8"))))
  1904. (build-system python-build-system)
  1905. (arguments
  1906. ;; Some test files are not included.
  1907. `(#:tests? #f))
  1908. (propagated-inputs
  1909. `(("python-numpy" ,python-numpy)
  1910. ("python-scipy" ,python-scipy)
  1911. ("python-pandas" ,python-pandas)
  1912. ("python-pysam" ,python-pysam)
  1913. ("python-matplotlib" ,python-matplotlib)
  1914. ("python-biopython" ,python-biopython)
  1915. ("python-twobitreader" ,python-twobitreader)
  1916. ("python-termcolor" ,python-termcolor)))
  1917. (native-inputs
  1918. `(("python-cython" ,python-cython)
  1919. ("python-nose" ,python-nose)))
  1920. (home-page "https://github.com/joshuagryphon/plastid")
  1921. (synopsis "Python library for genomic analysis")
  1922. (description
  1923. "plastid is a Python library for genomic analysis – in particular,
  1924. high-throughput sequencing data – with an emphasis on simplicity.")
  1925. (license license:bsd-3)))
  1926. (define-public python2-plastid
  1927. (package-with-python2 python-plastid))
  1928. (define-public tetoolkit
  1929. (package
  1930. (name "tetoolkit")
  1931. (version "2.0.3")
  1932. (source (origin
  1933. (method git-fetch)
  1934. (uri (git-reference
  1935. (url "https://github.com/mhammell-laboratory/tetoolkit.git")
  1936. (commit version)))
  1937. (file-name (git-file-name name version))
  1938. (sha256
  1939. (base32
  1940. "1yzi0kfpzip8zpjb82x1ik6h22yzfyjiz2dv85v6as2awwqvk807"))))
  1941. (build-system python-build-system)
  1942. (arguments
  1943. `(#:python ,python-2 ; not guaranteed to work with Python 3
  1944. #:phases
  1945. (modify-phases %standard-phases
  1946. (add-after 'unpack 'make-writable
  1947. (lambda _
  1948. (for-each make-file-writable (find-files "."))
  1949. #t))
  1950. (add-after 'unpack 'patch-invocations
  1951. (lambda* (#:key inputs #:allow-other-keys)
  1952. (substitute* '("bin/TEtranscripts"
  1953. "bin/TEcount")
  1954. (("'sort ")
  1955. (string-append "'" (which "sort") " "))
  1956. (("'rm -f ")
  1957. (string-append "'" (which "rm") " -f "))
  1958. (("'Rscript'") (string-append "'" (which "Rscript") "'")))
  1959. (substitute* "TEToolkit/IO/ReadInputs.py"
  1960. (("BamToBED") (which "bamToBed")))
  1961. (substitute* "TEToolkit/Normalization.py"
  1962. (("\"Rscript\"")
  1963. (string-append "\"" (which "Rscript") "\"")))
  1964. #t))
  1965. (add-after 'install 'wrap-program
  1966. (lambda* (#:key outputs #:allow-other-keys)
  1967. ;; Make sure the executables find R packages.
  1968. (let ((out (assoc-ref outputs "out")))
  1969. (for-each
  1970. (lambda (script)
  1971. (wrap-program (string-append out "/bin/" script)
  1972. `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
  1973. '("TEtranscripts"
  1974. "TEcount")))
  1975. #t)))))
  1976. (inputs
  1977. `(("coreutils" ,coreutils)
  1978. ("bedtools" ,bedtools)
  1979. ("python-argparse" ,python2-argparse)
  1980. ("python-pysam" ,python2-pysam)
  1981. ("r-minimal" ,r-minimal)
  1982. ("r-deseq2" ,r-deseq2)))
  1983. (home-page "https://github.com/mhammell-laboratory/tetoolkit")
  1984. (synopsis "Transposable elements in differential enrichment analysis")
  1985. (description
  1986. "This is package for including transposable elements in differential
  1987. enrichment analysis of sequencing datasets. TEtranscripts and TEcount take
  1988. RNA-seq (and similar data) and annotates reads to both genes and transposable
  1989. elements. TEtranscripts then performs differential analysis using DESeq2.
  1990. Note that TEtranscripts and TEcount rely on specially curated GTF files, which
  1991. are not included due to their size.")
  1992. (license license:gpl3+)))
  1993. (define-public cd-hit
  1994. (package
  1995. (name "cd-hit")
  1996. (version "4.6.8")
  1997. (source (origin
  1998. (method url-fetch)
  1999. (uri (string-append "https://github.com/weizhongli/cdhit"
  2000. "/releases/download/V" version
  2001. "/cd-hit-v" version
  2002. "-2017-0621-source.tar.gz"))
  2003. (sha256
  2004. (base32
  2005. "1b4mwm2520ixjbw57sil20f9iixzw4bkdqqwgg1fc3pzm6rz4zmn"))))
  2006. (build-system gnu-build-system)
  2007. (arguments
  2008. `(#:tests? #f ; there are no tests
  2009. #:make-flags
  2010. ;; Executables are copied directly to the PREFIX.
  2011. (list (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin")
  2012. ;; Support longer sequences (e.g. Pacbio sequences)
  2013. "MAX_SEQ=60000000")
  2014. #:phases
  2015. (modify-phases %standard-phases
  2016. ;; No "configure" script
  2017. (delete 'configure)
  2018. ;; Remove sources of non-determinism
  2019. (add-after 'unpack 'be-timeless
  2020. (lambda _
  2021. (substitute* "cdhit-utility.c++"
  2022. ((" \\(built on \" __DATE__ \"\\)") ""))
  2023. (substitute* "cdhit-common.c++"
  2024. (("__DATE__") "\"0\"")
  2025. (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
  2026. #t))
  2027. ;; The "install" target does not create the target directory.
  2028. (add-before 'install 'create-target-dir
  2029. (lambda* (#:key outputs #:allow-other-keys)
  2030. (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
  2031. #t)))))
  2032. (inputs
  2033. `(("perl" ,perl)))
  2034. (home-page "http://weizhongli-lab.org/cd-hit/")
  2035. (synopsis "Cluster and compare protein or nucleotide sequences")
  2036. (description
  2037. "CD-HIT is a program for clustering and comparing protein or nucleotide
  2038. sequences. CD-HIT is designed to be fast and handle extremely large
  2039. databases.")
  2040. ;; The manual says: "It can be copied under the GNU General Public License
  2041. ;; version 2 (GPLv2)."
  2042. (license license:gpl2)))
  2043. (define-public clipper
  2044. (package
  2045. (name "clipper")
  2046. (version "1.2.1")
  2047. (source (origin
  2048. (method git-fetch)
  2049. (uri (git-reference
  2050. (url "https://github.com/YeoLab/clipper.git")
  2051. (commit version)))
  2052. (file-name (git-file-name name version))
  2053. (sha256
  2054. (base32
  2055. "0fja1rj84wp9vpj8rxpj3n8zqzcqq454m904yp9as1w4phccirjb"))
  2056. (modules '((guix build utils)))
  2057. (snippet
  2058. '(begin
  2059. ;; remove unnecessary setup dependency
  2060. (substitute* "setup.py"
  2061. (("setup_requires = .*") ""))
  2062. #t))))
  2063. (build-system python-build-system)
  2064. (arguments
  2065. `(#:python ,python-2 ; only Python 2 is supported
  2066. #:phases
  2067. (modify-phases %standard-phases
  2068. ;; This is fixed in upstream commit
  2069. ;; f6c2990198f906bf97730d95695b4bd5a6d01ddb.
  2070. (add-after 'unpack 'fix-typo
  2071. (lambda _
  2072. (substitute* "clipper/src/readsToWiggle.pyx"
  2073. (("^sc.*") ""))
  2074. #t)))))
  2075. (inputs
  2076. `(("htseq" ,python2-htseq)
  2077. ("python-pybedtools" ,python2-pybedtools)
  2078. ("python-cython" ,python2-cython)
  2079. ("python-scikit-learn" ,python2-scikit-learn)
  2080. ("python-matplotlib" ,python2-matplotlib)
  2081. ("python-pandas" ,python2-pandas)
  2082. ("python-pysam" ,python2-pysam)
  2083. ("python-numpy" ,python2-numpy)
  2084. ("python-scipy" ,python2-scipy)))
  2085. (native-inputs
  2086. `(("python-mock" ,python2-mock) ; for tests
  2087. ("python-nose" ,python2-nose) ; for tests
  2088. ("python-pytz" ,python2-pytz))) ; for tests
  2089. (home-page "https://github.com/YeoLab/clipper")
  2090. (synopsis "CLIP peak enrichment recognition")
  2091. (description
  2092. "CLIPper is a tool to define peaks in CLIP-seq datasets.")
  2093. (license license:gpl2)))
  2094. (define-public codingquarry
  2095. (package
  2096. (name "codingquarry")
  2097. (version "2.0")
  2098. (source (origin
  2099. (method url-fetch)
  2100. (uri (string-append
  2101. "mirror://sourceforge/codingquarry/CodingQuarry_v"
  2102. version ".tar.gz"))
  2103. (sha256
  2104. (base32
  2105. "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
  2106. (build-system gnu-build-system)
  2107. (arguments
  2108. '(#:tests? #f ; no "check" target
  2109. #:phases
  2110. (modify-phases %standard-phases
  2111. (delete 'configure)
  2112. (replace 'install
  2113. (lambda* (#:key outputs #:allow-other-keys)
  2114. (let* ((out (assoc-ref outputs "out"))
  2115. (bin (string-append out "/bin"))
  2116. (doc (string-append out "/share/doc/codingquarry")))
  2117. (install-file "INSTRUCTIONS.pdf" doc)
  2118. (copy-recursively "QuarryFiles"
  2119. (string-append out "/QuarryFiles"))
  2120. (install-file "CodingQuarry" bin)
  2121. (install-file "CufflinksGTF_to_CodingQuarryGFF3.py" bin))
  2122. #t)))))
  2123. (inputs `(("openmpi" ,openmpi)))
  2124. (native-search-paths
  2125. (list (search-path-specification
  2126. (variable "QUARRY_PATH")
  2127. (files '("QuarryFiles")))))
  2128. (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
  2129. (synopsis "Fungal gene predictor")
  2130. (description "CodingQuarry is a highly accurate, self-training GHMM fungal
  2131. gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
  2132. (home-page "https://sourceforge.net/projects/codingquarry/")
  2133. (license license:gpl3+)))
  2134. (define-public couger
  2135. (package
  2136. (name "couger")
  2137. (version "1.8.2")
  2138. (source (origin
  2139. (method url-fetch)
  2140. (uri (string-append
  2141. "http://couger.oit.duke.edu/static/assets/COUGER"
  2142. version ".zip"))
  2143. (sha256
  2144. (base32
  2145. "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
  2146. (build-system gnu-build-system)
  2147. (arguments
  2148. `(#:tests? #f
  2149. #:phases
  2150. (modify-phases %standard-phases
  2151. (delete 'configure)
  2152. (delete 'build)
  2153. (replace
  2154. 'install
  2155. (lambda* (#:key outputs #:allow-other-keys)
  2156. (let* ((out (assoc-ref outputs "out"))
  2157. (bin (string-append out "/bin")))
  2158. (copy-recursively "src" (string-append out "/src"))
  2159. (mkdir bin)
  2160. ;; Add "src" directory to module lookup path.
  2161. (substitute* "couger"
  2162. (("from argparse")
  2163. (string-append "import sys\nsys.path.append(\""
  2164. out "\")\nfrom argparse")))
  2165. (install-file "couger" bin))
  2166. #t))
  2167. (add-after
  2168. 'install 'wrap-program
  2169. (lambda* (#:key inputs outputs #:allow-other-keys)
  2170. ;; Make sure 'couger' runs with the correct PYTHONPATH.
  2171. (let* ((out (assoc-ref outputs "out"))
  2172. (path (getenv "PYTHONPATH")))
  2173. (wrap-program (string-append out "/bin/couger")
  2174. `("PYTHONPATH" ":" prefix (,path))))
  2175. #t)))))
  2176. (inputs
  2177. `(("python" ,python-2)
  2178. ("python2-pillow" ,python2-pillow)
  2179. ("python2-numpy" ,python2-numpy)
  2180. ("python2-scipy" ,python2-scipy)
  2181. ("python2-matplotlib" ,python2-matplotlib)))
  2182. (propagated-inputs
  2183. `(("r-minimal" ,r-minimal)
  2184. ("libsvm" ,libsvm)
  2185. ("randomjungle" ,randomjungle)))
  2186. (native-inputs
  2187. `(("unzip" ,unzip)))
  2188. (home-page "http://couger.oit.duke.edu")
  2189. (synopsis "Identify co-factors in sets of genomic regions")
  2190. (description
  2191. "COUGER can be applied to any two sets of genomic regions bound by
  2192. paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
  2193. putative co-factors that provide specificity to each TF. The framework
  2194. determines the genomic targets uniquely-bound by each TF, and identifies a
  2195. small set of co-factors that best explain the in vivo binding differences
  2196. between the two TFs.
  2197. COUGER uses classification algorithms (support vector machines and random
  2198. forests) with features that reflect the DNA binding specificities of putative
  2199. co-factors. The features are generated either from high-throughput TF-DNA
  2200. binding data (from protein binding microarray experiments), or from large
  2201. collections of DNA motifs.")
  2202. (license license:gpl3+)))
  2203. (define-public clustal-omega
  2204. (package
  2205. (name "clustal-omega")
  2206. (version "1.2.4")
  2207. (source (origin
  2208. (method url-fetch)
  2209. (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
  2210. version ".tar.gz"))
  2211. (sha256
  2212. (base32
  2213. "1vm30mzncwdv881vrcwg11vzvrsmwy4wg80j5i0lcfk6dlld50w6"))))
  2214. (build-system gnu-build-system)
  2215. (inputs
  2216. `(("argtable" ,argtable)))
  2217. (home-page "http://www.clustal.org/omega/")
  2218. (synopsis "Multiple sequence aligner for protein and DNA/RNA")
  2219. (description
  2220. "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
  2221. program for protein and DNA/RNA. It produces high quality MSAs and is capable
  2222. of handling data-sets of hundreds of thousands of sequences in reasonable
  2223. time.")
  2224. (license license:gpl2+)))
  2225. (define-public crossmap
  2226. (package
  2227. (name "crossmap")
  2228. (version "0.3.8")
  2229. (source (origin
  2230. (method url-fetch)
  2231. (uri (pypi-uri "CrossMap" version))
  2232. (sha256
  2233. (base32
  2234. "1sb2f2qbxya4fzw3yjl09vbrs8vfmw22zrygrvz004sf9gb1vkan"))))
  2235. (build-system python-build-system)
  2236. (inputs
  2237. `(("python-bx-python" ,python-bx-python)
  2238. ("python-numpy" ,python-numpy)
  2239. ("python-pybigwig" ,python-pybigwig)
  2240. ("python-pysam" ,python-pysam)
  2241. ("zlib" ,zlib)))
  2242. (native-inputs
  2243. `(("python-cython" ,python-cython)
  2244. ("python-nose" ,python-nose)))
  2245. (home-page "http://crossmap.sourceforge.net/")
  2246. (synopsis "Convert genome coordinates between assemblies")
  2247. (description
  2248. "CrossMap is a program for conversion of genome coordinates or annotation
  2249. files between different genome assemblies. It supports most commonly used
  2250. file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
  2251. (license license:gpl2+)))
  2252. (define-public python-dnaio
  2253. (package
  2254. (name "python-dnaio")
  2255. (version "0.3")
  2256. (source
  2257. (origin
  2258. (method url-fetch)
  2259. (uri (pypi-uri "dnaio" version))
  2260. (sha256
  2261. (base32
  2262. "0f16m7hdlm0fz1n7y5asy0v9ghyrq17ni1p9iybq22ddzyd49r27"))))
  2263. (build-system python-build-system)
  2264. (native-inputs
  2265. `(("python-cython" ,python-cython)
  2266. ("python-pytest" ,python-pytest)
  2267. ("python-xopen" ,python-xopen)))
  2268. (home-page "https://github.com/marcelm/dnaio/")
  2269. (synopsis "Read FASTA and FASTQ files efficiently")
  2270. (description
  2271. "dnaio is a Python library for fast parsing of FASTQ and also FASTA
  2272. files. The code was previously part of the cutadapt tool.")
  2273. (license license:expat)))
  2274. (define-public python-deeptoolsintervals
  2275. (package
  2276. (name "python-deeptoolsintervals")
  2277. (version "0.1.9")
  2278. (source (origin
  2279. (method url-fetch)
  2280. (uri (pypi-uri "deeptoolsintervals" version))
  2281. (sha256
  2282. (base32
  2283. "1xnl80nblysj6dylj4683wgrfa425rkx4dp5k65hvwdns9pw753x"))))
  2284. (build-system python-build-system)
  2285. (inputs
  2286. `(("zlib" ,zlib)))
  2287. (home-page "https://github.com/deeptools/deeptools_intervals")
  2288. (synopsis "Create GTF-based interval trees with associated meta-data")
  2289. (description
  2290. "This package provides a Python module creating/accessing GTF-based
  2291. interval trees with associated meta-data. It is primarily used by the
  2292. @code{deeptools} package.")
  2293. (license license:expat)))
  2294. (define-public python-deeptools
  2295. (package
  2296. (name "python-deeptools")
  2297. (version "3.4.3")
  2298. (source (origin
  2299. (method url-fetch)
  2300. (uri (pypi-uri "deepTools" version))
  2301. (sha256
  2302. (base32
  2303. "1azgjniss5ff6a90nicdjkxyjwqmi3gzfn09gra42hwlz19hipxb"))))
  2304. (build-system python-build-system)
  2305. (propagated-inputs
  2306. `(("python-matplotlib" ,python-matplotlib)
  2307. ("python-numpy" ,python-numpy)
  2308. ("python-numpydoc" ,python-numpydoc)
  2309. ("python-py2bit" ,python-py2bit)
  2310. ("python-pybigwig" ,python-pybigwig)
  2311. ("python-pysam" ,python-pysam)
  2312. ("python-scipy" ,python-scipy)
  2313. ("python-deeptoolsintervals" ,python-deeptoolsintervals)
  2314. ("python-plotly" ,python-plotly)))
  2315. (home-page "https://pypi.org/project/deepTools/")
  2316. (synopsis "Useful tools for exploring deep sequencing data")
  2317. (description "This package addresses the challenge of handling large amounts
  2318. of data that are now routinely generated from DNA sequencing centers.
  2319. @code{deepTools} contains useful modules to process the mapped reads data for
  2320. multiple quality checks, creating normalized coverage files in standard bedGraph
  2321. and bigWig file formats, that allow comparison between different files. Finally,
  2322. using such normalized and standardized files, deepTools can create many
  2323. publication-ready visualizations to identify enrichments and for functional
  2324. annotations of the genome.")
  2325. ;; The file deeptools/cm.py is licensed under the BSD license. The
  2326. ;; remainder of the code is licensed under the MIT license.
  2327. (license (list license:bsd-3 license:expat))))
  2328. (define-public cutadapt
  2329. (package
  2330. (name "cutadapt")
  2331. (version "2.1")
  2332. (source (origin
  2333. (method url-fetch)
  2334. (uri (pypi-uri "cutadapt" version))
  2335. (sha256
  2336. (base32
  2337. "1vqmsfkm6llxzmsz9wcfcvzx9a9f8iabvwik2rbyn7nc4wm25z89"))))
  2338. (build-system python-build-system)
  2339. (inputs
  2340. `(("python-dnaio" ,python-dnaio)
  2341. ("python-xopen" ,python-xopen)))
  2342. (native-inputs
  2343. `(("python-cython" ,python-cython)
  2344. ("python-pytest" ,python-pytest)
  2345. ("python-setuptools-scm" ,python-setuptools-scm)))
  2346. (home-page "https://cutadapt.readthedocs.io/en/stable/")
  2347. (synopsis "Remove adapter sequences from nucleotide sequencing reads")
  2348. (description
  2349. "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
  2350. other types of unwanted sequence from high-throughput sequencing reads.")
  2351. (license license:expat)))
  2352. (define-public libbigwig
  2353. (package
  2354. (name "libbigwig")
  2355. (version "0.4.4")
  2356. (source (origin
  2357. (method git-fetch)
  2358. (uri (git-reference
  2359. (url "https://github.com/dpryan79/libBigWig.git")
  2360. (commit version)))
  2361. (file-name (git-file-name name version))
  2362. (sha256
  2363. (base32
  2364. "09693dmf1scdac5pyq6qyn8b4mcipvnmc370k9a5z41z81m3dcsj"))))
  2365. (build-system gnu-build-system)
  2366. (arguments
  2367. `(#:test-target "test"
  2368. #:tests? #f ; tests require access to the web
  2369. #:make-flags
  2370. (list "CC=gcc"
  2371. (string-append "prefix=" (assoc-ref %outputs "out")))
  2372. #:phases
  2373. (modify-phases %standard-phases
  2374. (delete 'configure))))
  2375. (inputs
  2376. `(("zlib" ,zlib)
  2377. ("curl" ,curl)))
  2378. (native-inputs
  2379. `(("doxygen" ,doxygen)
  2380. ;; Need for tests
  2381. ("python" ,python-2)))
  2382. (home-page "https://github.com/dpryan79/libBigWig")
  2383. (synopsis "C library for handling bigWig files")
  2384. (description
  2385. "This package provides a C library for parsing local and remote BigWig
  2386. files.")
  2387. (license license:expat)))
  2388. (define-public python-pybigwig
  2389. (package
  2390. (name "python-pybigwig")
  2391. (version "0.3.17")
  2392. (source (origin
  2393. (method url-fetch)
  2394. (uri (pypi-uri "pyBigWig" version))
  2395. (sha256
  2396. (base32
  2397. "157x6v48y299zm382krf1dw08fdxg95im8lnabhp5vc94s04zxj1"))
  2398. (modules '((guix build utils)))
  2399. (snippet
  2400. '(begin
  2401. ;; Delete bundled libBigWig sources
  2402. (delete-file-recursively "libBigWig")
  2403. #t))))
  2404. (build-system python-build-system)
  2405. (arguments
  2406. `(#:phases
  2407. (modify-phases %standard-phases
  2408. (add-after 'unpack 'link-with-libBigWig
  2409. (lambda* (#:key inputs #:allow-other-keys)
  2410. (substitute* "setup.py"
  2411. (("libs=\\[") "libs=[\"BigWig\", "))
  2412. #t)))))
  2413. (propagated-inputs
  2414. `(("python-numpy" ,python-numpy)))
  2415. (inputs
  2416. `(("libbigwig" ,libbigwig)
  2417. ("zlib" ,zlib)
  2418. ("curl" ,curl)))
  2419. (home-page "https://github.com/dpryan79/pyBigWig")
  2420. (synopsis "Access bigWig files in Python using libBigWig")
  2421. (description
  2422. "This package provides Python bindings to the libBigWig library for
  2423. accessing bigWig files.")
  2424. (license license:expat)))
  2425. (define-public python2-pybigwig
  2426. (package-with-python2 python-pybigwig))
  2427. (define-public python-dendropy
  2428. (package
  2429. (name "python-dendropy")
  2430. (version "4.4.0")
  2431. (source
  2432. (origin
  2433. (method git-fetch)
  2434. ;; Source from GitHub so that tests are included.
  2435. (uri (git-reference
  2436. (url "https://github.com/jeetsukumaran/DendroPy.git")
  2437. (commit (string-append "v" version))))
  2438. (file-name (git-file-name name version))
  2439. (sha256
  2440. (base32
  2441. "097hfyv2kaf4x92i4rjx0paw2cncxap48qivv8zxng4z7nhid0x9"))))
  2442. (build-system python-build-system)
  2443. (home-page "https://dendropy.org/")
  2444. (synopsis "Library for phylogenetics and phylogenetic computing")
  2445. (description
  2446. "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
  2447. writing, simulation, processing and manipulation of phylogenetic
  2448. trees (phylogenies) and characters.")
  2449. (license license:bsd-3)))
  2450. (define-public python2-dendropy
  2451. (let ((base (package-with-python2 python-dendropy)))
  2452. (package
  2453. (inherit base)
  2454. (arguments
  2455. `(#:phases
  2456. (modify-phases %standard-phases
  2457. (add-after 'unpack 'remove-failing-test
  2458. (lambda _
  2459. ;; This test fails when the full test suite is run, as documented
  2460. ;; at https://github.com/jeetsukumaran/DendroPy/issues/74
  2461. (substitute* "tests/test_dataio_nexml_reader_tree_list.py"
  2462. (("test_collection_comments_and_annotations")
  2463. "do_not_test_collection_comments_and_annotations"))
  2464. #t)))
  2465. ,@(package-arguments base))))))
  2466. (define-public python-py2bit
  2467. (package
  2468. (name "python-py2bit")
  2469. (version "0.3.0")
  2470. (source
  2471. (origin
  2472. (method url-fetch)
  2473. (uri (pypi-uri "py2bit" version))
  2474. (sha256
  2475. (base32
  2476. "1vw2nvw1yrl7ikkqsqs1pg239yr5nspvd969r1x9arms1k25a1a5"))))
  2477. (build-system python-build-system)
  2478. (home-page "https://github.com/dpryan79/py2bit")
  2479. (synopsis "Access 2bit files using lib2bit")
  2480. (description
  2481. "This package provides Python bindings for lib2bit to access 2bit files
  2482. with Python.")
  2483. (license license:expat)))
  2484. (define-public deeptools
  2485. (package
  2486. (name "deeptools")
  2487. (version "3.1.3")
  2488. (source (origin
  2489. (method git-fetch)
  2490. (uri (git-reference
  2491. (url "https://github.com/deeptools/deepTools.git")
  2492. (commit version)))
  2493. (file-name (git-file-name name version))
  2494. (sha256
  2495. (base32
  2496. "1vggnf52g6q2vifdl4cyi7s2fnfqq0ky2zrkj5zv2qfzsc3p3siw"))))
  2497. (build-system python-build-system)
  2498. (arguments
  2499. `(#:phases
  2500. (modify-phases %standard-phases
  2501. ;; This phase fails, but it's not needed.
  2502. (delete 'reset-gzip-timestamps))))
  2503. (inputs
  2504. `(("python-plotly" ,python-plotly)
  2505. ("python-scipy" ,python-scipy)
  2506. ("python-numpy" ,python-numpy)
  2507. ("python-numpydoc" ,python-numpydoc)
  2508. ("python-matplotlib" ,python-matplotlib)
  2509. ("python-pysam" ,python-pysam)
  2510. ("python-py2bit" ,python-py2bit)
  2511. ("python-pybigwig" ,python-pybigwig)))
  2512. (native-inputs
  2513. `(("python-mock" ,python-mock) ;for tests
  2514. ("python-nose" ,python-nose) ;for tests
  2515. ("python-pytz" ,python-pytz))) ;for tests
  2516. (home-page "https://github.com/deeptools/deepTools")
  2517. (synopsis "Tools for normalizing and visualizing deep-sequencing data")
  2518. (description
  2519. "DeepTools addresses the challenge of handling the large amounts of data
  2520. that are now routinely generated from DNA sequencing centers. To do so,
  2521. deepTools contains useful modules to process the mapped reads data to create
  2522. coverage files in standard bedGraph and bigWig file formats. By doing so,
  2523. deepTools allows the creation of normalized coverage files or the comparison
  2524. between two files (for example, treatment and control). Finally, using such
  2525. normalized and standardized files, multiple visualizations can be created to
  2526. identify enrichments with functional annotations of the genome.")
  2527. (license license:gpl3+)))
  2528. (define-public delly
  2529. (package
  2530. (name "delly")
  2531. (version "0.7.9")
  2532. (source (origin
  2533. (method git-fetch)
  2534. (uri (git-reference
  2535. (url "https://github.com/dellytools/delly.git")
  2536. (commit (string-append "v" version))))
  2537. (file-name (git-file-name name version))
  2538. (sha256
  2539. (base32 "034jqsxswy9gqdh2zkgc1js99qkv75ks4xvzgmh0284sraagv61z"))
  2540. (modules '((guix build utils)))
  2541. (snippet
  2542. '(begin
  2543. (delete-file-recursively "src/htslib")
  2544. #t))))
  2545. (build-system gnu-build-system)
  2546. (arguments
  2547. `(#:tests? #f ; There are no tests to run.
  2548. #:make-flags
  2549. (list "PARALLEL=1" ; Allow parallel execution at run-time.
  2550. (string-append "prefix=" (assoc-ref %outputs "out")))
  2551. #:phases
  2552. (modify-phases %standard-phases
  2553. (delete 'configure) ; There is no configure phase.
  2554. (add-after 'install 'install-templates
  2555. (lambda* (#:key outputs #:allow-other-keys)
  2556. (let ((templates (string-append (assoc-ref outputs "out")
  2557. "/share/delly/templates")))
  2558. (mkdir-p templates)
  2559. (copy-recursively "excludeTemplates" templates)
  2560. #t))))))
  2561. (inputs
  2562. `(("boost" ,boost)
  2563. ("htslib" ,htslib)
  2564. ("zlib" ,zlib)
  2565. ("bzip2" ,bzip2)))
  2566. (home-page "https://github.com/dellytools/delly")
  2567. (synopsis "Integrated structural variant prediction method")
  2568. (description "Delly is an integrated structural variant prediction method
  2569. that can discover and genotype deletions, tandem duplications, inversions and
  2570. translocations at single-nucleotide resolution in short-read massively parallel
  2571. sequencing data. It uses paired-ends and split-reads to sensitively and
  2572. accurately delineate genomic rearrangements throughout the genome.")
  2573. (license license:gpl3+)))
  2574. (define-public diamond
  2575. (package
  2576. (name "diamond")
  2577. (version "0.9.30")
  2578. (source (origin
  2579. (method git-fetch)
  2580. (uri (git-reference
  2581. (url "https://github.com/bbuchfink/diamond.git")
  2582. (commit (string-append "v" version))))
  2583. (file-name (git-file-name name version))
  2584. (sha256
  2585. (base32
  2586. "0k6f3kb6cniw11xw6763kkbs1sl0yack7xsy7q5fl5v170ssphq4"))))
  2587. (build-system cmake-build-system)
  2588. (arguments
  2589. '(#:tests? #f ; no "check" target
  2590. #:phases
  2591. (modify-phases %standard-phases
  2592. (add-after 'unpack 'remove-native-compilation
  2593. (lambda _
  2594. (substitute* "CMakeLists.txt" (("-march=native") ""))
  2595. #t)))))
  2596. (inputs
  2597. `(("zlib" ,zlib)))
  2598. (home-page "https://github.com/bbuchfink/diamond")
  2599. (synopsis "Accelerated BLAST compatible local sequence aligner")
  2600. (description
  2601. "DIAMOND is a BLAST-compatible local aligner for mapping protein and
  2602. translated DNA query sequences against a protein reference database (BLASTP
  2603. and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
  2604. reads at a typical sensitivity of 90-99% relative to BLAST depending on the
  2605. data and settings.")
  2606. (license license:agpl3+)))
  2607. (define-public discrover
  2608. (package
  2609. (name "discrover")
  2610. (version "1.6.0")
  2611. (source
  2612. (origin
  2613. (method git-fetch)
  2614. (uri (git-reference
  2615. (url "https://github.com/maaskola/discrover.git")
  2616. (commit version)))
  2617. (file-name (git-file-name name version))
  2618. (sha256
  2619. (base32
  2620. "173fwi2vb6a5kp406hm3jj6j7v4whww796f2qcygp4rpvamh307y"))))
  2621. (build-system cmake-build-system)
  2622. (arguments
  2623. `(#:tests? #f ; there are no tests
  2624. #:phases
  2625. (modify-phases %standard-phases
  2626. (add-after 'unpack 'fix-latex-errors
  2627. (lambda _
  2628. (with-fluids ((%default-port-encoding #f))
  2629. (substitute* "doc/references.bib"
  2630. (("\\{S\\}illanp[^,]+,")
  2631. "{S}illanp{\\\"a}{\\\"a},")))
  2632. ;; XXX: I just can't get pdflatex to not complain about these
  2633. ;; characters. They end up in the manual via the generated
  2634. ;; discrover-cli-help.txt.
  2635. (substitute* "src/hmm/cli.cpp"
  2636. (("µ") "mu")
  2637. (("η") "eta")
  2638. (("≤") "<="))
  2639. ;; This seems to be a syntax error.
  2640. (substitute* "doc/discrover-manual.tex"
  2641. (("theverbbox\\[t\\]") "theverbbox"))
  2642. #t))
  2643. (add-after 'unpack 'add-missing-includes
  2644. (lambda _
  2645. (substitute* "src/executioninformation.hpp"
  2646. (("#define EXECUTIONINFORMATION_HPP" line)
  2647. (string-append line "\n#include <random>")))
  2648. (substitute* "src/plasma/fasta.hpp"
  2649. (("#define FASTA_HPP" line)
  2650. (string-append line "\n#include <random>")))
  2651. #t))
  2652. ;; FIXME: this is needed because we're using texlive-union, which
  2653. ;; doesn't handle fonts correctly. It expects to be able to generate
  2654. ;; fonts in the home directory.
  2655. (add-before 'build 'setenv-HOME
  2656. (lambda _ (setenv "HOME" "/tmp") #t)))))
  2657. (inputs
  2658. `(("boost" ,boost)
  2659. ("cairo" ,cairo)
  2660. ("rmath-standalone" ,rmath-standalone)))
  2661. (native-inputs
  2662. `(("texlive" ,(texlive-union (list texlive-fonts-cm
  2663. texlive-fonts-amsfonts
  2664. texlive-latex-doi
  2665. texlive-latex-examplep
  2666. texlive-latex-hyperref
  2667. texlive-latex-ms
  2668. texlive-latex-natbib
  2669. texlive-bibtex ; style files used by natbib
  2670. texlive-latex-pgf ; tikz
  2671. texlive-latex-verbatimbox)))
  2672. ("imagemagick" ,imagemagick)))
  2673. (home-page "https://dorina.mdc-berlin.de/public/rajewsky/discrover/")
  2674. (synopsis "Discover discriminative nucleotide sequence motifs")
  2675. (description "Discrover is a motif discovery method to find binding sites
  2676. of nucleic acid binding proteins.")
  2677. (license license:gpl3+)))
  2678. (define-public eigensoft
  2679. (package
  2680. (name "eigensoft")
  2681. (version "7.2.1")
  2682. (source
  2683. (origin
  2684. (method git-fetch)
  2685. (uri (git-reference
  2686. (url "https://github.com/DReichLab/EIG.git")
  2687. (commit (string-append "v" version))))
  2688. (file-name (git-file-name name version))
  2689. (sha256
  2690. (base32
  2691. "1c141fqvhnzibmnf22sv23vbmzm20kjjyrib44cfh75wyndp2d9k"))
  2692. (modules '((guix build utils)))
  2693. ;; Remove pre-built binaries.
  2694. (snippet '(begin
  2695. (delete-file-recursively "bin")
  2696. (mkdir "bin")
  2697. #t))))
  2698. (build-system gnu-build-system)
  2699. (arguments
  2700. `(#:tests? #f ; There are no tests.
  2701. #:make-flags '("CC=gcc")
  2702. #:phases
  2703. (modify-phases %standard-phases
  2704. ;; There is no configure phase, but the Makefile is in a
  2705. ;; sub-directory.
  2706. (replace 'configure
  2707. (lambda _ (chdir "src") #t))
  2708. ;; The provided install target only copies executables to
  2709. ;; the "bin" directory in the build root.
  2710. (add-after 'install 'actually-install
  2711. (lambda* (#:key outputs #:allow-other-keys)
  2712. (let* ((out (assoc-ref outputs "out"))
  2713. (bin (string-append out "/bin")))
  2714. (for-each (lambda (file)
  2715. (install-file file bin))
  2716. (find-files "../bin" ".*"))
  2717. #t))))))
  2718. (inputs
  2719. `(("gsl" ,gsl)
  2720. ("lapack" ,lapack)
  2721. ("openblas" ,openblas)
  2722. ("perl" ,perl)
  2723. ("gfortran" ,gfortran "lib")))
  2724. (home-page "https://github.com/DReichLab/EIG")
  2725. (synopsis "Tools for population genetics")
  2726. (description "The EIGENSOFT package provides tools for population
  2727. genetics and stratification correction. EIGENSOFT implements methods commonly
  2728. used in population genetics analyses such as PCA, computation of Tracy-Widom
  2729. statistics, and finding related individuals in structured populations. It
  2730. comes with a built-in plotting script and supports multiple file formats and
  2731. quantitative phenotypes.")
  2732. ;; The license of the eigensoft tools is Expat, but since it's
  2733. ;; linking with the GNU Scientific Library (GSL) the effective
  2734. ;; license is the GPL.
  2735. (license license:gpl3+)))
  2736. (define-public edirect
  2737. (package
  2738. (name "edirect")
  2739. (version "13.3.20200128")
  2740. (source (origin
  2741. (method url-fetch)
  2742. (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect"
  2743. "/versions/" version
  2744. "/edirect-" version ".tar.gz"))
  2745. (sha256
  2746. (base32
  2747. "093zp7klv81ph0y8mm8d78a9hnpfxbv2kdym70gzdf3vz176rw33"))
  2748. (modules '((guix build utils)))
  2749. (snippet
  2750. '(begin (delete-file "Mozilla-CA.tar.gz")
  2751. (substitute* "rchive.go"
  2752. ;; This go library does not have any license.
  2753. (("github.com/fiam/gounidecode/unidecode")
  2754. "golang.org/rainycape/unidecode"))
  2755. #t))))
  2756. (build-system perl-build-system)
  2757. (arguments
  2758. `(#:phases
  2759. (modify-phases %standard-phases
  2760. (delete 'configure)
  2761. (delete 'build)
  2762. (delete 'check) ; simple check after install
  2763. (add-after 'unpack 'patch-programs
  2764. (lambda* (#:key inputs #:allow-other-keys)
  2765. ;; Ignore errors about missing xtract.Linux and rchive.Linux.
  2766. (substitute* "pm-refresh"
  2767. (("cat \\\"\\$target")
  2768. "grep ^[[:digit:]] \"$target"))
  2769. #t))
  2770. (replace 'install
  2771. (lambda* (#:key inputs outputs #:allow-other-keys)
  2772. (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
  2773. (edirect-go (assoc-ref inputs "edirect-go-programs")))
  2774. (for-each
  2775. (lambda (file)
  2776. (install-file file bin))
  2777. '("archive-pubmed" "asp-cp" "asp-ls" "download-ncbi-data"
  2778. "download-pubmed" "edirect.pl" "efetch" "epost" "esearch"
  2779. "fetch-pubmed" "ftp-cp" "ftp-ls" "has-asp" "index-pubmed"
  2780. "pm-prepare" "pm-refresh" "pm-stash" "pm-collect"
  2781. "pm-index" "pm-invert" "pm-merge" "pm-promote"))
  2782. (symlink (string-append edirect-go "/bin/xtract.Linux")
  2783. (string-append bin "/xtract"))
  2784. (symlink (string-append edirect-go "/bin/rchive.Linux")
  2785. (string-append bin "/rchive")))
  2786. #t))
  2787. (add-after 'install 'wrap-program
  2788. (lambda* (#:key outputs #:allow-other-keys)
  2789. ;; Make sure everything can run in a pure environment.
  2790. (let ((out (assoc-ref outputs "out"))
  2791. (path (getenv "PERL5LIB")))
  2792. (for-each
  2793. (lambda (file)
  2794. (wrap-program file
  2795. `("PERL5LIB" ":" prefix (,path)))
  2796. (wrap-program file
  2797. `("PATH" ":" prefix (,(string-append out "/bin")
  2798. ,(dirname (which "sed"))
  2799. ,(dirname (which "gzip"))
  2800. ,(dirname (which "grep"))
  2801. ,(dirname (which "perl"))
  2802. ,(dirname (which "uname"))))))
  2803. (find-files out ".")))
  2804. #t))
  2805. (add-after 'wrap-program 'check
  2806. (lambda* (#:key outputs #:allow-other-keys)
  2807. (invoke (string-append (assoc-ref outputs "out")
  2808. "/bin/edirect.pl")
  2809. "-filter" "-help")
  2810. #t)))))
  2811. (inputs
  2812. `(("edirect-go-programs" ,edirect-go-programs)
  2813. ("perl-html-parser" ,perl-html-parser)
  2814. ("perl-encode-locale" ,perl-encode-locale)
  2815. ("perl-file-listing" ,perl-file-listing)
  2816. ("perl-html-tagset" ,perl-html-tagset)
  2817. ("perl-html-tree" ,perl-html-tree)
  2818. ("perl-http-cookies" ,perl-http-cookies)
  2819. ("perl-http-date" ,perl-http-date)
  2820. ("perl-http-message" ,perl-http-message)
  2821. ("perl-http-negotiate" ,perl-http-negotiate)
  2822. ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
  2823. ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
  2824. ("perl-net-http" ,perl-net-http)
  2825. ("perl-uri" ,perl-uri)
  2826. ("perl-www-robotrules" ,perl-www-robotrules)
  2827. ("perl-xml-simple" ,perl-xml-simple)
  2828. ("perl" ,perl)))
  2829. (home-page "https://www.ncbi.nlm.nih.gov/books/NBK179288/")
  2830. (synopsis "Tools for accessing the NCBI's set of databases")
  2831. (description
  2832. "Entrez Direct (EDirect) is a method for accessing the National Center
  2833. for Biotechnology Information's (NCBI) set of interconnected
  2834. databases (publication, sequence, structure, gene, variation, expression,
  2835. etc.) from a terminal. Functions take search terms from command-line
  2836. arguments. Individual operations are combined to build multi-step queries.
  2837. Record retrieval and formatting normally complete the process.
  2838. EDirect also provides an argument-driven function that simplifies the
  2839. extraction of data from document summaries or other results that are returned
  2840. in structured XML format. This can eliminate the need for writing custom
  2841. software to answer ad hoc questions.")
  2842. (native-search-paths
  2843. ;; Ideally this should be set for LWP somewhere.
  2844. (list (search-path-specification
  2845. (variable "PERL_LWP_SSL_CA_FILE")
  2846. (file-type 'regular)
  2847. (separator #f)
  2848. (files '("/etc/ssl/certs/ca-certificates.crt")))))
  2849. (license license:public-domain)))
  2850. (define-public edirect-go-programs
  2851. (package
  2852. (inherit edirect)
  2853. (name "edirect-go-programs")
  2854. (build-system go-build-system)
  2855. (arguments
  2856. `(#:install-source? #f
  2857. #:tests? #f ; No tests.
  2858. #:import-path "ncbi.nlm.nih.gov/entrez/edirect"
  2859. #:phases
  2860. (modify-phases %standard-phases
  2861. (replace 'build
  2862. (lambda* (#:key import-path #:allow-other-keys)
  2863. (with-directory-excursion (string-append "src/" import-path)
  2864. (invoke "go" "build" "-v" "-x" "j2x.go")
  2865. (invoke "go" "build" "-v" "-x" "t2x.go")
  2866. (invoke "go" "build" "-v" "-x" "-o"
  2867. "xtract.Linux" "xtract.go" "common.go")
  2868. (invoke "go" "build" "-v" "-x" "-o"
  2869. "rchive.Linux" "rchive.go" "common.go")
  2870. (invoke "go" "build" "-v" "-x" "-o" "symbols.Linux" "s2p.go"))))
  2871. (replace 'install
  2872. (lambda* (#:key outputs import-path #:allow-other-keys)
  2873. (let ((dest (string-append (assoc-ref outputs "out") "/bin"))
  2874. (source (string-append "src/" import-path "/")))
  2875. (for-each (lambda (file)
  2876. (format #t "installing ~a~%" file)
  2877. (install-file (string-append source file) dest))
  2878. '("j2x" "t2x" "symbols.Linux" "xtract.Linux" "rchive.Linux"))
  2879. #t))))))
  2880. (native-inputs '())
  2881. (propagated-inputs '())
  2882. (inputs
  2883. `(("go-github-com-fatih-color" ,go-github-com-fatih-color)
  2884. ("go-github-com-fogleman-gg" ,go-github-com-fogleman-gg)
  2885. ("go-github-com-gedex-inflector" ,go-github-com-gedex-inflector)
  2886. ("go-github-com-golang-freetype" ,go-github-com-golang-freetype)
  2887. ("go-github-com-klauspost-cpuid" ,go-github-com-klauspost-cpuid)
  2888. ("go-github-com-pbnjay-memory" ,go-github-com-pbnjay-memory)
  2889. ("go-github-com-surgebase-porter2" ,go-github-com-surgebase-porter2)
  2890. ("go-golang-org-rainycape-unidecode" ,go-golang-org-rainycape-unidecode)
  2891. ("go-golang-org-x-image" ,go-golang-org-x-image)
  2892. ("go-golang-org-x-text" ,go-golang-org-x-text)))))
  2893. (define-public exonerate
  2894. (package
  2895. (name "exonerate")
  2896. (version "2.4.0")
  2897. (source
  2898. (origin
  2899. (method url-fetch)
  2900. (uri
  2901. (string-append
  2902. "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
  2903. "exonerate-" version ".tar.gz"))
  2904. (sha256
  2905. (base32
  2906. "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
  2907. (build-system gnu-build-system)
  2908. (arguments
  2909. `(#:parallel-build? #f)) ; Building in parallel fails on some machines.
  2910. (native-inputs
  2911. `(("pkg-config" ,pkg-config)))
  2912. (inputs
  2913. `(("glib" ,glib)))
  2914. (home-page
  2915. "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
  2916. (synopsis "Generic tool for biological sequence alignment")
  2917. (description
  2918. "Exonerate is a generic tool for pairwise sequence comparison. It allows
  2919. the alignment of sequences using a many alignment models, either exhaustive
  2920. dynamic programming or a variety of heuristics.")
  2921. (license license:gpl3)))
  2922. (define-public express
  2923. (package
  2924. (name "express")
  2925. (version "1.5.1")
  2926. (source (origin
  2927. (method url-fetch)
  2928. (uri
  2929. (string-append
  2930. "http://bio.math.berkeley.edu/eXpress/downloads/express-"
  2931. version "/express-" version "-src.tgz"))
  2932. (sha256
  2933. (base32
  2934. "03rczxd0gjp2l1jxcmjfmf5j94j77zqyxa6x063zsc585nj40n0c"))))
  2935. (build-system cmake-build-system)
  2936. (arguments
  2937. `(#:tests? #f ;no "check" target
  2938. #:phases
  2939. (modify-phases %standard-phases
  2940. (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
  2941. (lambda* (#:key inputs #:allow-other-keys)
  2942. (substitute* "CMakeLists.txt"
  2943. (("set\\(Boost_USE_STATIC_LIBS ON\\)")
  2944. "set(Boost_USE_STATIC_LIBS OFF)")
  2945. (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
  2946. (string-append (assoc-ref inputs "bamtools") "/include/bamtools")))
  2947. (substitute* "src/CMakeLists.txt"
  2948. (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
  2949. (string-append (assoc-ref inputs "bamtools") "/lib"))
  2950. (("libprotobuf.a") "libprotobuf.so"))
  2951. #t)))))
  2952. (inputs
  2953. `(("boost" ,boost)
  2954. ("bamtools" ,bamtools)
  2955. ("protobuf" ,protobuf)
  2956. ("zlib" ,zlib)))
  2957. (home-page "http://bio.math.berkeley.edu/eXpress")
  2958. (synopsis "Streaming quantification for high-throughput genomic sequencing")
  2959. (description
  2960. "eXpress is a streaming tool for quantifying the abundances of a set of
  2961. target sequences from sampled subsequences. Example applications include
  2962. transcript-level RNA-Seq quantification, allele-specific/haplotype expression
  2963. analysis (from RNA-Seq), transcription factor binding quantification in
  2964. ChIP-Seq, and analysis of metagenomic data.")
  2965. (license license:artistic2.0)))
  2966. (define-public express-beta-diversity
  2967. (package
  2968. (name "express-beta-diversity")
  2969. (version "1.0.8")
  2970. (source (origin
  2971. (method git-fetch)
  2972. (uri (git-reference
  2973. (url "https://github.com/dparks1134/ExpressBetaDiversity.git")
  2974. (commit (string-append "v" version))))
  2975. (file-name (git-file-name name version))
  2976. (sha256
  2977. (base32
  2978. "0s0yzg5c21349rh7x4w9266jsvnp7j1hp9cf8sk32hz8nvrj745x"))))
  2979. (build-system gnu-build-system)
  2980. (arguments
  2981. `(#:phases
  2982. (modify-phases %standard-phases
  2983. (delete 'configure)
  2984. (add-before 'build 'enter-source (lambda _ (chdir "source") #t))
  2985. (replace 'check
  2986. (lambda _ (invoke "../bin/ExpressBetaDiversity" "-u") #t))
  2987. (replace 'install
  2988. (lambda* (#:key outputs #:allow-other-keys)
  2989. (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
  2990. (install-file "../scripts/convertToEBD.py" bin)
  2991. (install-file "../bin/ExpressBetaDiversity" bin)
  2992. #t))))))
  2993. (inputs
  2994. `(("python" ,python-2)))
  2995. (home-page "https://github.com/dparks1134/ExpressBetaDiversity")
  2996. (synopsis "Taxon- and phylogenetic-based beta diversity measures")
  2997. (description
  2998. "Express Beta Diversity (EBD) calculates ecological beta diversity
  2999. (dissimilarity) measures between biological communities. EBD implements a
  3000. variety of diversity measures including those that make use of phylogenetic
  3001. similarity of community members.")
  3002. (license license:gpl3+)))
  3003. (define-public fasttree
  3004. (package
  3005. (name "fasttree")
  3006. (version "2.1.10")
  3007. (source (origin
  3008. (method url-fetch)
  3009. (uri (string-append
  3010. "http://www.microbesonline.org/fasttree/FastTree-"
  3011. version ".c"))
  3012. (sha256
  3013. (base32
  3014. "0vcjdvy1j4m702vmak4svbfkrpcw63k7wymfksjp9a982zy8kjsl"))))
  3015. (build-system gnu-build-system)
  3016. (arguments
  3017. `(#:tests? #f ; no "check" target
  3018. #:phases
  3019. (modify-phases %standard-phases
  3020. (delete 'unpack)
  3021. (delete 'configure)
  3022. (replace 'build
  3023. (lambda* (#:key source #:allow-other-keys)
  3024. (invoke "gcc"
  3025. "-O3"
  3026. "-finline-functions"
  3027. "-funroll-loops"
  3028. "-Wall"
  3029. "-o"
  3030. "FastTree"
  3031. source
  3032. "-lm")
  3033. (invoke "gcc"
  3034. "-DOPENMP"
  3035. "-fopenmp"
  3036. "-O3"
  3037. "-finline-functions"
  3038. "-funroll-loops"
  3039. "-Wall"
  3040. "-o"
  3041. "FastTreeMP"
  3042. source
  3043. "-lm")
  3044. #t))
  3045. (replace 'install
  3046. (lambda* (#:key outputs #:allow-other-keys)
  3047. (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
  3048. (install-file "FastTree" bin)
  3049. (install-file "FastTreeMP" bin)
  3050. #t))))))
  3051. (home-page "http://www.microbesonline.org/fasttree")
  3052. (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
  3053. (description
  3054. "FastTree can handle alignments with up to a million of sequences in a
  3055. reasonable amount of time and memory. For large alignments, FastTree is
  3056. 100-1,000 times faster than PhyML 3.0 or RAxML 7.")
  3057. (license license:gpl2+)))
  3058. (define-public fastx-toolkit
  3059. (package
  3060. (name "fastx-toolkit")
  3061. (version "0.0.14")
  3062. (source (origin
  3063. (method url-fetch)
  3064. (uri
  3065. (string-append
  3066. "https://github.com/agordon/fastx_toolkit/releases/download/"
  3067. version "/fastx_toolkit-" version ".tar.bz2"))
  3068. (sha256
  3069. (base32
  3070. "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
  3071. (build-system gnu-build-system)
  3072. (inputs
  3073. `(("libgtextutils" ,libgtextutils)))
  3074. (native-inputs
  3075. `(("gcc" ,gcc-6) ;; doesn't build with later versions
  3076. ("pkg-config" ,pkg-config)))
  3077. (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
  3078. (synopsis "Tools for FASTA/FASTQ file preprocessing")
  3079. (description
  3080. "The FASTX-Toolkit is a collection of command line tools for Short-Reads
  3081. FASTA/FASTQ files preprocessing.
  3082. Next-Generation sequencing machines usually produce FASTA or FASTQ files,
  3083. containing multiple short-reads sequences. The main processing of such
  3084. FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
  3085. is sometimes more productive to preprocess the files before mapping the
  3086. sequences to the genome---manipulating the sequences to produce better mapping
  3087. results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
  3088. (license license:agpl3+)))
  3089. (define-public flexbar
  3090. (package
  3091. (name "flexbar")
  3092. (version "3.4.0")
  3093. (source (origin
  3094. (method git-fetch)
  3095. (uri (git-reference
  3096. (url "https://github.com/seqan/flexbar.git")
  3097. (commit (string-append "v" version))))
  3098. (file-name (git-file-name name version))
  3099. (sha256
  3100. (base32
  3101. "1pq9sxvdnldl14libk234m72dqhwgzs3acgl943wchwdqlcsi5r2"))))
  3102. (build-system cmake-build-system)
  3103. (arguments
  3104. `(#:phases
  3105. (modify-phases %standard-phases
  3106. (add-after 'unpack 'do-not-tune-to-CPU
  3107. (lambda _
  3108. (substitute* "src/CMakeLists.txt"
  3109. ((" -march=native") ""))
  3110. #t))
  3111. (replace 'check
  3112. (lambda* (#:key outputs #:allow-other-keys)
  3113. (setenv "PATH" (string-append (getcwd) ":" (getenv "PATH")))
  3114. (with-directory-excursion "../source/test"
  3115. (invoke "bash" "flexbar_test.sh"))
  3116. #t))
  3117. (replace 'install
  3118. (lambda* (#:key outputs #:allow-other-keys)
  3119. (let* ((out (string-append (assoc-ref outputs "out")))
  3120. (bin (string-append out "/bin/")))
  3121. (install-file "flexbar" bin))
  3122. #t)))))
  3123. (inputs
  3124. `(("tbb" ,tbb)
  3125. ("zlib" ,zlib)))
  3126. (native-inputs
  3127. `(("pkg-config" ,pkg-config)
  3128. ("seqan" ,seqan)))
  3129. (home-page "https://github.com/seqan/flexbar")
  3130. (synopsis "Barcode and adapter removal tool for sequencing platforms")
  3131. (description
  3132. "Flexbar preprocesses high-throughput nucleotide sequencing data
  3133. efficiently. It demultiplexes barcoded runs and removes adapter sequences.
  3134. Moreover, trimming and filtering features are provided. Flexbar increases
  3135. read mapping rates and improves genome and transcriptome assemblies. It
  3136. supports next-generation sequencing data in fasta/q and csfasta/q format from
  3137. Illumina, Roche 454, and the SOLiD platform.")
  3138. (license license:bsd-3)))
  3139. (define-public fraggenescan
  3140. (package
  3141. (name "fraggenescan")
  3142. (version "1.30")
  3143. (source
  3144. (origin
  3145. (method url-fetch)
  3146. (uri
  3147. (string-append "mirror://sourceforge/fraggenescan/"
  3148. "FragGeneScan" version ".tar.gz"))
  3149. (sha256
  3150. (base32 "158dcnwczgcyhwm4qlx19sanrwgdpzf6bn2y57mbpx55lkgz1mzj"))))
  3151. (build-system gnu-build-system)
  3152. (arguments
  3153. `(#:phases
  3154. (modify-phases %standard-phases
  3155. (delete 'configure)
  3156. (add-before 'build 'patch-paths
  3157. (lambda* (#:key outputs #:allow-other-keys)
  3158. (let* ((out (string-append (assoc-ref outputs "out")))
  3159. (share (string-append out "/share/fraggenescan/")))
  3160. (substitute* "run_FragGeneScan.pl"
  3161. (("system\\(\"rm")
  3162. (string-append "system(\"" (which "rm")))
  3163. (("system\\(\"mv")
  3164. (string-append "system(\"" (which "mv")))
  3165. (("\\\"awk") (string-append "\"" (which "awk")))
  3166. ;; This script and other programs expect the training files
  3167. ;; to be in the non-standard location bin/train/XXX. Change
  3168. ;; this to be share/fraggenescan/train/XXX instead.
  3169. (("^\\$train.file = \\$dir.*")
  3170. (string-append "$train_file = \""
  3171. share
  3172. "train/\".$FGS_train_file;")))
  3173. (substitute* "run_hmm.c"
  3174. (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
  3175. (string-append " strcpy(train_dir, \"" share "/train/\");"))))
  3176. #t))
  3177. (replace 'build
  3178. (lambda _
  3179. (invoke "make" "clean")
  3180. (invoke "make" "fgs")
  3181. #t))
  3182. (replace 'install
  3183. (lambda* (#:key outputs #:allow-other-keys)
  3184. (let* ((out (string-append (assoc-ref outputs "out")))
  3185. (bin (string-append out "/bin/"))
  3186. (share (string-append out "/share/fraggenescan/train")))
  3187. (install-file "run_FragGeneScan.pl" bin)
  3188. (install-file "FragGeneScan" bin)
  3189. (copy-recursively "train" share))
  3190. #t))
  3191. (delete 'check)
  3192. (add-after 'install 'post-install-check
  3193. ;; In lieu of 'make check', run one of the examples and check the
  3194. ;; output files get created.
  3195. (lambda* (#:key outputs #:allow-other-keys)
  3196. (let* ((out (string-append (assoc-ref outputs "out")))
  3197. (bin (string-append out "/bin/"))
  3198. (frag (string-append bin "run_FragGeneScan.pl")))
  3199. ;; Test complete genome.
  3200. (invoke frag
  3201. "-genome=./example/NC_000913.fna"
  3202. "-out=./test2"
  3203. "-complete=1"
  3204. "-train=complete")
  3205. (unless (and (file-exists? "test2.faa")
  3206. (file-exists? "test2.ffn")
  3207. (file-exists? "test2.gff")
  3208. (file-exists? "test2.out"))
  3209. (error "Expected files do not exist."))
  3210. ;; Test incomplete sequences.
  3211. (invoke frag
  3212. "-genome=./example/NC_000913-fgs.ffn"
  3213. "-out=out"
  3214. "-complete=0"
  3215. "-train=454_30")
  3216. #t))))))
  3217. (inputs
  3218. `(("perl" ,perl)