You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1290 lines
55 KiB

  1. ;; Bioinformatics module
  2. (define-module (gn packages bioinformatics)
  3. #:use-module ((guix licenses) #:prefix license:)
  4. #:use-module (guix packages)
  5. #:use-module (guix utils)
  6. #:use-module (guix download)
  7. #:use-module (guix git-download)
  8. #:use-module (guix build-system gnu)
  9. #:use-module (guix build-system cmake)
  10. #:use-module (guix build-system perl)
  11. #:use-module (guix build-system python)
  12. ;; #:use-module (guix build-system ruby)
  13. #:use-module (guix build-system r)
  14. #:use-module (guix build-system trivial)
  15. #:use-module (gn packages statistics)
  16. #:use-module (gnu packages)
  17. #:use-module (gnu packages autotools)
  18. #:use-module (gnu packages algebra)
  19. #:use-module (gnu packages base)
  20. #:use-module (gnu packages bioinformatics)
  21. #:use-module (gnu packages boost)
  22. #:use-module (gnu packages compression)
  23. #:use-module (gnu packages databases)
  24. #:use-module (gnu packages check)
  25. #:use-module (gnu packages cmake)
  26. #:use-module (gnu packages compression)
  27. #:use-module (gnu packages cpio)
  28. #:use-module (gnu packages curl)
  29. #:use-module (gnu packages doxygen)
  30. #:use-module (gnu packages datastructures)
  31. #:use-module (gnu packages check)
  32. #:use-module (gnu packages file)
  33. #:use-module (gnu packages gawk)
  34. #:use-module (gnu packages gcc)
  35. #:use-module (gnu packages graphviz)
  36. #:use-module (gnu packages java)
  37. #:use-module (gnu packages linux)
  38. #:use-module (gnu packages ldc)
  39. #:use-module (gnu packages machine-learning)
  40. #:use-module (gnu packages maths)
  41. #:use-module (gnu packages mpi)
  42. #:use-module (gnu packages ncurses)
  43. #:use-module (gnu packages node)
  44. #:use-module (gnu packages parallel)
  45. #:use-module (gnu packages pcre)
  46. #:use-module (gnu packages perl)
  47. #:use-module (gnu packages pkg-config)
  48. #:use-module (gnu packages popt)
  49. #:use-module (gnu packages protobuf)
  50. #:use-module (gnu packages python)
  51. #:use-module (gnu packages ruby)
  52. #:use-module (gnu packages statistics)
  53. #:use-module (gnu packages tbb)
  54. #:use-module (gnu packages textutils)
  55. #:use-module (gnu packages time)
  56. #:use-module (gnu packages tls)
  57. #:use-module (gnu packages vim)
  58. #:use-module (gnu packages web)
  59. #:use-module (gnu packages xml)
  60. #:use-module (gnu packages zip)
  61. #:use-module (gnu packages bootstrap)
  62. #:use-module (srfi srfi-1))
  ;; CONTRA: copy number variation (CNV) caller for targeted resequencing
  ;; data.  Nothing is configured or compiled; the install phase copies the
  ;; "scripts" directory, the two top-level Python programs and the PDF user
  ;; guide into the output.
  (define-public contra
    (package
      (name "contra")
      (version "2.0.6")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "mirror://sourceforge/contra-cnv/CONTRA.v" version ".tar.gz"))
         (sha256
          (base32
           "0agpcm2xh5f0i9n9sx1kvln6mzdksddmh11bvzj6bh76yw5pnw91"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)              ; We can use Guix's BEDtools instead.
           (replace 'install
             (lambda _
               (let* ((prefix (assoc-ref %outputs "out"))
                      (bin-dir (string-append prefix "/bin"))
                      (doc-dir (string-append prefix "/share/doc/contra"))
                      ;; Run "cp" with the given arguments; true on success.
                      (copy (lambda args
                              (zero? (apply system* "cp" args)))))
                 (mkdir-p bin-dir)
                 (mkdir-p doc-dir)
                 ;; Stop at the first copy that fails.
                 (and (copy "--recursive" "scripts" bin-dir)
                      (copy "contra.py" bin-dir)
                      (copy "baseline.py" bin-dir)
                      ;; There's only a pre-built PDF available.
                      (copy "CONTRA_User_Guide.2.0.pdf" doc-dir))))))))
      ;; Propagated so that they are available alongside the installed
      ;; scripts.
      (propagated-inputs
       `(("python" ,python-2)
         ("r" ,r)
         ("r-dnacopy" ,r-dnacopy)
         ("bedtools" ,bedtools)
         ("samtools" ,samtools)))
      (home-page "http://contra-cnv.sourceforge.net/")
      (synopsis "Tool for copy number variation (CNV) detection for targeted
resequencing data")
      (description "CONTRA is a tool for copy number variation (CNV) detection
for targeted resequencing data such as those from whole-exome capture data.
CONTRA calls copy number gains and losses for each target region with key
strategies including the use of base-level log-ratios to remove GC-content
bias, correction for an imbalanced library size effect on log-ratios, and the
estimation of log-ratio variations via binning and interpolation. It takes
standard alignment formats (BAM/SAM) and outputs in variant call format
(VCF 4.0) for easy integration with other next generation sequencing analysis
package.")
      (license license:gpl3+)))
  ;; Variant of the Boost package pinned to version 1.57.0.  Not exported; it
  ;; is used as the "boost" input of the delly package in this module.
  (define boost-delly
    (package
      (inherit boost)
      (name "boost-delly")
      (version "1.57.0")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "mirror://sourceforge/boost/boost_"
               ;; The tarball name uses underscores where the version string
               ;; uses dots (1.57.0 -> 1_57_0).  Characters are compared with
               ;; char=?; eq? on characters is not guaranteed by the Scheme
               ;; standard.
               (string-map (lambda (c) (if (char=? c #\.) #\_ c)) version)
               ".tar.bz2"))
         (sha256
          (base32
           "0rs94vdmg34bwwj23fllva6mhrml2i7mvmlb11zyrk1k5818q34i"))))))
  ;; Delly: structural variant caller.  Built with the plain Makefile (there
  ;; is no configure script) against the pinned Boost defined in this module;
  ;; the install phase copies five programs from "src" into the output's bin
  ;; directory.
  (define-public delly
    (package
      (name "delly")
      (version "0.7.2")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/tobiasrausch/delly/archive/v"
                             version ".tar.gz"))
         (sha256
          (base32 "173mmg43dbxqkyq0kiffz63xbmggr2kzd55mwxci9yfh5md1zprn"))
         (patches (list (search-patch "delly-use-system-libraries.patch")))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           (replace 'install
             (lambda _
               (let ((bin-dir (string-append (assoc-ref %outputs "out")
                                             "/bin")))
                 ;; Install every program produced by the build.
                 (for-each (lambda (program)
                             (install-file program bin-dir))
                           '("src/cov"
                             "src/delly"
                             "src/extract"
                             "src/iover"
                             "src/stats"))))))))
      (native-inputs
       `(("python" ,python-2)))
      (inputs
       `(("boost" ,boost-delly)         ; Use version 1.57.0 instead.
         ("htslib" ,htslib)
         ("zlib" ,zlib)
         ("bzip2" ,bzip2)))
      (home-page "https://github.com/tobiasrausch/delly")
      (synopsis "Integrated structural variant prediction method")
      (description "Delly is an integrated structural variant prediction method
that can discover and genotype deletions, tandem duplications, inversions and
translocations at single-nucleotide resolution in short-read massively parallel
sequencing data. It uses paired-ends and split-reads to sensitively and
accurately delineate genomic rearrangements throughout the genome. Structural
variants can be visualized using Delly-maze and Delly-suave.")
      (license license:gpl3)))
  ;; Control-FREEC: copy-number and allelic-imbalance caller.  The tarball is
  ;; extracted into a freshly created "source" directory, built there with
  ;; "make", and the resulting "freec" binary is installed.
  (define-public freec
    (package
      (name "control-freec")
      (version "8.7")
      (source
       (origin
         (method url-fetch)
         (uri "http://bioinfo-out.curie.fr/projects/freec/src/FREEC_Linux64.tar.gz")
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "12sl7gxbklhvv0687qjhml1z4lwpcn159zcyxvawvclsrzqjmv0h"))))
      (build-system gnu-build-system)
      ;; The source code's filename indicates only a 64-bit Linux build.
      ;; We need to investigate whether this is true.
      (supported-systems '("x86_64-linux"))
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; There's no configure phase because there are no external
           ;; dependencies.
           (delete 'configure)
           ;; There are no tests.
           (delete 'check)
           ;; Create "source" and extract the tarball inside it; fail if
           ;; either step fails.
           (replace 'unpack
             (lambda* (#:key source #:allow-other-keys)
               (if (zero? (system* "mkdir" "source"))
                   (with-directory-excursion "source"
                     (zero? (system* "tar" "xvf" source)))
                   #f)))
           ;; Run the default make target inside the extracted tree.
           (replace 'build
             (lambda _
               (with-directory-excursion "source"
                 (zero? (system* "make")))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin-dir (string-append out "/bin")))
                 (install-file "source/freec" bin-dir)))))))
      (home-page "http://bioinfo-out.curie.fr/projects/freec/")
      (synopsis "Tool for detection of copy-number changes and allelic imbalances
(including LOH) using deep-sequencing data")
      (description "Control-FREEC automatically computes, normalizes, segments
copy number and beta allele frequency (BAF) profiles, then calls copy number
alterations and LOH. The control (matched normal) sample is optional for whole
genome sequencing data but mandatory for whole exome or targeted sequencing
data. For whole genome sequencing data analysis, the program can also use
mappability data (files created by GEM). ")
      (license license:gpl2+)))
  ;; tabixpp: C++ wrapper around tabix.  The Makefile is pointed at the
  ;; htslib input instead of a bundled copy, and the resulting "tabix++"
  ;; program is installed.
  (define-public tabixpp
    (package
      (name "tabixpp")
      (version "1.0.0")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/ekg/tabixpp/archive/v"
                             version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "1s0lgks7qlvlhvcjhi2wm18nnza1bwcnic44ij7z8wfg88h4ivwn"))))
      (build-system gnu-build-system)
      (inputs
       `(("htslib" ,htslib)
         ("zlib" ,zlib)))
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           ;; The build phase needs overriding the location of htslib.
           (replace 'build
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((htslib-ref (assoc-ref inputs "htslib")))
                 (zero?
                  (system* "make"
                           (string-append "HTS_LIB=" htslib-ref
                                          "/lib/libhts.a")
                           "HTS_HEADERS=" ; No need to check for headers here.
                           ;; LIBPATH is a linker search path, so it must
                           ;; name htslib's "lib" directory rather than its
                           ;; "include" directory.
                           (string-append "LIBPATH=-L. -L" htslib-ref
                                          "/lib"))))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "tabix++" bin)))))))
      (home-page "https://github.com/ekg/tabixpp")
      (synopsis "C++ wrapper around tabix project")
      (description "This is a C++ wrapper around the Tabix project which abstracts
some of the details of opening and jumping in tabix-indexed files.")
      (license license:expat)))
  ;; Snapshot of tabixpp taken at a fixed commit; everything except name,
  ;; version and source is inherited from the tabixpp package.
  ;;
  ;; This version works with FreeBayes while the released version doesn't.  The
  ;; released version creates a variable with the name "vcf" somewhere, which
  ;; is also the name of a namespace in vcflib.
  (define-public tabixpp-freebayes
    (let ((commit "bbc63a49acc52212199f92e9e3b8fba0a593e3f7"))
      (package
        (inherit tabixpp)
        (name "tabixpp-freebayes")
        ;; "0-1." followed by the first seven characters of the commit hash.
        (version (string-append "0-1." (string-take commit 7)))
        (source
         (origin
           (method url-fetch)
           (uri (string-append "https://github.com/ekg/tabixpp/archive/"
                               commit
                               ".tar.gz"))
           (file-name (string-append name "-" version "-checkout.tar.gz"))
           (sha256
            (base32
             "1s06wmpgj4my4pik5kp2lc42hzzazbp5ism2y4i2ajp2y1c68g77")))))))
  ;; smithwaterman: pinned to a fixed commit.  Built with the plain Makefile;
  ;; only the "smithwaterman" program is installed.
  (define-public smithwaterman
    ;; TODO: Upgrading smithwaterman breaks FreeBayes.
    (let ((commit "203218b47d45ac56ef234716f1bd4c741b289be1"))
      (package
        (name "smithwaterman")
        (version (string-append "0-1." (string-take commit 7)))
        (source
         (origin
           (method url-fetch)
           (uri (string-append "https://github.com/ekg/smithwaterman/archive/"
                               commit
                               ".tar.gz"))
           (file-name (string-append name "-" version "-checkout.tar.gz"))
           (sha256
            (base32
             "1lkxy4xkjn96l70jdbsrlm687jhisgw4il0xr2dm33qwcclzzm3b"))))
        (build-system gnu-build-system)
        (arguments
         `(#:tests? #f                  ; There are no tests to run.
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)        ; There is no configure phase.
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let* ((out (assoc-ref outputs "out"))
                        (bin-dir (string-append out "/bin")))
                   (install-file "smithwaterman" bin-dir)))))))
        (home-page "https://github.com/ekg/smithwaterman")
        (synopsis "Implementation of the Smith-Waterman algorithm")
        (description "Implementation of the Smith-Waterman algorithm.")
        ;; The project contains a license file for the GPLv2.  The source
        ;; files do not contain a license notice, so GPLv2-only is assumed
        ;; here.
        (license license:gpl2))))
  ;; multichoose: multiset combination generator.  Built with the plain
  ;; Makefile; the "multichoose" and "multipermute" programs are installed.
  (define-public multichoose
    (package
      (name "multichoose")
      (version "1.0.3")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/ekg/multichoose/archive/v"
                             version
                             ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "0xy86vvr3qrs4l81qis7ia1q2hnqv0xcb4a1n60smxbhqqis5w3l"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin-dir (string-append (assoc-ref outputs "out")
                                             "/bin")))
                 ;; TODO: There are Python modules for these programs too.
                 (for-each (lambda (program)
                             (install-file program bin-dir))
                           '("multichoose" "multipermute"))))))))
      (native-inputs
       `(("python" ,python-2)
         ("node" ,node)))
      (home-page "https://github.com/ekg/multichoose")
      (synopsis "Library for efficient loopless multiset combination generation
algorithm")
      (description "A library implements an efficient loopless multiset
combination generation algorithm which is (approximately) described in
\"Loopless algorithms for generating permutations, combinations, and other
combinatorial configurations.\" G Ehrlich - Journal of the ACM (JACM),
1973. (Algorithm 7.)")
      (license license:expat)))
  ;; fsom: pinned to a fixed commit.  Built with the plain Makefile; only the
  ;; "fsom" program is installed.
  (define-public fsom
    (let ((commit "a6ef318fbd347c53189384aef7f670c0e6ce89a3"))
      (package
        (name "fsom")
        (version (string-append "0-1." (string-take commit 7)))
        (source
         (origin
           (method url-fetch)
           ;; Build the URL from the `commit' binding so that the fetched
           ;; archive and the version string cannot drift apart.
           (uri (string-append "https://github.com/ekg/fsom/archive/"
                               commit ".tar.gz"))
           (file-name (string-append name "-" version "-checkout.tar.gz"))
           (sha256
            (base32
             "0q6b57ppxfvsm5cqmmbfmjpn5qvx2zi5pamvp3yh8gpmmz8cfbl3"))))
        (build-system gnu-build-system)
        (arguments
         `(#:tests? #f                  ; There are no tests to run.
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)        ; There is no configure phase.
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                   (install-file "fsom" bin)))))))
        (home-page "https://github.com/ekg/fsom")
        (synopsis "Program for managing SOM (Self-Organizing Maps) neural networks")
        (description "Program for managing SOM (Self-Organizing Maps) neural networks.")
        (license license:gpl3))))
  ;; filevercmp: pinned to a fixed commit.  Built with the plain Makefile;
  ;; only the "filevercmp" program is installed.
  (define-public filevercmp
    (let ((commit "1a9b779b93d0b244040274794d402106907b71b7"))
      (package
        (name "filevercmp")
        (version (string-append "0-1." (string-take commit 7)))
        (source
         (origin
           (method url-fetch)
           (uri (string-append "https://github.com/ekg/filevercmp/archive/"
                               commit
                               ".tar.gz"))
           (file-name "filevercmp-src.tar.gz")
           (sha256
            (base32
             "0yp5jswf5j2pqc6517x277s4s6h1ss99v57kxw9gy0jkfl3yh450"))))
        (build-system gnu-build-system)
        (arguments
         `(#:tests? #f                  ; There are no tests to run.
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)        ; There is no configure phase.
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let* ((out (assoc-ref outputs "out"))
                        (bin-dir (string-append out "/bin")))
                   (install-file "filevercmp" bin-dir)))))))
        (home-page "https://github.com/ekg/filevercmp")
        (synopsis "Program to compare version strings")
        (description "A program to compare version strings. It intends to be a
replacement for strverscmp.")
        (license license:gpl3+))))
  ;; fastahack: pinned to a fixed commit.  Built with the plain Makefile; only
  ;; the "fastahack" program is installed.
  (define-public fastahack
    (let ((commit "c68cebb4f2e5d5d2b70cf08fbdf1944e9ab2c2dd"))
      (package
        (name "fastahack")
        (version (string-append "0-1." (string-take commit 7)))
        (source
         (origin
           (method url-fetch)
           (uri (string-append "https://github.com/ekg/fastahack/archive/"
                               commit ".tar.gz"))
           (file-name (string-append name "-" version "-checkout.tar.gz"))
           (sha256
            (base32
             "0j25lcl3jk1kls66zzxjfyq5ir6sfcvqrdwfcva61y3ajc9ssay2"))))
        (build-system gnu-build-system)
        (arguments
         `(#:tests? #f                  ; There are no tests to run.
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)        ; There is no configure phase.
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                   (install-file "fastahack" bin)))))))
        (home-page "https://github.com/ekg/fastahack")
        (synopsis "Program for indexing and sequence extraction from FASTA files")
        (description "Fastahack is a small application for indexing and extracting
sequences and subsequences from FASTA files. The included Fasta.cpp library
provides a FASTA reader and indexer that can be embedded into applications which
would benefit from directly reading subsequences from FASTA files. The library
automatically handles index file generation and use.")
        ;; There is no specific license for fastahack.
        ;; A part of the program is licensed GPLv2.
        ;;
        ;; `non-copyleft' is a procedure taking a URI and an optional comment;
        ;; it has to be called to obtain a license object.
        ;; NOTE(review): the URI below is the project page, used as a
        ;; placeholder because no license text location is known -- confirm.
        (license (list (license:non-copyleft
                        "https://github.com/ekg/fastahack"
                        "There is no specific license for fastahack.")
                       license:gpl2)))))
  ;; vcflib: library and command-line tools for VCF files, pinned to a fixed
  ;; commit.  The release archive lacks the git submodules, so their sources
  ;; are supplied as extra inputs and unpacked into place before building.
  (define-public vcflib
    ;; This must be the commit that is actually fetched below; the version
    ;; string and the file name are derived from it.
    (let ((commit "5ac091365fdc716cc47cc5410bb97ee5dc2a2c92"))
      (package
        (name "vcflib")
        (version (string-append "1.0.2-1." (string-take commit 7)))
        (source
         (origin
           (method url-fetch)
           (uri (string-append "https://github.com/vcflib/vcflib/archive/"
                               commit ".tar.gz"))
           ;; Evaluates to "vcflib-5ac0913.tar.gz".
           (file-name (string-append name "-" (string-take commit 7)
                                     ".tar.gz"))
           (sha256
            (base32
             "0ywshwpif059z5h0g7zzrdfzzdj2gr8xvwlwcsdxrms3p9iy35h8"))))
        (build-system gnu-build-system)
        (native-inputs
         `(("htslib" ,htslib)
           ("zlib" ,zlib)
           ("python" ,python-2)
           ("perl" ,perl)
           ("r" ,r)
           ("node" ,node)
           ;; Sources of the git submodules.
           ("tabixpp-src" ,(package-source tabixpp-freebayes))
           ("smithwaterman-src" ,(package-source smithwaterman))
           ("multichoose-src" ,(package-source multichoose))
           ("fsom-src" ,(package-source fsom))
           ("filevercmp-src" ,(package-source filevercmp))
           ("fastahack-src" ,(package-source fastahack))
           ("intervaltree-src"
            ,(origin
               (method url-fetch)
               (uri (string-append
                     "https://github.com/ekg/intervaltree/archive/"
                     "dbb4c513d1ad3baac516fc1484c995daf9b42838" ".tar.gz"))
               (file-name "intervaltree-src.tar.gz")
               (sha256
                (base32
                 "19prwpn2wxsrijp5svfqvfcxl5nj7zdhm3jycd5kqhl9nifpmcks"))))))
        (arguments
         `(#:tests? #f
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             (delete 'check)
             ;; Extract each submodule tarball into its directory, dropping
             ;; the archive's top-level directory.
             (add-after 'unpack 'unpack-submodule-sources
               (lambda* (#:key inputs #:allow-other-keys)
                 (let ((unpack (lambda (source target)
                                 (with-directory-excursion target
                                   (zero? (system* "tar" "xvf"
                                                   (assoc-ref inputs source)
                                                   "--strip-components=1"))))))
                   (and
                    (unpack "intervaltree-src" "intervaltree")
                    (unpack "fastahack-src" "fastahack")
                    (unpack "filevercmp-src" "filevercmp")
                    (unpack "fsom-src" "fsom")
                    (unpack "multichoose-src" "multichoose")
                    (unpack "smithwaterman-src" "smithwaterman")
                    (unpack "tabixpp-src" "tabixpp")))))
             ;; Replace the Makefile's GIT_VERSION line with a fixed value.
             (add-after 'unpack-submodule-sources 'fix-makefile
               (lambda* (#:key inputs #:allow-other-keys)
                 (substitute* '("Makefile")
                   (("^GIT_VERSION.*") "GIT_VERSION = v1.0.0"))))
             (replace 'build
               (lambda* (#:key inputs make-flags #:allow-other-keys)
                 ;; NOTE(review): the result of this first "make" is
                 ;; discarded, so a failure in tabixpp does not stop the
                 ;; phase -- confirm whether that is intended.
                 (with-directory-excursion "tabixpp"
                   (zero? (system* "make")))
                 ;; NOTE(review): system* does not go through a shell, so the
                 ;; double quotes below are passed to make literally --
                 ;; confirm that this is what the Makefile expects.
                 (zero? (system* "make" "CC=gcc"
                                 (string-append "CFLAGS=\"" "-Itabixpp "
                                                "-I" (assoc-ref inputs "htslib")
                                                "/include " "\"")
                                 "all"))))
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
                       ;;(include (string-append (assoc-ref outputs "out") "/include"))
                       (lib (string-append (assoc-ref outputs "out") "/lib")))
                   (for-each (lambda (file)
                               (install-file file bin))
                             (find-files "bin" ".*"))
                   ;; The header files do not correspond to libvcflib.a,
                   ;; therefore I left them out.
                   ;;(for-each (lambda (file)
                   ;;            (install-file file include))
                   ;;          (find-files "src" "\\.h$"))
                   (install-file "libvcflib.a" lib)))))))
        (home-page "https://github.com/vcflib/vcflib/")
        (synopsis "Library for parsing and manipulating VCF files")
        (description "Vcflib provides methods to manipulate and interpret
sequence variation as it can be described by VCF. It is both an API for parsing
and operating on records of genomic variation as it can be described by the VCF
format, and a collection of command-line utilities for executing complex
manipulations on VCF files.")
        (license license:expat))))
  ;; bash-tap: TAP testing library for Bash.  Nothing is compiled; the
  ;; builder extracts three named files from the source tarball straight into
  ;; the output's bin directory.
  (define-public bash-tap
    (package
      (name "bash-tap")
      (version "1.0.2")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/illusori/bash-tap/archive/"
                             version
                             ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "0qs1qi38bl3ns4mpagcawv618dsk2q1lgrbddgvs0wl3ia12cyz5"))))
      (build-system trivial-build-system)
      ;; The source is listed as an input so that the builder can look it up
      ;; in %build-inputs.
      (native-inputs
       `(("source" ,source)
         ("tar" ,tar)
         ("gzip" ,gzip)))
      (arguments
       `(#:modules ((guix build utils))
         #:builder
         (begin
           (use-modules (guix build utils))
           (let ((tar-program (string-append (assoc-ref %build-inputs "tar")
                                             "/bin/tar"))
                 (gzip-bin (string-append (assoc-ref %build-inputs "gzip")
                                          "/bin"))
                 (bin-dir (string-append %output "/bin"))
                 (tarball (string-append (assoc-ref %build-inputs "source"))))
             ;; PATH holds only gzip's bin directory; tar itself is invoked
             ;; by its absolute file name.
             (setenv "PATH" gzip-bin)
             (mkdir-p bin-dir)
             (with-directory-excursion bin-dir
               (zero? (system* tar-program "xvf" tarball
                               "--strip-components=1"
                               "--no-anchored"
                               "bash-tap"
                               "bash-tap-bootstrap"
                               "bash-tap-mock")))))))
      (home-page "http://www.illusori.co.uk/projects/bash-tap/")
      (synopsis "Bash port of a Test::More/Test::Builder-style TAP-compliant
test library")
      (description "Bash TAP is a TAP-compliant Test::More-style testing library
for Bash shell scripts and functions. Along with the Test::More-style testing
helpers it provides helper functions for mocking commands and functions and
in-process output capturing.")
      ;; The author didn't specify a license.
      (license license:public-domain)))
  ;; FreeBayes: haplotype-based variant detector, pinned to a fixed commit.
  ;; The git checkout comes without submodule contents, so bamtools, vcflib
  ;; (with its own submodules) and the test helpers are supplied as extra
  ;; inputs, unpacked into place and built before the main project.
  (define-public freebayes
    (let ((commit "3ce827d8ebf89bb3bdc097ee0fe7f46f9f30d5fb")
          (revision "1"))
      (package
        (name "freebayes")
        (version (string-append "1.0.2-" revision "." (string-take commit 7)))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/ekg/freebayes.git")
                 (commit commit)))
           (file-name (string-append name "-" version "-checkout"))
           (sha256
            (base32
             "1sbzwmcbn78ybymjnhwk7qc5r912azy5vqz2y7y81616yc3ba2a2"))))
        (build-system gnu-build-system)
        (inputs
         `(("zlib" ,zlib)
           ("htslib" ,htslib)))
        (native-inputs
         `(("bc" ,bc)                   ; Needed for running tests.
           ("samtools" ,samtools)       ; Needed for running tests.
           ("parallel" ,parallel)       ; Needed for running tests.
           ("procps" ,procps)           ; Needed for running tests.
           ("bamtools" ,bamtools)
           ("cmake" ,cmake)
           ("python" ,python-2)
           ("node" ,node)
           ("r" ,r)
           ("perl" ,perl)
           ("bamtools-src" ,(package-source bamtools))
           ("vcflib-src" ,(package-source vcflib))
           ;; These are submodules for the vcflib version used in freebayes
           ("tabixpp-src" ,(package-source tabixpp-freebayes))
           ("smithwaterman-src" ,(package-source smithwaterman))
           ("multichoose-src" ,(package-source multichoose))
           ("fsom-src" ,(package-source fsom))
           ("filevercmp-src" ,(package-source filevercmp))
           ("fastahack-src" ,(package-source fastahack))
           ("intervaltree-src"
            ,(origin
               (method url-fetch)
               (uri (string-append
                     "https://github.com/ekg/intervaltree/archive/"
                     "dbb4c513d1ad3baac516fc1484c995daf9b42838" ".tar.gz"))
               (file-name "intervaltree-src.tar.gz")
               (sha256
                (base32
                 "19prwpn2wxsrijp5svfqvfcxl5nj7zdhm3jycd5kqhl9nifpmcks"))))
           ;; These submodules are needed to run the tests.
           ("bash-tap-src" ,(package-source bash-tap))
           ;; ,(origin
           ;;    (method url-fetch)
           ;;    (uri (string-append "https://github.com/illusori/bash-tap/archive/"
           ;;                        "c38fbfa401600cc81ccda66bfc0da3ea56288d03" ".tar.gz"))
           ;;    (file-name "bash-tap-src.tar.gz")
           ;;    (sha256
           ;;     (base32 "07ijb1p0aa65ajpg9nkghc183iha6lwiydkckay8pghapa01j6nz"))))
           ("test-simple-bash-src"
            ,(origin
               (method url-fetch)
               (uri (string-append
                     "https://github.com/ingydotnet/test-simple-bash/archive/"
                     "124673ff204b01c8e96b7fc9f9b32ee35d898acc" ".tar.gz"))
               (file-name "test-simple-bash-src.tar.gz")
               (sha256
                (base32
                 "016xf3wbgqbav9dncvfdx5k0f10z5xwq8jdszajzmcvnhz5wis14"))))))
        (arguments
         `(#:phases
           (modify-phases %standard-phases
             (delete 'configure)
             ;; Extract each submodule tarball into its directory, dropping
             ;; the archive's top-level directory.
             (add-after 'unpack 'unpack-submodule-sources
               (lambda* (#:key inputs #:allow-other-keys)
                 (let ((unpack (lambda (source target)
                                 (with-directory-excursion target
                                   (zero? (system* "tar" "xvf"
                                                   (assoc-ref inputs source)
                                                   "--strip-components=1"))))))
                   (and
                    (unpack "bamtools-src" "bamtools")
                    (unpack "vcflib-src" "vcflib")
                    ;;(unpack "intervaltree-src" "intervaltree")
                    (unpack "fastahack-src" "vcflib/fastahack")
                    (unpack "filevercmp-src" "vcflib/filevercmp")
                    (unpack "fsom-src" "vcflib/fsom")
                    (unpack "intervaltree-src" "vcflib/intervaltree")
                    (unpack "multichoose-src" "vcflib/multichoose")
                    (unpack "smithwaterman-src" "vcflib/smithwaterman")
                    (unpack "tabixpp-src" "vcflib/tabixpp")
                    (unpack "test-simple-bash-src" "test/test-simple-bash")
                    (unpack "bash-tap-src" "test/bash-tap")))))
             (add-after 'unpack-submodule-sources 'fix-makefile
               (lambda* (#:key inputs #:allow-other-keys)
                 ;; We don't have the .git folder to get the version tag from.
                 ;; For this checkout of the code, it's v1.0.0.
                 (substitute* '("vcflib/Makefile")
                   (("^GIT_VERSION.*") "GIT_VERSION = v1.0.0"))))
             (replace 'build
               (lambda* (#:key inputs make-flags #:allow-other-keys)
                 (and
                  ;; Compile Bamtools before compiling the main project.
                  (with-directory-excursion "bamtools"
                    (system* "mkdir" "build")
                    (with-directory-excursion "build"
                      (and (zero? (system* "cmake" "../"))
                           (zero? (system* "make")))))
                  ;; Compile vcflib before compiling the main project.
                  (with-directory-excursion "vcflib"
                    ;; NOTE(review): the result of the tabixpp build is
                    ;; discarded; only the following "make all" decides
                    ;; whether this step succeeds -- confirm this is intended.
                    (with-directory-excursion "tabixpp"
                      (let ((htslib-ref (assoc-ref inputs "htslib")))
                        (zero?
                         (system* "make" "HTS_HEADERS="
                                  (string-append "HTS_LIB=" htslib-ref
                                                 "/lib/libhts.a")
                                  ;; LIBPATH is a linker search path, so it
                                  ;; must name htslib's "lib" directory
                                  ;; rather than its "include" directory.
                                  (string-append "LIBPATH=-L. -L" htslib-ref
                                                 "/lib")))))
                    (zero? (system* "make" "CC=gcc"
                                    (string-append "CFLAGS=\"" "-Itabixpp "
                                                   "-I" (assoc-ref inputs "htslib")
                                                   "/include " "\"")
                                    "all")))
                  (with-directory-excursion "src"
                    (zero? (system* "make"))))))
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                   (install-file "bin/freebayes" bin)
                   (install-file "bin/bamleftalign" bin))))
             ;; There are three tests that fail.  All because of the -P
             ;; (--perl-regexp) option in grep, which is not compiled into the
             ;; version of grep in Guix.  The exit status of "make test" is
             ;; returned without being compared to zero, so these failures do
             ;; not abort the build.
             (replace 'check
               (lambda* (#:key inputs #:allow-other-keys)
                 (system* "make" "test"))))))
        (home-page "https://github.com/ekg/freebayes")
        (synopsis "Haplotype-based variant detector")
        (description "FreeBayes is a Bayesian genetic variant detector designed to
find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms),
indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and
complex events (composite insertion and substitution events) smaller than the
length of a short-read sequencing alignment.")
        (license license:expat))))
  ;; Bioconductor "preprocessCore" package, built with the R build system.
  (define-public r-biocpreprocesscore
    (package
      (name "r-biocpreprocesscore")
      (version "1.32.0")
      (source
       (origin
         (method url-fetch)
         (uri (bioconductor-uri "preprocessCore" version))
         (sha256
          (base32
           "07isghjkqm91rg37l1fzpjrbq36b7w4pbsi95wwh6a8qq7r69z1n"))))
      (properties
       ;; The upstream name must match the name passed to `bioconductor-uri'
       ;; above.
       `((upstream-name . "preprocessCore")
         (r-repository . bioconductor)))
      (build-system r-build-system)
      (home-page "http://bioconductor.org/packages/preprocessCore")
      (synopsis "Preprocess functions for Bioconductor")
      (description
       "A library of core preprocessing routines.")
      (license license:lgpl2.0+)))
  ;; WGCNA R package, fetched from a git fork at a fixed commit and built
  ;; with the R build system.  The version string embeds the full commit
  ;; hash.
  (define-public r-wgcna
    (let ((commit "425bc170cc0873ddbd414675ac40f6d4d724c7cb"))
      (package
        (name "r-wgcna")
        (version (string-append "1.49-" commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 ;; (url "https://github.com/genenetwork/WGCNA.git")
                 (url "https://github.com/pjotrp/WGCNA.git")
                 (commit commit)))
           (file-name (string-append name "-" commit))
           (sha256
            (base32
             "1zqnsb8s3065rq1y2y3l79zi8wmdwjkcjls96ypycrb7pmdil58j"))))
        (build-system r-build-system)
        (properties `((upstream-name . "WGCNA")))
        (arguments
         `(#:tests? #f))                ; The tests are not run.
        ;; The entries that are commented out are kept for reference.
        (propagated-inputs
         `(;; ("r-annotationdbi" ,r-annotationdbi)
           ;; ("r-biocparallel" ,r-biocparallel)
           ("r-doparallel" ,r-doparallel)
           ("r-dynamictreecut" ,r-dynamictreecut)
           ("r-fastcluster" ,r-fastcluster)
           ("r-foreach" ,r-foreach)
           ("r-go-db" ,r-go-db)
           ;; ("r-grdevices" ,r-grdevices)
           ("r-hmisc" ,r-hmisc)
           ("r-impute" ,r-impute)
           ("r-matrixstats" ,r-matrixstats)
           ;; ("r-parallel" ,r-parallel)
           ("r-biocpreprocesscore" ,r-biocpreprocesscore)
           ;; ("r-splines" ,r-splines)
           ;; ("r-stats" ,r-stats)
           ;; ("r-survival" ,r-survival)
           ;; ("r-utils" ,r-utils)
           ))
        (home-page
         "http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA/")
        (synopsis
         "Weighted gene correlation network analysis (wgcna)")
        (description
         "Functions necessary to perform Weighted Correlation Network
Analysis on high-dimensional data. Includes functions for rudimentary
data cleaning, construction of correlation networks, module
identification, summarization, and relating of variables and modules
to sample traits. Also includes a number of utility functions for
data manipulation and visualization.")
        (license license:gpl2+))))
  (define-public qtlreaper
    ;; QTL Reaper release tarball, built with the Python 2 interpreter.
    (package
      (name "qtlreaper")
      (version "1.1.1")
      (source
       (origin
         (method url-fetch)
         ;; Fetched through the SourceForge mirror scheme; an earlier, direct
         ;; downloads.sourceforge.net link to the same 1.1.1 tarball is no
         ;; longer used.
         (uri (string-append "mirror://sourceforge/qtlreaper/qtlreaper-"
                             version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "0rbf030940nbbbkggdq2dxiy3c0jv8l4y3vvyfxhqimgj0qv3l1x"))))
      (build-system python-build-system)
      ;; python-setuptools is deliberately not listed as a native input (the
      ;; entry is disabled in this recipe).
      (arguments
       `(#:tests? #f                      ; no 'setup.py test'
         #:python ,python-2))
      (home-page "http://qtlreaper.sourceforge.net/")
      (synopsis "Tool for scanning expression data for QTLs")
      (description
       "It is essentially the batch-oriented version of WebQTL. It
requires, as input, expression data from members of a set of
recombinant inbred lines and genotype information for the same
lines. It searches for an association between each expression trait
and all genotypes and evaluates that association by a permutation
test. For the permutation test, it performs only as many permutations
as are necessary to define the empirical P-value to a reasonable
precision. It also performs bootstrap resampling to estimate the
confidence region for the location of a putative QTL.")
      (license license:gpl2+)))
  (define-public plink2
    ;; PLINK 1.9 built from the plink-ng release archive on GitHub using the
    ;; upstream "Makefile.std" (there is no configure script).
    (package
      (name "plink2")
      (version "1.90b3")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "https://github.com/chrchang/plink-ng/archive/v"
               version ".tar.gz"))
         ;; The GitHub archive is named only after the tag ("v1.90b3.tar.gz");
         ;; give the store item a descriptive name, like the other tarball
         ;; based packages in this module do.
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "03fzib1al5qkr9vxv63wxmv6y2pfb1rmir0h8jpi72r87hczqjig"))
         (patches (list (search-patch "plink-ng-Makefile-zlib.patch")))))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f                      ;no "check" target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Build with the stand-alone makefile shipped by upstream.
           (replace 'build
             (lambda _
               (zero? (system* "make" "-f" "Makefile.std"))))
           ;; Only the resulting "plink2" executable is installed.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out")
                                         "/bin/")))
                 (install-file "plink2" bin)
                 #t))))))
      (inputs
       `(("zlib" ,zlib)
         ("openblas" ,openblas)
         ("atlas" ,atlas)
         ("lapack" ,lapack)
         ("gfortran" ,gfortran)))
      (native-inputs
       `(("unzip" ,unzip)))
      (home-page "https://www.cog-genomics.org/plink2")
      (synopsis "Whole genome association analysis toolset")
      (description
       "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
      ;; Code is released under GPLv2, except for fisher.h, which is under
      ;; LGPLv2.1+
      (license (list license:gpl2 license:lgpl2.1+))))
  (define-public plink-ng
    ;; Git snapshot of plink-ng.  The build recipe, inputs, home page, synopsis
    ;; and description were a verbatim copy of the `plink2' package defined
    ;; earlier in this module, so they are inherited from it; only the fields
    ;; that actually differ (name, version, source and license) are spelled
    ;; out here.
    (let ((commit "516d730f9"))
      (package
        (inherit plink2)
        (name "plink-ng")
        (version (string-append "1.90b3-" commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/chrchang/plink-ng.git")
                 (commit commit)))
           (file-name (string-append name "-" commit))
           (sha256
            (base32
             "0cv824wkdml9h9imsc30s2x3l8g65j44cpjbr1ydkk49g5qmf580"))
           ;; The git checkout needs its own variant of the zlib Makefile
           ;; patch.
           (patches
            (list (search-patch "plink-ng-Makefile-zlib-git.patch")))))
        ;; This snapshot is declared as GPLv3+, unlike `plink2'.
        (license license:gpl3+))))
  (define-public gemma-git
    ;; GEMMA built from a pinned git revision of the genenetwork fork.
    (let ((commit "2de4bfab3"))
      (package
        (name "gemma-git")
        (version (string-append "0.9.5-" commit))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/genenetwork/GEMMA.git")
                 (commit commit)))
           (file-name (string-append name "-" commit))
           (sha256
            (base32
             "1drffdgwbzgiw9sf55ghl3zjv58f8i9kfz0zys5mp6n06syp4ira"))))
        (build-system gnu-build-system)
        (inputs
         `(("gsl" ,gsl)
           ("lapack" ,lapack)
           ("zlib" ,zlib)))
        (arguments
         `(#:tests? #f
           ;; Passed verbatim to make; the flag must not carry leading
           ;; whitespace.
           #:make-flags '("FORCE_DYNAMIC=1")
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             ;; Create the "bin" directory before the build phase runs.
             (add-before 'build 'bin-mkdir
               (lambda _
                 (mkdir-p "bin")
                 #t))
             ;; Install only the "bin/gemma" executable.
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (let ((out (assoc-ref outputs "out")))
                   (install-file "bin/gemma" (string-append out "/bin"))
                   #t))))))
        ;; The home page used to be an empty string; point it at the
        ;; repository this package is built from.
        (home-page "https://github.com/genenetwork/GEMMA")
        (synopsis "Tool for genome-wide efficient mixed model association")
        (description "GEMMA is the software implementing the Genome-wide
Efficient Mixed Model Association algorithm for a standard linear
mixed model and some of its close relatives for genome-wide
association studies (GWAS).")
        (license license:gpl3))))
  (define-public sambamba
    ;; Sambamba built from a pinned git revision.  A previously used revision
    ;; was 2ca5a2dbac5ab90c3b4c588519edc3edcb71df84 (source hash
    ;; 1f14wn9aaxwjkmla6pzq3s28741carbr2v0fd2v2mm1dcpwnrqz5), with
    ;; https://github.com/pjotrp/sambamba.git as an alternative repository.
    (let ((revision "c810c7ef14957f16288c205fd7b9d25c4ae7005d"))
      (package
        (name "sambamba")
        (version (string-append "0.5.9-1." (string-take revision 7)))
        (source
         (origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/roelj/sambamba.git")
                 (commit revision)))
           (file-name (string-append name "-" version "-checkout"))
           (sha256
            (base32
             "0c4c13f021sl7mf5xc2v8dbwsz775n8dlsrrn7qa6qgbx05n54dv"))))
        (build-system gnu-build-system)
        (native-inputs
         ;; A separate "lz4" package input is disabled; the lz4 sources are
         ;; provided through "lz4-src" below instead.
         `(("ldc" ,ldc)
           ("rdmd" ,rdmd)
           ("zlib" ,zlib)
           ("perl" ,perl)                 ; Needed for htslib
           ("ruby" ,ruby)                 ; Needed for htslib
           ("python" ,python)             ; Needed for htslib
           ("gcc" ,gcc)
           ("lz4-src"
            ,(origin
               (method url-fetch)
               (uri "https://github.com/Cyan4973/lz4/archive/160661c7a4cbf805f4af74d2e3932a17a66e6ce7.tar.gz")
               (sha256
                (base32
                 "131nnbsd5dh7c8sdqzc9kawh3mi0qi4qxznv7zhzfszlx4g2fd20"))))
           ;; Alternative that is currently disabled: the upstream htslib 1.3
           ;; release (https://github.com/samtools/htslib/archive/1.3.tar.gz,
           ;; hash 1bqkif7yrqmiqak5yb74kgpb2lsdlg7y344qa1xkdg7k1l4m86i9)
           ;; together with the "htslib-add-cram_to_bam.patch" patch.
           ("htslib-src"
            ,(origin
               (method url-fetch)
               (uri "https://github.com/lomereiter/htslib/archive/2f3c3ea7b301f9b45737a793c0b2dcf0240e5ee5.tar.gz")
               (sha256
                (base32
                 "0bl6w856afnbgdsw8bybsxpqsyf2ba3f12rqh47hhpxvv866g08w"))))
           ("biod-src"
            ,(origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/biod/BioD.git")
                     (commit "7efdb8a2f7fdcd71c9ad9596be48d1262bb1bd5b")))
               (file-name "biod-src")
               (sha256
                (base32
                 "09icc2bjsg9y4hxjim4ql275izadf0kh1nnmapg8manyz6bc8svf"))))))
        (arguments
         '(#:tests? #f
           #:make-flags (list "-f" "Makefile.guix")
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             (delete 'check)
             ;; htslib is compiled statically into the executable, and a
             ;; patched htslib is required for it to work with Sambamba, so
             ;; the bundled source directories are populated by hand here.
             (add-after 'unpack 'unpack-htslib-sources
               (lambda* (#:key inputs #:allow-other-keys)
                 (let ((untar
                        ;; Extract the tarball input LABEL into DIRECTORY,
                        ;; dropping the archive's top-level directory.
                        (lambda (label directory)
                          (with-directory-excursion directory
                            (zero? (system* "tar" "xvf"
                                            (assoc-ref inputs label)
                                            "--strip-components=1"))))))
                   (and (untar "htslib-src" "htslib")
                        (untar "lz4-src" "lz4")
                        ;; Replace the BioD directory of the checkout with a
                        ;; symlink to the "biod-src" input.
                        (zero? (system* "rm" "-r" "BioD"))
                        (zero? (system* "ln" "--symbolic"
                                        "--no-target-directory"
                                        (assoc-ref inputs "biod-src")
                                        "BioD"))))))
             (replace 'build
               (lambda* (#:key inputs #:allow-other-keys)
                 (let ((ldc-lib-flag (string-append "LDC_LIB_PATH="
                                                    (assoc-ref inputs "ldc")
                                                    "/lib")))
                   (zero? (system* "make" "sambamba-ldmd2-64" "CC=gcc"
                                   "D_COMPILER=ldc2" ldc-lib-flag)))))
             (replace 'install
               (lambda* (#:key outputs #:allow-other-keys)
                 (install-file "build/sambamba"
                               (string-append (assoc-ref outputs "out")
                                              "/bin")))))))
        (home-page "https://github.com/lomereiter/sambamba")
        (synopsis "A tool for working with SAM and BAM files written in D.")
        (description
         "Sambamba is a high performance modern robust and fast tool (and
library), written in the D programming language, for working with SAM
and BAM files. Current parallelised functionality is an important
subset of samtools functionality, including view, index, sort,
markdup, and depth.")
        (license license:gpl2+))))
  (define-public picard
    ;; Package stub for the Picard tools: it declares only the release tarball
    ;; and metadata, with no inputs and no custom build phases.
    (package
      (name "picard")
      (version "2.1.0")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/broadinstitute/picard/archive/"
                             version ".tar.gz"))
         ;; NOTE(review): the hash below is an empty placeholder and has to be
         ;; filled in before this source can be fetched.
         (sha256 (base32 ""))))
      (build-system gnu-build-system)
      (synopsis "A set of Java command line tools for manipulating high-throughput
sequencing data (HTS) data and formats")
      (description "Picard comprises Java-based command-line utilities that
manipulate SAM files, and a Java API (HTSJDK) for creating new programs that
read and write SAM files. Both SAM text format and SAM binary (BAM) format are
supported.")
      (home-page "http://broadinstitute.github.io/picard/")
      ;; The license is MIT.
      (license license:expat)))
  (define-public fastqc
    ;; FastQC source release.  No custom build phases are defined here.
    (package
      (name "fastqc")
      (version "0.11.4")
      (source
       (origin
         (method url-fetch)
         (uri (string-append
               "http://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v"
               version "_source.zip"))
         ;; NOTE(review): the hash below is an empty placeholder and has to be
         ;; filled in before this source can be fetched.
         (sha256
          (base32 ""))))
      (build-system gnu-build-system)
      ;; These entries used to sit in the `arguments' field, which expects
      ;; build-system keyword arguments (#:phases, #:tests?, ...), not an
      ;; alist of packages.  They are package inputs, so declare them as such.
      (inputs
       `(("perl" ,perl)                   ; Needed to run the java command.
         ("jdk" ,icedtea "jdk")))
      (native-inputs
       `(("ant" ,ant) ; TODO: Most Java packages need Ant, but in this case, IDK..
         ("jdk" ,icedtea "jdk")
         ;;("htsjdk" ,htsjdk) ; It is based on htsjdk, but it ships its own copy.
         ("unzip" ,unzip)))
      (home-page "http://www.bioinformatics.babraham.ac.uk/projects/fastqc/")
      (synopsis "A quality control tool for high throughput sequence data")
      (description
       "FastQC aims to provide a QC report which can spot problems which originate either in the sequencer or in the starting library material. It can either run as a stand alone interactive application for the immediate analysis of small numbers of FastQ files, or it can be run in a non-interactive mode where it would be suitable for integrating into a larger analysis pipeline for the systematic processing of large numbers of files.")
      (license license:gpl3+)))
  (define-public vcflib
    ;; COMMIT is the upstream revision whose GitHub archive is downloaded.  It
    ;; used to name a different revision (3ce827d...) than the one hard-coded
    ;; in the URL, so the version string advertised a commit that was not the
    ;; one being built.  The URL, the tarball name and the version are now all
    ;; derived from this single binding; the downloaded file and its hash are
    ;; unchanged.
    (let ((commit "5ac091365fdc716cc47cc5410bb97ee5dc2a2c92"))
      (package
        (name "vcflib")
        (version (string-append "v1.0.2-" (string-take commit 7)))
        (source
         (origin
           (method url-fetch)
           (uri (string-append "https://github.com/vcflib/vcflib/archive/"
                               commit ".tar.gz"))
           (file-name (string-append "vcflib-" (string-take commit 7)
                                     ".tar.gz"))
           (sha256
            (base32 "0ywshwpif059z5h0g7zzrdfzzdj2gr8xvwlwcsdxrms3p9iy35h8"))))
        (build-system gnu-build-system)
        (native-inputs
         ;; The "*-src" entries are archives of the helper repositories that
         ;; get unpacked into same-named sub-directories of the source tree
         ;; by the 'unpack-submodule-sources phase below.
         `(("htslib" ,htslib)
           ("zlib" ,zlib)
           ("python" ,python-2)
           ("perl" ,perl)
           ("tabixpp-src"
            ,(origin
               (method url-fetch)
               (uri (string-append "https://github.com/ekg/tabixpp/archive/"
                                   "bbc63a49acc52212199f92e9e3b8fba0a593e3f7" ".tar.gz"))
               (file-name "tabixpp-src.tar.gz")
               (sha256
                (base32 "1s06wmpgj4my4pik5kp2lc42hzzazbp5ism2y4i2ajp2y1c68g77"))))
           ("intervaltree-src"
            ,(origin
               (method url-fetch)
               (uri (string-append
                     "https://github.com/ekg/intervaltree/archive/"
                     "dbb4c513d1ad3baac516fc1484c995daf9b42838" ".tar.gz"))
               (file-name "intervaltree-src.tar.gz")
               (sha256
                (base32 "19prwpn2wxsrijp5svfqvfcxl5nj7zdhm3jycd5kqhl9nifpmcks"))))
           ("smithwaterman-src"
            ,(origin
               (method url-fetch)
               (uri (string-append "https://github.com/ekg/smithwaterman/archive/"
                                   "203218b47d45ac56ef234716f1bd4c741b289be1" ".tar.gz"))
               (file-name "smithwaterman-src.tar.gz")
               (sha256
                (base32 "1lkxy4xkjn96l70jdbsrlm687jhisgw4il0xr2dm33qwcclzzm3b"))))
           ("multichoose-src"
            ,(origin
               (method url-fetch)
               (uri (string-append "https://github.com/ekg/multichoose/archive/"
                                   "73d35daa18bf35729b9ba758041a9247a72484a5" ".tar.gz"))
               (file-name "multichoose-src.tar.gz")
               (sha256
                (base32 "07aizwdabmlnjaq4p3v0vsasgz1xzxid8xcxcw3paq8kh9c1099i"))))
           ("fsom-src"
            ,(origin
               (method url-fetch)
               (uri (string-append "https://github.com/ekg/fsom/archive/"
                                   "a6ef318fbd347c53189384aef7f670c0e6ce89a3" ".tar.gz"))
               (file-name "fsom-src.tar.gz")
               (sha256
                (base32 "0q6b57ppxfvsm5cqmmbfmjpn5qvx2zi5pamvp3yh8gpmmz8cfbl3"))))
           ("filevercmp-src"
            ,(origin
               (method url-fetch)
               (uri (string-append "https://github.com/ekg/filevercmp/archive/"
                                   "1a9b779b93d0b244040274794d402106907b71b7" ".tar.gz"))
               (file-name "filevercmp-src.tar.gz")
               (sha256
                (base32 "0yp5jswf5j2pqc6517x277s4s6h1ss99v57kxw9gy0jkfl3yh450"))))
           ("fastahack-src"
            ,(origin
               (method url-fetch)
               (uri (string-append "https://github.com/ekg/fastahack/archive/"
                                   "c68cebb4f2e5d5d2b70cf08fbdf1944e9ab2c2dd" ".tar.gz"))
               (file-name "fastahack-src.tar.gz")
               (sha256
                (base32 "0j25lcl3jk1kls66zzxjfyq5ir6sfcvqrdwfcva61y3ajc9ssay2"))))))
        (arguments
         `(#:tests? #f
           #:phases
           (modify-phases %standard-phases
             (delete 'configure)
             (delete 'check)
             (add-after 'unpack 'unpack-submodule-sources
               (lambda* (#:key inputs #:allow-other-keys)
                 ;; UNPACK extracts the tarball input named SOURCE into the
                 ;; directory TARGET, dropping the archive's top-level
                 ;; directory.  Each helper repository is unpacked exactly
                 ;; once ("intervaltree-src" used to be listed twice).
                 (let ((unpack (lambda (source target)
                                 (with-directory-excursion target
                                   (zero? (system* "tar" "xvf"
                                                   (assoc-ref inputs source)
                                                   "--strip-components=1"))))))
                   (and
                    (unpack "intervaltree-src" "intervaltree")
                    (unpack "fastahack-src" "fastahack")
                    (unpack "filevercmp-src" "filevercmp")
                    (unpack "fsom-src" "fsom")
                    (unpack "multichoose-src" "multichoose")
                    (unpack "smithwaterman-src" "smithwaterman")
                    (unpack "tabixpp-src" "tabixpp")))))
             (add-after 'unpack-submodule-sources 'fix-makefile
               (lambda* (#:key inputs #:allow-other-keys)
                 ;; Hard-code the version in the Makefile's GIT_VERSION line.
                 ;; `substitute*' hands each line to the regexp including its
                 ;; terminating newline and ".*" consumes it, so the
                 ;; replacement must end in "\n"; otherwise the next Makefile
                 ;; line is glued onto this one.
                 (substitute* '("Makefile")
                   (("^GIT_VERSION.*") "GIT_VERSION = v1.0.0\n"))))
             (replace
              'build
              (lambda* (#:key inputs make-flags #:allow-other-keys)
                ;; NOTE(review): the exit status of this first make run is
                ;; discarded, so a failure inside tabixpp does not stop the
                ;; build.  Left as is; confirm whether that is intentional.
                (with-directory-excursion "tabixpp"
                  (zero? (system* "make")))
                ;; NOTE(review): `system*' does not go through a shell, so the
                ;; double quotes become part of the CFLAGS value -- confirm
                ;; the Makefile copes with that.
                (zero? (system* "make" "CC=gcc"
                                (string-append "CFLAGS=\"" "-Itabixpp "
                                               "-I" (assoc-ref inputs "htslib") "/include " "\"") "all"))))
             (replace
              'install
              (lambda* (#:key outputs #:allow-other-keys)
                ;; Install everything found under "bin" plus the static
                ;; library.
                (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
                      (lib (string-append (assoc-ref outputs "out") "/lib")))
                  (for-each (lambda (file)
                              (install-file file bin))
                            (find-files "bin" ".*"))
                  (install-file "libvcflib.a" lib)))))))
        (home-page "https://github.com/vcflib/vcflib/")
        (synopsis "Library for parsing and manipulating VCF files")
        (description "Vcflib provides methods to manipulate and interpret
sequence variation as it can be described by VCF. It is both an API for parsing
and operating on records of genomic variation as it can be described by the VCF
format, and a collection of command-line utilities for executing complex
manipulations on VCF files.")
        (license license:expat))))
  (define-public pindel
    ;; Pindel release tarball, built with plain "make" (no configure script)
    ;; and checked by running each of upstream's test targets in turn.
    (package
      (name "pindel")
      (version "0.2.5b8")
      (source
       (origin
         (method url-fetch)
         (uri (string-append "https://github.com/genome/pindel/archive/v"
                             version ".tar.gz"))
         (file-name (string-append name "-" version ".tar.gz"))
         (sha256
          (base32 "06bsf0psxwf7h5p3j97xkh9k5qrwhxh6xn942y1j1m2inyhgs8bz"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("cppcheck" ,cppcheck)
         ("python" ,python-2)
         ("perl" ,perl)))
      (inputs
       `(("samtools" ,samtools)
         ("htslib" ,htslib)
         ("zlib" ,zlib)))
      (arguments
       '(#:phases
         (modify-phases %standard-phases
           (delete 'configure)            ; There is no configure phase.
           ;; 'make' has to be invoked twice, see the comments inside.
           (replace 'build
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((samtools-flag (string-append "SAMTOOLS="
                                                   (assoc-ref inputs "samtools"))))
                 ;; First pass: generates Makefile.local.  Make complains that
                 ;; Makefile.local is missing, so the exit status of this run
                 ;; is deliberately ignored.
                 (system* "make" samtools-flag)
                 ;; Second pass: the real compilation.  Makefile.local exists
                 ;; by now, so a failure here is a genuine error.
                 (zero? (system* "make")))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (for-each (lambda (program)
                             (install-file (string-append "src/" program) bin))
                           '("pindel"
                             "pindel2vcf"
                             "pindel2vcf4tcga"
                             "sam2pindel")))))
           ;; Upstream has several test targets; to run all tests each make
           ;; target is invoked separately, stopping at the first failure.
           (replace 'check
             (lambda _
               (let ((run-target (lambda (target)
                                   (zero? (system* "make" target)))))
                 (and (run-target "acceptance-tests")
                      (run-target "coverage-tests")
                      (run-target "cppcheck")
                      (run-target "functional-tests")
                      (run-target "regression-tests"))))))))
      (home-page "https://github.com/genome/pindel")
      (synopsis "Structural variants detector for next-gen sequencing data")
      (description "Pindel can detect breakpoints of large deletions, medium sized
insertions, inversions, tandem duplications and other structural variants at
single-based resolution from next-gen sequence data. It uses a pattern growth
approach to identify the breakpoints of these variants from paired-end short
reads.")
      (license license:gpl3+)))