aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEfraim Flashner2021-09-09 18:06:26 +0300
committerEfraim Flashner2021-09-09 18:06:26 +0300
commit599a3aa11183bee3a49d53d87afcc9fd7f0e0685 (patch)
treeea02a13dcbed6767228bfba2da21260507f464b3
parentb773e0e0a2869b2d370891696e0dd1382dacf9f4 (diff)
downloadguix-bioinformatics-wip-braker.tar.gz
braker almost readywip-braker
-rw-r--r--gn/packages/bioinformatics.scm347
-rw-r--r--gn/packages/databases.scm55
2 files changed, 324 insertions, 78 deletions
diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm
index 677de1d..637027c 100644
--- a/gn/packages/bioinformatics.scm
+++ b/gn/packages/bioinformatics.scm
@@ -18,6 +18,7 @@
#:use-module (guix build-system waf)
#:use-module (gnu packages)
#:use-module (gn packages crates-io)
+ #:use-module (gn packages databases)
#:use-module (gn packages java)
#:use-module (gn packages perl)
#:use-module (gn packages python)
@@ -45,6 +46,7 @@
#:use-module (gnu packages guile)
#:use-module (gnu packages image)
#:use-module (gnu packages imagemagick)
+ #:use-module (gnu packages java)
#:use-module (gnu packages jemalloc)
#:use-module (gnu packages linux)
#:use-module (gnu packages machine-learning)
@@ -66,6 +68,7 @@
#:use-module (gnu packages serialization)
#:use-module (gnu packages shells)
#:use-module (gnu packages statistics)
+ #:use-module (gnu packages sqlite)
#:use-module (gnu packages tcl)
#:use-module (gnu packages time)
#:use-module (gnu packages tls)
@@ -2427,68 +2430,62 @@ To run the bundled rtg-tools software you will also need java. The
(build-system perl-build-system)
(arguments
`(#:modules ((srfi srfi-26)
- (guix build perl-build-system)
- (guix build utils))
+ ,@%perl-build-system-modules)
#:phases
(modify-phases %standard-phases
- (delete 'configure)
+ (delete 'configure) ; No configure script.
(delete 'build)
(replace 'check
(lambda* (#:key tests? #:allow-other-keys)
(when tests?
- (invoke "prove" "-l" "t"))))
+ (invoke "prove" "--verbose" "--lib" "t"))))
(replace 'install
(lambda* (#:key inputs outputs #:allow-other-keys)
- (let* ((out (assoc-ref outputs "out"))
- (bin (string-append out "/bin")))
- (with-directory-excursion "scripts"
- (for-each (cut install-file <> bin)
- (find-files "." "\\.(py|pl|pm)$"))
- (for-each
- (cut wrap-script <>
- `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB")
- ,bin)))
- ;; braker.pl is the entry point so wrap it separately.
- (delete (string-append bin "/braker.pl")
- (find-files bin "\\.pl$")))
- (wrap-script (string-append bin "/braker.pl")
- `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB")
- ,bin))
- `("PATH" ":" prefix (;,(assoc-ref inputs "augustus")
- ;,(assoc-ref inputs "genemark")
- ,(assoc-ref inputs "bamtools")
- ,(assoc-ref inputs "samtools")
- ,(assoc-ref inputs "prothint")
- ;,(assoc-ref inputs "genomethreader")
- ,(assoc-ref inputs "spaln")
- ,(assoc-ref inputs "exonerate")
- ,(assoc-ref inputs "ncbi-blast")
- ,(assoc-ref inputs "diamond")
- ,(assoc-ref inputs "cdbfasta")
- ;,(assoc-ref inputs "gushr")
- ;,(assoc-ref inputs "ucsc")
- ))
- ;`("GENEMARK_PATH" "=" (,(string-append (assoc-ref inputs "genemark") "/bin")))
- ;`("AUGUSTUS_BIN_PATH" "=" (,(string-append (assoc-ref inputs "augustus") "/bin")))
- ;`("AUGUSTUS_AUGUSTUS_SCRIPTS_PATH" "=" (,(string-append (assoc-ref inputs "augustus") "/bin/augustus_scripts")))
- `("PYTHON3_PATH" "=" (,(string-append (assoc-ref inputs "python") "/bin")))
- `("BAMTOOLS_PATH" "=" (,(string-append (assoc-ref inputs "bamtools") "/bin")))
- `("DIAMOND_PATH" "=" (,(string-append (assoc-ref inputs "diamond") "/bin")))
- `("BLAST_PATH" "=" (,(string-append (assoc-ref inputs "blast+") "/bin")))
- `("PROTHINT_PATH" "=" (,(string-append (assoc-ref inputs "prothint") "/bin")))
- `("SAMTOOLS_PATH" "=" (,(string-append (assoc-ref inputs "samtools") "/bin")))
- `("CDBTOOLS_PATH" "=" (,(string-append (assoc-ref inputs "cdbfasta") "/bin")))
- `("ALIGNMENT_TOOL_PATH" "=" (,(string-append (assoc-ref inputs "spaln") "/bin/spaln")
- ,(string-append (assoc-ref inputs "exonerate") "/bin/exonerate")))
- ;`("MAKEHUB_PATH" "=" (,(string-append (assoc-ref inputs "makehub") "/bin")))
- )
- ))))
- )))
+ (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
+ (for-each
+ (cut install-file <> bin)
+ (find-files "scripts" "\\.(py|pl|pm)$"))
+ (for-each
+ (cut wrap-script <>
+ `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB")
+ ,bin)))
+ ;; braker.pl is the entry point so wrap it separately.
+ (delete (string-append bin "/braker.pl")
+ (find-files bin "\\.pl$")))
+ (wrap-script (string-append bin "/braker.pl")
+ `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB")
+ ,bin))
+ `("PATH" ":" prefix (,(assoc-ref inputs "augustus")
+ ;,(assoc-ref inputs "genemark")
+ ,(assoc-ref inputs "bamtools")
+ ,(assoc-ref inputs "samtools")
+ ,(assoc-ref inputs "prothint")
+ ;,(assoc-ref inputs "genomethreader")
+ ,(assoc-ref inputs "spaln")
+ ,(assoc-ref inputs "exonerate")
+ ,(assoc-ref inputs "ncbi-blast")
+ ,(assoc-ref inputs "diamond")
+ ,(assoc-ref inputs "cdbfasta")
+ ,(assoc-ref inputs "gushr")
+ ,(assoc-ref inputs "ucsc")))
+ ;`("GENEMARK_PATH" "=" (,(string-append (assoc-ref inputs "genemark") "/bin")))
+ `("AUGUSTUS_BIN_PATH" "=" (,(string-append (assoc-ref inputs "augustus") "/bin")))
+ `("AUGUSTUS_AUGUSTUS_SCRIPTS_PATH" "=" (,(string-append (assoc-ref inputs "augustus") "/share/augustus")))
+ `("PYTHON3_PATH" "=" (,(string-append (assoc-ref inputs "python") "/bin")))
+ `("BAMTOOLS_PATH" "=" (,(string-append (assoc-ref inputs "bamtools") "/bin")))
+ `("DIAMOND_PATH" "=" (,(string-append (assoc-ref inputs "diamond") "/bin")))
+ `("BLAST_PATH" "=" (,(string-append (assoc-ref inputs "blast+") "/bin")))
+ `("PROTHINT_PATH" "=" (,(string-append (assoc-ref inputs "prothint") "/bin")))
+ `("SAMTOOLS_PATH" "=" (,(string-append (assoc-ref inputs "samtools") "/bin")))
+ `("CDBTOOLS_PATH" "=" (,(string-append (assoc-ref inputs "cdbfasta") "/bin")))
+ `("ALIGNMENT_TOOL_PATH" "=" (,(string-append (assoc-ref inputs "spaln") "/bin/spaln")
+ ,(string-append (assoc-ref inputs "exonerate") "/bin/exonerate")))
+ ;`("MAKEHUB_PATH" "=" (,(string-append (assoc-ref inputs "makehub") "/bin")))
+ )))))))
(inputs
- `(
- ("guile" ,guile-3.0) ; for wrap-script
+ `(("guile" ,guile-3.0) ; for wrap-script
- ;("augustus" ,augustus)
+ ("augustus" ,augustus-bio)
;("genemark" ,genemark)
("bamtools" ,bamtools)
("samtools" ,samtools)
@@ -2498,10 +2495,10 @@ To run the bundled rtg-tools software you will also need java. The
("exonerate" ,exonerate)
("ncbi-blast" ,blast+)
("diamond" ,diamond)
- ("cbdfasta" ,cbdfasta) ; provides cdbfasta and cdbyank
- ;("gushr" ,gushr)
+ ("cdbfasta" ,cdbfasta) ; provides cdbfasta and cdbyank
+ ("gushr" ,gushr)
;("makehub" ,makehub)
- ;("ucsc" ,ucsc-genome-browser) ; provides bin/twoBitInfo and bin/faToTwoBit
+ ("ucsc" ,ucsc-genome-browser) ; provides bin/twoBitInfo and bin/faToTwoBit
("perl" ,perl)
("perl-hash-merge" ,perl-hash-merge)
@@ -2510,11 +2507,7 @@ To run the bundled rtg-tools software you will also need java. The
("perl-module-load-conditional" ,perl-module-load-conditional)
("perl-parallel-forkmanager" ,perl-parallel-forkmanager)
("perl-yaml" ,perl-yaml)
- ("python" ,python)
- ))
- (native-inputs
- `(
- ))
+ ("python" ,python)))
(home-page "https://github.com/Gaius-Augustus/BRAKER")
(synopsis
"Pipeline for fully automated prediction of protein coding gene structures")
@@ -2560,7 +2553,10 @@ RNA-Seq data.")
(delete-file "dependencies/spaln_boundary_scorer")))))
(build-system perl-build-system)
(arguments
- `(#:tests? #f ; TODO: Test suite fails, or packaging is wrong?
+ `(#:modules ((srfi srfi-26)
+ (guix build perl-build-system)
+ (guix build utils))
+ #:tests? #f ; TODO: Test suite fails, or packaging is wrong?
#:phases
(modify-phases %standard-phases
(add-after 'unpack 'adjust-source
@@ -2573,8 +2569,7 @@ RNA-Seq data.")
"/bin/spaln_boundary_scorer\""))
(("\\$binDir/\\.\\./dependencies/spaln_table\\\"")
(string-append (assoc-ref inputs "spaln")
- "/share/spaln/table\""))
- )))
+ "/share/spaln/table\"")))))
(delete 'configure)
(delete 'build)
(replace 'check
@@ -2589,21 +2584,20 @@ RNA-Seq data.")
(lambda* (#:key inputs outputs #:allow-other-keys)
(let* ((out (assoc-ref outputs "out"))
(bin (string-append out "/bin")))
- (with-directory-excursion "scripts"
- (for-each (cut install-file <> bin)
- (find-files "bin" "\\.(pl|py|sh)$"))
- (for-each
- (cut wrap-script <>
- `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB"))))
- (find-files bin "\\.pl$"))
- (for-each
- (cut wrap-script <>
- `("PATH" ":" prefix (,(assoc-ref inputs "coreutils")
- ,(assoc-ref inputs "diamond")
- ;,(assoc-ref inputs "genemark")
- ,(assoc-ref inputs "grep")
- ,(assoc-ref inputs "spaln"))))
- (find-files bin "\\.py$")))))))))
+ (for-each (cut install-file <> bin)
+ (find-files "bin" "\\.(pl|py|sh)$"))
+ (for-each
+ (cut wrap-script <>
+ `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB"))))
+ (find-files bin "\\.pl$"))
+ (for-each
+ (cut wrap-script <>
+ `("PATH" ":" prefix (,(assoc-ref inputs "coreutils")
+ ,(assoc-ref inputs "diamond")
+ ;,(assoc-ref inputs "genemark")
+ ,(assoc-ref inputs "grep")
+ ,(assoc-ref inputs "spaln"))))
+ (find-files bin "\\.py$"))))))))
(inputs
`(("guile" ,guile-3.0) ; for wrap-script
@@ -2842,3 +2836,200 @@ are scored based on local alignment quality around their boundaries.")
(supported-systems '("x86_64-linux"))
;; Licensee may use the Product solely for Licensee's own internal research purposes.
(license license:non-copyleft)))
+
+;; Name collision with augustus in games.
+(define-public augustus-bio
+  (package
+    (name "augustus-bio")
+    (version "3.4.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append "https://github.com/Gaius-Augustus/Augustus"
+                           "/releases/download/v" version
+                           "/augustus-" version ".tar.gz"))
+       (sha256
+        (base32
+         "1j0ny6v8v3hsk76w4f4vvcqzpjjn8qfvp5q1mdanglb2g1rp4rr4"))
+       (modules '((guix build utils)))
+       ;; Remove the files listed below from the tarball (presumably
+       ;; pre-built artifacts), along with every PDF, the *.1 files under
+       ;; mansrc (man pages are regenerated from the .adoc sources by the
+       ;; 'install-manpages phase), the "bin" directory and the Python
+       ;; bytecode caches of the test suite.
+       (snippet
+        '(begin
+           (for-each delete-file
+                     (append
+                      '("auxprogs/aln2wig/aln2wig"
+                        "auxprogs/bam2hints/bam2hints"
+                        "auxprogs/bam2wig/bam2wig"
+                        "auxprogs/compileSpliceCands/compileSpliceCands"
+                        "auxprogs/filterBam/data/BAMseek2011July24.jar"
+                        "auxprogs/homGeneMapping/src/homGeneMapping"
+                        "auxprogs/joingenes/joingenes"
+                        "auxprogs/utrrnaseq/Debug/utrrnaseq")
+                      (find-files "." "\\.pdf$")
+                      (find-files "mansrc" "\\.1$")))
+           (for-each delete-file-recursively
+                     '("bin"
+                       "tests/__pycache__"
+                       "tests/examples/__pycache__"
+                       "tests/examples_test/__pycache__"
+                       "tests/utils/__pycache__"))))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:test-target "unit_test"
+       #:make-flags (list (string-append "CC = " ,(cc-for-target)))
+       #:phases
+       (modify-phases %standard-phases
+         ;; There is no configure script to run.
+         (delete 'configure)
+         ;; Pin the reported version and redirect the hard-coded /usr/include
+         ;; header locations in the Makefiles to the matching inputs.
+         (add-after 'unpack 'adjust-sources
+           (lambda* (#:key inputs #:allow-other-keys)
+             (let ((mariadb-dev (assoc-ref inputs "mariadb:dev"))
+                   (mysqlpp (assoc-ref inputs "mysql++"))
+                   (lpsolve (assoc-ref inputs "lpsolve"))
+                   (bamtools (assoc-ref inputs "bamtools"))
+                   (htslib (assoc-ref inputs "htslib")))
+               (substitute* "common.mk"
+                 (("AUGVERSION =.*")
+                  (string-append "AUGVERSION = " ,version "\n")))
+               (substitute* '("src/Makefile"
+                              "src/unittests/Makefile")
+                 (("/usr/include/mysql ")
+                  (string-append mariadb-dev "/include/mysql "))
+                 (("/usr/include/mysql\\+\\+")
+                  (string-append mysqlpp "/include/mysql++ "))
+                 (("/usr/include/lpsolve")
+                  (string-append lpsolve "/include/lpsolve")))
+               (substitute* '("auxprogs/bam2hints/Makefile"
+                              "auxprogs/filterBam/src/Makefile")
+                 (("/usr/include/bamtools")
+                  (string-append bamtools "/include/bamtools")))
+               (substitute* "auxprogs/bam2wig/Makefile"
+                 (("/usr/include/htslib")
+                  (string-append htslib "/include/htslib"))))))
+         ;; Drop the bundled googletest copy and point the unit test
+         ;; Makefile at the "googletest" input instead.
+         (add-after 'unpack 'remove-googletest
+           (lambda* (#:key inputs #:allow-other-keys)
+             (let ((googletest (assoc-ref inputs "googletest")))
+               (delete-file-recursively "src/googletest")
+               (substitute* "src/Makefile"
+                 (("unittest: googletest") "unittest:"))
+               (substitute* "src/unittests/Makefile"
+                 (("\\.\\./googletest/include")
+                  (string-append googletest "/include"))
+                 (("-L\\.\\./googletest")
+                  (string-append "-L" googletest "/lib"))))))
+         ;; Stand-in for 'make install': the contents of config, bin and
+         ;; scripts all land directly in share/augustus, and the main
+         ;; programs are symlinked into bin.
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((prefix (assoc-ref outputs "out"))
+                    (bindir (string-append prefix "/bin"))
+                    (sharedir (string-append prefix "/share/augustus")))
+               (for-each mkdir-p (list sharedir bindir))
+               (for-each (lambda (directory)
+                           (copy-recursively directory sharedir))
+                         '("config" "bin" "scripts"))
+               (for-each
+                (lambda (program)
+                  (symlink (string-append "../share/augustus/" program)
+                           (string-append bindir "/" program)))
+                '("augustus" "etraining" "prepareAlign" "fastBlockSearch"
+                  "load2db" "getSeq")))))
+         ;; Render every AsciiDoc source under mansrc as a man page in
+         ;; share/man/man1.
+         (add-after 'install 'install-manpages
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let ((mandir (string-append (assoc-ref outputs "out")
+                                          "/share/man/man1")))
+               (for-each
+                (lambda (source)
+                  (invoke "asciidoctor"
+                          "-a" "docdate=''"
+                          "-b" "manpage"
+                          (string-append "--destination-dir=" mandir)
+                          source))
+                (find-files "mansrc" "\\.adoc$"))))))))
+    (inputs
+     `(("bamtools" ,bamtools)
+       ("boost" ,boost)
+       ("gsl" ,gsl)
+       ("htslib" ,htslib)
+       ("lpsolve" ,lpsolve)
+       ("mariadb:lib" ,mariadb "lib")
+       ("mysql++" ,mysql++)
+       ("perl" ,perl)
+       ("python" ,python)
+       ("sqlite" ,sqlite)
+       ("suitesparse" ,suitesparse)
+       ("zlib" ,zlib)))
+    (native-inputs
+     `(("asciidoctor" ,ruby-asciidoctor)   ; used by 'install-manpages
+       ("googletest" ,googletest)          ; replaces the bundled copy
+       ("mariadb:dev" ,mariadb "dev")))    ; headers for src/Makefile
+    (home-page "http://bioinf.uni-greifswald.de/webaugustus/")
+    (synopsis "Genome annotation with AUGUSTUS")
+    (description "AUGUSTUS is a gene prediction program. It can be used as an
+ab initio program, which means it bases its prediction purely on the sequence.
+AUGUSTUS may also incorporate hints on the gene structure coming from extrinsic
+sources such as EST, MS/MS, protein alignments and syntenic genomic alignments.")
+    (license license:artistic2.0)))
+
+;; TODO: Replace GeMoMa-1.6.2.jar with https://github.com/Jstacs/Jstacs
+(define-public gushr
+  (let ((commit "ee26d5c7eee97170c6183089d57477b338a7bc4b")
+        (revision "1"))
+    (package
+      (name "gushr")
+      (version (git-version "0.0.0" revision commit))
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
+               (url "https://github.com/Gaius-Augustus/GUSHR")
+               (commit commit)))
+         (file-name (git-file-name name version))
+         (sha256
+          (base32
+           "00alkahjzc6zgzfm7f6zcayjvgcs3kb90fb9v855gpqcxv3y1s40"))))
+      (build-system gnu-build-system)
+      (arguments
+       `(#:phases
+         (modify-phases %standard-phases
+           ;; Nothing to configure or compile.
+           (delete 'configure)
+           (delete 'build)
+           ;; Run gushr.py on the files in "example" ourselves, in place of
+           ;; the test.sh file.  The BAM file comes from the "RNAseq.bam"
+           ;; native input.
+           (replace 'check
+             (lambda* (#:key tests? parallel-tests? inputs #:allow-other-keys)
+               (when tests?
+                 (let ((augustus-scripts
+                        (string-append (assoc-ref inputs "augustus")
+                                       "/share/augustus"))
+                       (bam-file (assoc-ref inputs "RNAseq.bam"))
+                       (core-count (if parallel-tests?
+                                       (number->string (parallel-job-count))
+                                       "1")))
+                   (with-directory-excursion "example"
+                     (invoke "../gushr.py"
+                             "--AUGUSTUS_SCRIPTS_PATH" augustus-scripts
+                             "--bam" bam-file
+                             "--gtf" "augustus.gtf"
+                             "--genome" "genome.fa"
+                             "--outfile_name_stem" "gushr"
+                             "--cores" core-count))))))
+           ;; Install the script next to the bundled GeMoMa jar and wrap it
+           ;; so that PATH includes the bin directories of the "java" and
+           ;; "samtools" inputs.
+           (replace 'install
+             (lambda* (#:key inputs outputs #:allow-other-keys)
+               (let ((bindir (string-append (assoc-ref outputs "out") "/bin"))
+                     (java-bin (string-append (assoc-ref inputs "java")
+                                              "/bin"))
+                     (samtools-bin (string-append (assoc-ref inputs "samtools")
+                                                  "/bin")))
+                 (for-each (lambda (file)
+                             (install-file file bindir))
+                           '("gushr.py" "GeMoMa-1.6.2.jar"))
+                 (wrap-script (string-append bindir "/gushr.py")
+                   `("PATH" ":" prefix (,java-bin ,samtools-bin)))))))))
+      (inputs
+       `(("guile" ,guile-3.0)           ; for wrap-script
+         ("augustus" ,augustus-bio)
+         ("java" ,icedtea-8 "jdk")
+         ("python" ,python)
+         ("samtools" ,samtools)))
+      (native-inputs
+       ;; Alignment file handed to gushr.py via --bam in the 'check phase.
+       `(("RNAseq.bam"
+          ,(origin
+             (method url-fetch)
+             (uri "http://bioinf.uni-greifswald.de/bioinf/braker/RNAseq.bam")
+             (sha256
+              (base32
+               "1apmgh9irwhfnbpqwvcjasfs524g03i284bxbb8czdkqfsa74w47"))))))
+      (home-page "https://github.com/Gaius-Augustus/GUSHR")
+      (synopsis "Generating UTRs from SHort Reads")
+      (description "Assembly-free construction of UTRs from short read RNA-Seq
+data on the basis of coding sequence annotation. This tool has been adapted to
+the format needs of AUGUSTUS/BRAKER and employs GeMoMa for generating UTRs from
+RNA-Seq coverage data.")
+      (license license:gpl3+))))
diff --git a/gn/packages/databases.scm b/gn/packages/databases.scm
index 93280c2..9c4a109 100644
--- a/gn/packages/databases.scm
+++ b/gn/packages/databases.scm
@@ -2,6 +2,7 @@
#:use-module (gnu packages)
#:use-module (guix packages)
#:use-module (guix download)
+ #:use-module ((guix licenses) #:prefix license:)
#:use-module (guix utils)
#:use-module (guix build-system gnu)
#:use-module (guix build-system python)
@@ -136,3 +137,57 @@
(inputs
`(("zlib" ,zlib)
,@(package-inputs base))))))
+
+(define-public mysql++
+  (package
+    (name "mysql++")
+    (version "3.3.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append "https://tangentsoft.com/mysqlpp/releases"
+                           "/mysql++-" version ".tar.gz"))
+       (sha256
+        (base32
+         "1kz7l1ngk649cpp2h1cnyqan9px8d50r0dk7kngwrhkcam3br724"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:configure-flags
+       ;; Tell configure where the MariaDB headers and client library are.
+       (list (string-append "--with-mysql-include="
+                            (assoc-ref %build-inputs "mariadb:dev"))
+             (string-append "--with-mysql-lib="
+                            (assoc-ref %build-inputs "mariadb:lib")))
+       #:phases
+       (modify-phases %standard-phases
+         ;; It is unclear how to run the test suite so we just invoke the
+         ;; compiled binaries which start with 'test_*'.
+         (replace 'check
+           (lambda* (#:key tests? #:allow-other-keys)
+             (when tests?
+               ;; Preload the freshly built library, which is not installed
+               ;; yet at this point.
+               (setenv "LD_PRELOAD" "./libmysqlpp.so.3")
+               ;; 'invoke' raises an exception on a non-zero exit status, so
+               ;; the first failing test aborts the phase.
+               (for-each
+                (lambda (test)
+                  (invoke (string-append "./" test)))
+                '("test_array_index"
+                  "test_cpool"
+                  "test_datetime"
+                  "test_insertpolicy"
+                  "test_inttypes"
+                  "test_manip"
+                  "test_null_comparison"
+                  "test_qssqls"
+                  "test_qstream"
+                  "test_query_copy"
+                  "test_sqlstream"
+                  "test_ssqls2"
+                  "test_string"
+                  ;; "test_tcp" is skipped: Requires TCP connection.
+                  "test_uds"
+                  "test_wnp"))
+               ;; All phases share one process environment; clear the
+               ;; relative preload path so it does not leak into the
+               ;; programs run by later phases.
+               (unsetenv "LD_PRELOAD"))
+             #t)))))
+    (inputs
+     `(("mariadb:dev" ,mariadb "dev")
+       ("mariadb:lib" ,mariadb "lib")))
+    ;; Same domain as the source URI above.
+    (home-page "https://tangentsoft.com/mysqlpp/")
+    (synopsis "MySQL C++ library bindings")
+    (description "MySQL++ is a complex C++ API for MySQL. The goal of this API
+is to make working with Queries as easy as working with other STL containers.")
+    (license license:lgpl2.1+)))