From 599a3aa11183bee3a49d53d87afcc9fd7f0e0685 Mon Sep 17 00:00:00 2001 From: Efraim Flashner Date: Thu, 9 Sep 2021 18:06:26 +0300 Subject: braker almost ready --- gn/packages/bioinformatics.scm | 347 ++++++++++++++++++++++++++++++++--------- gn/packages/databases.scm | 55 +++++++ 2 files changed, 324 insertions(+), 78 deletions(-) (limited to 'gn') diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm index 677de1d..637027c 100644 --- a/gn/packages/bioinformatics.scm +++ b/gn/packages/bioinformatics.scm @@ -18,6 +18,7 @@ #:use-module (guix build-system waf) #:use-module (gnu packages) #:use-module (gn packages crates-io) + #:use-module (gn packages databases) #:use-module (gn packages java) #:use-module (gn packages perl) #:use-module (gn packages python) @@ -45,6 +46,7 @@ #:use-module (gnu packages guile) #:use-module (gnu packages image) #:use-module (gnu packages imagemagick) + #:use-module (gnu packages java) #:use-module (gnu packages jemalloc) #:use-module (gnu packages linux) #:use-module (gnu packages machine-learning) @@ -66,6 +68,7 @@ #:use-module (gnu packages serialization) #:use-module (gnu packages shells) #:use-module (gnu packages statistics) + #:use-module (gnu packages sqlite) #:use-module (gnu packages tcl) #:use-module (gnu packages time) #:use-module (gnu packages tls) @@ -2427,68 +2430,62 @@ To run the bundled rtg-tools software you will also need java. The (build-system perl-build-system) (arguments `(#:modules ((srfi srfi-26) - (guix build perl-build-system) - (guix build utils)) + ,@%perl-build-system-modules) #:phases (modify-phases %standard-phases - (delete 'configure) + (delete 'configure) ; No configure script. (delete 'build) (replace 'check (lambda* (#:key tests? #:allow-other-keys) (when tests? 
- (invoke "prove" "-l" "t")))) + (invoke "prove" "--verbose" "--lib" "t")))) (replace 'install (lambda* (#:key inputs outputs #:allow-other-keys) - (let* ((out (assoc-ref outputs "out")) - (bin (string-append out "/bin"))) - (with-directory-excursion "scripts" - (for-each (cut install-file <> bin) - (find-files "." "\\.(py|pl|pm)$")) - (for-each - (cut wrap-script <> - `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB") - ,bin))) - ;; braker.pl is the entry point so wrap it separately. - (delete (string-append bin "/braker.pl") - (find-files bin "\\.pl$"))) - (wrap-script (string-append bin "/braker.pl") - `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB") - ,bin)) - `("PATH" ":" prefix (;,(assoc-ref inputs "augustus") - ;,(assoc-ref inputs "genemark") - ,(assoc-ref inputs "bamtools") - ,(assoc-ref inputs "samtools") - ,(assoc-ref inputs "prothint") - ;,(assoc-ref inputs "genomethreader") - ,(assoc-ref inputs "spaln") - ,(assoc-ref inputs "exonerate") - ,(assoc-ref inputs "ncbi-blast") - ,(assoc-ref inputs "diamond") - ,(assoc-ref inputs "cdbfasta") - ;,(assoc-ref inputs "gushr") - ;,(assoc-ref inputs "ucsc") - )) - ;`("GENEMARK_PATH" "=" (,(string-append (assoc-ref inputs "genemark") "/bin"))) - ;`("AUGUSTUS_BIN_PATH" "=" (,(string-append (assoc-ref inputs "augustus") "/bin"))) - ;`("AUGUSTUS_AUGUSTUS_SCRIPTS_PATH" "=" (,(string-append (assoc-ref inputs "augustus") "/bin/augustus_scripts"))) - `("PYTHON3_PATH" "=" (,(string-append (assoc-ref inputs "python") "/bin"))) - `("BAMTOOLS_PATH" "=" (,(string-append (assoc-ref inputs "bamtools") "/bin"))) - `("DIAMOND_PATH" "=" (,(string-append (assoc-ref inputs "diamond") "/bin"))) - `("BLAST_PATH" "=" (,(string-append (assoc-ref inputs "blast+") "/bin"))) - `("PROTHINT_PATH" "=" (,(string-append (assoc-ref inputs "prothint") "/bin"))) - `("SAMTOOLS_PATH" "=" (,(string-append (assoc-ref inputs "samtools") "/bin"))) - `("CDBTOOLS_PATH" "=" (,(string-append (assoc-ref inputs "cdbfasta") "/bin"))) - `("ALIGNMENT_TOOL_PATH" "=" 
(,(string-append (assoc-ref inputs "spaln") "/bin/spaln") - ,(string-append (assoc-ref inputs "exonerate") "/bin/exonerate"))) - ;`("MAKEHUB_PATH" "=" (,(string-append (assoc-ref inputs "makehub") "/bin"))) - ) - )))) - ))) + (let ((bin (string-append (assoc-ref outputs "out") "/bin"))) + (for-each + (cut install-file <> bin) + (find-files "scripts" "\\.(py|pl|pm)$")) + (for-each + (cut wrap-script <> + `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB") + ,bin))) + ;; braker.pl is the entry point so wrap it separately. + (delete (string-append bin "/braker.pl") + (find-files bin "\\.pl$"))) + (wrap-script (string-append bin "/braker.pl") + `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB") + ,bin)) + `("PATH" ":" prefix (,(assoc-ref inputs "augustus") + ;,(assoc-ref inputs "genemark") + ,(assoc-ref inputs "bamtools") + ,(assoc-ref inputs "samtools") + ,(assoc-ref inputs "prothint") + ;,(assoc-ref inputs "genomethreader") + ,(assoc-ref inputs "spaln") + ,(assoc-ref inputs "exonerate") + ,(assoc-ref inputs "ncbi-blast") + ,(assoc-ref inputs "diamond") + ,(assoc-ref inputs "cdbfasta") + ,(assoc-ref inputs "gushr") + ,(assoc-ref inputs "ucsc"))) + ;`("GENEMARK_PATH" "=" (,(string-append (assoc-ref inputs "genemark") "/bin"))) + `("AUGUSTUS_BIN_PATH" "=" (,(string-append (assoc-ref inputs "augustus") "/bin"))) + `("AUGUSTUS_AUGUSTUS_SCRIPTS_PATH" "=" (,(string-append (assoc-ref inputs "augustus") "/share/augustus"))) + `("PYTHON3_PATH" "=" (,(string-append (assoc-ref inputs "python") "/bin"))) + `("BAMTOOLS_PATH" "=" (,(string-append (assoc-ref inputs "bamtools") "/bin"))) + `("DIAMOND_PATH" "=" (,(string-append (assoc-ref inputs "diamond") "/bin"))) + `("BLAST_PATH" "=" (,(string-append (assoc-ref inputs "blast+") "/bin"))) + `("PROTHINT_PATH" "=" (,(string-append (assoc-ref inputs "prothint") "/bin"))) + `("SAMTOOLS_PATH" "=" (,(string-append (assoc-ref inputs "samtools") "/bin"))) + `("CDBTOOLS_PATH" "=" (,(string-append (assoc-ref inputs "cdbfasta") "/bin"))) + 
`("ALIGNMENT_TOOL_PATH" "=" (,(string-append (assoc-ref inputs "spaln") "/bin/spaln") + ,(string-append (assoc-ref inputs "exonerate") "/bin/exonerate"))) + ;`("MAKEHUB_PATH" "=" (,(string-append (assoc-ref inputs "makehub") "/bin"))) + ))))))) (inputs - `( - ("guile" ,guile-3.0) ; for wrap-script + `(("guile" ,guile-3.0) ; for wrap-script - ;("augustus" ,augustus) + ("augustus" ,augustus-bio) ;("genemark" ,genemark) ("bamtools" ,bamtools) ("samtools" ,samtools) @@ -2498,10 +2495,10 @@ To run the bundled rtg-tools software you will also need java. The ("exonerate" ,exonerate) ("ncbi-blast" ,blast+) ("diamond" ,diamond) - ("cbdfasta" ,cbdfasta) ; provides cdbfasta and cdbyank - ;("gushr" ,gushr) + ("cdbfasta" ,cdbfasta) ; provides cdbfasta and cdbyank + ("gushr" ,gushr) ;("makehub" ,makehub) - ;("ucsc" ,ucsc-genome-browser) ; provides bin/twoBitInfo and bin/faToTwoBit + ("ucsc" ,ucsc-genome-browser) ; provides bin/twoBitInfo and bin/faToTwoBit ("perl" ,perl) ("perl-hash-merge" ,perl-hash-merge) @@ -2510,11 +2507,7 @@ To run the bundled rtg-tools software you will also need java. The ("perl-module-load-conditional" ,perl-module-load-conditional) ("perl-parallel-forkmanager" ,perl-parallel-forkmanager) ("perl-yaml" ,perl-yaml) - ("python" ,python) - )) - (native-inputs - `( - )) + ("python" ,python))) (home-page "https://github.com/Gaius-Augustus/BRAKER") (synopsis "Pipeline for fully automated prediction of protein coding gene structures") @@ -2560,7 +2553,10 @@ RNA-Seq data.") (delete-file "dependencies/spaln_boundary_scorer"))))) (build-system perl-build-system) (arguments - `(#:tests? #f ; TODO: Test suite fails, or packaging is wrong? + `(#:modules ((srfi srfi-26) + (guix build perl-build-system) + (guix build utils)) + #:tests? #f ; TODO: Test suite fails, or packaging is wrong? 
#:phases (modify-phases %standard-phases (add-after 'unpack 'adjust-source @@ -2573,8 +2569,7 @@ RNA-Seq data.") "/bin/spaln_boundary_scorer\"")) (("\\$binDir/\\.\\./dependencies/spaln_table\\\"") (string-append (assoc-ref inputs "spaln") - "/share/spaln/table\"")) - ))) + "/share/spaln/table\""))))) (delete 'configure) (delete 'build) (replace 'check @@ -2589,21 +2584,20 @@ RNA-Seq data.") (lambda* (#:key inputs outputs #:allow-other-keys) (let* ((out (assoc-ref outputs "out")) (bin (string-append out "/bin"))) - (with-directory-excursion "scripts" - (for-each (cut install-file <> bin) - (find-files "bin" "\\.(pl|py|sh)$")) - (for-each - (cut wrap-script <> - `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB")))) - (find-files bin "\\.pl$")) - (for-each - (cut wrap-script <> - `("PATH" ":" prefix (,(assoc-ref inputs "coreutils") - ,(assoc-ref inputs "diamond") - ;,(assoc-ref inputs "genemark") - ,(assoc-ref inputs "grep") - ,(assoc-ref inputs "spaln")))) - (find-files bin "\\.py$"))))))))) + (for-each (cut install-file <> bin) + (find-files "bin" "\\.(pl|py|sh)$")) + (for-each + (cut wrap-script <> + `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB")))) + (find-files bin "\\.pl$")) + (for-each + (cut wrap-script <> + `("PATH" ":" prefix (,(assoc-ref inputs "coreutils") + ,(assoc-ref inputs "diamond") + ;,(assoc-ref inputs "genemark") + ,(assoc-ref inputs "grep") + ,(assoc-ref inputs "spaln")))) + (find-files bin "\\.py$")))))))) (inputs `(("guile" ,guile-3.0) ; for wrap-script @@ -2842,3 +2836,200 @@ are scored based on local alignment quality around their boundaries.") (supported-systems '("x86_64-linux")) ;; Licensee may use the Product solely for Licensee's own internal research purposes. (license license:non-copyleft))) + +;; Name collision with augustus in games. 
+(define-public augustus-bio
+  (package
+    (name "augustus-bio")
+    (version "3.4.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append "https://github.com/Gaius-Augustus/Augustus"
+                           "/releases/download/v" version
+                           "/augustus-" version ".tar.gz"))
+       (sha256
+        (base32
+         "1j0ny6v8v3hsk76w4f4vvcqzpjjn8qfvp5q1mdanglb2g1rp4rr4"))
+       (modules '((guix build utils)))
+       (snippet
+        '(begin
+           (for-each delete-file
+                     (append
+                       '("auxprogs/aln2wig/aln2wig"
+                         "auxprogs/bam2hints/bam2hints"
+                         "auxprogs/bam2wig/bam2wig"
+                         "auxprogs/compileSpliceCands/compileSpliceCands"
+                         "auxprogs/filterBam/data/BAMseek2011July24.jar"
+                         "auxprogs/homGeneMapping/src/homGeneMapping"
+                         "auxprogs/joingenes/joingenes"
+                         "auxprogs/utrrnaseq/Debug/utrrnaseq")
+                       (find-files "." "\\.pdf$")
+                       (find-files "mansrc" "\\.1$")))
+           (for-each delete-file-recursively
+                     '("bin"
+                       "tests/__pycache__"
+                       "tests/examples/__pycache__"
+                       "tests/examples_test/__pycache__"
+                       "tests/utils/__pycache__"))))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:test-target "unit_test"
+       #:make-flags (list (string-append "CC = " ,(cc-for-target)))
+       #:phases
+       (modify-phases %standard-phases
+         (delete 'configure) ; No configure script.
+         (add-after 'unpack 'adjust-sources ; Replace hard-coded /usr/include paths.
+           (lambda* (#:key inputs #:allow-other-keys)
+             (substitute* "common.mk"
+               (("AUGVERSION =.*")
+                (string-append "AUGVERSION = ",version "\n")))
+             (substitute* '("src/Makefile"
+                            "src/unittests/Makefile")
+               (("/usr/include/mysql ")
+                (string-append (assoc-ref inputs "mariadb:dev") "/include/mysql "))
+               (("/usr/include/mysql\\+\\+")
+                (string-append (assoc-ref inputs "mysql++") "/include/mysql++ "))
+               (("/usr/include/lpsolve")
+                (string-append (assoc-ref inputs "lpsolve") "/include/lpsolve")))
+             (substitute* '("auxprogs/bam2hints/Makefile"
+                            "auxprogs/filterBam/src/Makefile")
+               (("/usr/include/bamtools")
+                (string-append (assoc-ref inputs "bamtools") "/include/bamtools")))
+             (substitute* "auxprogs/bam2wig/Makefile"
+               (("/usr/include/htslib")
+                (string-append (assoc-ref inputs "htslib") "/include/htslib")))))
+         (add-after 'unpack 'remove-googletest ; Use the googletest input instead.
+           (lambda* (#:key inputs #:allow-other-keys)
+             (let ((gtest (assoc-ref inputs "googletest")))
+               (delete-file-recursively "src/googletest")
+               (substitute* "src/Makefile"
+                 (("unittest: googletest") "unittest:"))
+               (substitute* "src/unittests/Makefile"
+                 (("\\.\\./googletest/include") (string-append gtest "/include"))
+                 (("-L\\.\\./googletest") (string-append "-L" gtest "/lib"))))))
+         ;; This mimics 'make install'.
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((out (assoc-ref outputs "out"))
+                    (bin (string-append out "/bin"))
+                    (scripts (string-append out "/share/augustus")))
+               (mkdir-p scripts)
+               (mkdir-p bin)
+               (copy-recursively "config" scripts)
+               (copy-recursively "bin" scripts) ; Symlinked into bin/ below.
+               (copy-recursively "scripts" scripts)
+               (for-each
+                 (lambda (binary)
+                   (symlink (string-append "../share/augustus/" binary)
+                            (string-append bin "/" binary)))
+                 '("augustus" "etraining" "prepareAlign" "fastBlockSearch"
+                   "load2db" "getSeq")))))
+         (add-after 'install 'install-manpages
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((out (assoc-ref outputs "out"))
+                    (man1 (string-append out "/share/man/man1")))
+               (for-each
+                 (lambda (adoc)
+                   (invoke "asciidoctor"
+                           "-a" "docdate=''"
+                           "-b" "manpage"
+                           (string-append "--destination-dir=" man1)
+                           adoc))
+                 (find-files "mansrc" "\\.adoc$")))))))) ; The snippet removed the pre-built *.1 pages.
+    (inputs
+     `(("bamtools" ,bamtools)
+       ("boost" ,boost)
+       ("gsl" ,gsl)
+       ("htslib" ,htslib)
+       ("lpsolve" ,lpsolve)
+       ("mariadb:lib" ,mariadb "lib")
+       ("mysql++" ,mysql++)
+       ("perl" ,perl)
+       ("python" ,python)
+       ("sqlite" ,sqlite)
+       ("suitesparse" ,suitesparse)
+       ("zlib" ,zlib)))
+    (native-inputs
+     `(("asciidoctor" ,ruby-asciidoctor)
+       ("googletest" ,googletest)
+       ("mariadb:dev" ,mariadb "dev")))
+    (home-page "http://bioinf.uni-greifswald.de/webaugustus/")
+    (synopsis "Genome annotation with AUGUSTUS")
+    (description "AUGUSTUS is a gene prediction program. It can be used as an
+ab initio program, which means it bases its prediction purely on the sequence.
+AUGUSTUS may also incorporate hints on the gene structure coming from extrinsic
+sources such as EST, MS/MS, protein alignments and syntenic genomic alignments.")
+    (license license:artistic2.0)))
+
+;; TODO: Replace GeMoMa-1.6.2.jar with https://github.com/Jstacs/Jstacs
+(define-public gushr
+  (let ((commit "ee26d5c7eee97170c6183089d57477b338a7bc4b")
+        (revision "1"))
+    (package
+      (name "gushr")
+      (version (git-version "0.0.0" revision commit))
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
+                (url "https://github.com/Gaius-Augustus/GUSHR")
+                (commit commit)))
+         (file-name (git-file-name name version))
+         (sha256
+          (base32
+           "00alkahjzc6zgzfm7f6zcayjvgcs3kb90fb9v855gpqcxv3y1s40"))))
+      (build-system gnu-build-system)
+      (arguments
+       `(#:phases
+         (modify-phases %standard-phases
+           (delete 'configure) ; No configure script.
+           (delete 'build) ; Only gushr.py and the bundled jar get installed.
+           ;; Override the test.sh file.
+           (replace 'check
+             (lambda* (#:key tests? parallel-tests? inputs #:allow-other-keys)
+               (when tests?
+                 (with-directory-excursion "example"
+                   (invoke "../gushr.py"
+                           "--AUGUSTUS_SCRIPTS_PATH"
+                           (string-append (assoc-ref inputs "augustus")
+                                          "/share/augustus")
+                           "--bam" (assoc-ref inputs "RNAseq.bam")
+                           "--gtf" "augustus.gtf"
+                           "--genome" "genome.fa"
+                           "--outfile_name_stem" "gushr"
+                           "--cores" (if parallel-tests?
+                                       (number->string (parallel-job-count))
+                                       "1"))))))
+           (replace 'install
+             (lambda* (#:key inputs outputs #:allow-other-keys)
+               (let* ((out (assoc-ref outputs "out"))
+                      (bin (string-append out "/bin")))
+                 (install-file "gushr.py" bin)
+                 (install-file "GeMoMa-1.6.2.jar" bin)
+                 (wrap-script (string-append bin "/gushr.py")
+                   `("PATH" ":" prefix (,(string-append (assoc-ref inputs "java")
+                                                        "/bin")
+                                        ,(string-append (assoc-ref inputs "samtools")
+                                                        "/bin"))))))))))
+      (inputs
+       `(("guile" ,guile-3.0) ; for wrap-script
+         ("augustus" ,augustus-bio)
+         ("java" ,icedtea-8 "jdk")
+         ("python" ,python)
+         ("samtools" ,samtools)))
+      (native-inputs
+       `(("RNAseq.bam" ; Passed to gushr.py via --bam in the 'check phase.
+          ,(origin
+             (method url-fetch)
+             (uri "http://bioinf.uni-greifswald.de/bioinf/braker/RNAseq.bam")
+             (sha256
+              (base32
+               "1apmgh9irwhfnbpqwvcjasfs524g03i284bxbb8czdkqfsa74w47"))))))
+      (home-page "https://github.com/Gaius-Augustus/GUSHR")
+      (synopsis "Generating UTRs from SHort Reads")
+      (description "Assembly-free construction of UTRs from short read RNA-Seq
+data on the basis of coding sequence annotation. This tool has been adapted to
+the format needs of AUGUSTUS/BRAKER and employs GeMoMa for generating UTRs from
+RNA-Seq coverage data.")
+      (license license:gpl3+))))
diff --git a/gn/packages/databases.scm b/gn/packages/databases.scm
index 93280c2..9c4a109 100644
--- a/gn/packages/databases.scm
+++ b/gn/packages/databases.scm
@@ -2,6 +2,7 @@
   #:use-module (gnu packages)
   #:use-module (guix packages)
   #:use-module (guix download)
+  #:use-module ((guix licenses) #:prefix license:)
   #:use-module (guix utils)
   #:use-module (guix build-system gnu)
   #:use-module (guix build-system python)
@@ -136,3 +137,57 @@
       (inputs
        `(("zlib" ,zlib)
          ,@(package-inputs base))))))
+
+(define-public mysql++
+  (package
+    (name "mysql++")
+    (version "3.3.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (string-append "https://tangentsoft.com/mysqlpp/releases"
+                           "/mysql++-" version ".tar.gz"))
+       (sha256
+        (base32
+         "1kz7l1ngk649cpp2h1cnyqan9px8d50r0dk7kngwrhkcam3br724"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:configure-flags
+       (list (string-append "--with-mysql-include="
+                            (assoc-ref %build-inputs "mariadb:dev"))
+             (string-append "--with-mysql-lib="
+                            (assoc-ref %build-inputs "mariadb:lib")))
+       #:phases
+       (modify-phases %standard-phases
+         ;; It is unclear how to run the test suite so we just invoke the
+         ;; compiled binaries which start with 'test_*'.
+         (replace 'check
+           (lambda* (#:key tests? #:allow-other-keys)
+             (when tests?
+               (setenv "LD_PRELOAD" "./libmysqlpp.so.3") ; Presumably the library built in this directory.
+               (and
+                 (invoke "./test_array_index")
+                 (invoke "./test_cpool")
+                 (invoke "./test_datetime")
+                 (invoke "./test_insertpolicy")
+                 (invoke "./test_inttypes")
+                 (invoke "./test_manip")
+                 (invoke "./test_null_comparison")
+                 (invoke "./test_qssqls")
+                 (invoke "./test_qstream")
+                 (invoke "./test_query_copy")
+                 (invoke "./test_sqlstream")
+                 (invoke "./test_ssqls2")
+                 (invoke "./test_string")
+                 ;(invoke "./test_tcp") ; Requires TCP connection.
+                 (invoke "./test_uds")
+                 (invoke "./test_wnp")))
+             #t)))))
+    (inputs
+     `(("mariadb:dev" ,mariadb "dev")
+       ("mariadb:lib" ,mariadb "lib")))
+    (home-page "https://tangentsoft.com/mysqlpp/")
+    (synopsis "MySQL C++ library bindings")
+    (description "MySQL++ is a complex C++ API for MySQL. The goal of this API
+is to make working with Queries as easy as working with other STL containers.")
+    (license license:lgpl2.1+)))
-- 
cgit v1.2.3