Browse Source

braker almost ready

wip-braker
Efraim Flashner 3 months ago
parent
commit
599a3aa111
Signed by: efraim GPG Key ID: 41AAE7DCCA3D8351
  1. 347
      gn/packages/bioinformatics.scm
  2. 55
      gn/packages/databases.scm

347
gn/packages/bioinformatics.scm

@ -18,6 +18,7 @@
#:use-module (guix build-system waf)
#:use-module (gnu packages)
#:use-module (gn packages crates-io)
#:use-module (gn packages databases)
#:use-module (gn packages java)
#:use-module (gn packages perl)
#:use-module (gn packages python)
@ -45,6 +46,7 @@
#:use-module (gnu packages guile)
#:use-module (gnu packages image)
#:use-module (gnu packages imagemagick)
#:use-module (gnu packages java)
#:use-module (gnu packages jemalloc)
#:use-module (gnu packages linux)
#:use-module (gnu packages machine-learning)
@ -66,6 +68,7 @@
#:use-module (gnu packages serialization)
#:use-module (gnu packages shells)
#:use-module (gnu packages statistics)
#:use-module (gnu packages sqlite)
#:use-module (gnu packages tcl)
#:use-module (gnu packages time)
#:use-module (gnu packages tls)
@ -2427,68 +2430,62 @@ To run the bundled rtg-tools software you will also need java. The
(build-system perl-build-system)
(arguments
`(#:modules ((srfi srfi-26)
(guix build perl-build-system)
(guix build utils))
,@%perl-build-system-modules)
#:phases
(modify-phases %standard-phases
(delete 'configure)
(delete 'configure) ; No configure script.
(delete 'build)
(replace 'check
(lambda* (#:key tests? #:allow-other-keys)
(when tests?
(invoke "prove" "-l" "t"))))
(invoke "prove" "--verbose" "--lib" "t"))))
(replace 'install
(lambda* (#:key inputs outputs #:allow-other-keys)
(let* ((out (assoc-ref outputs "out"))
(bin (string-append out "/bin")))
(with-directory-excursion "scripts"
(for-each (cut install-file <> bin)
(find-files "." "\\.(py|pl|pm)$"))
(for-each
(cut wrap-script <>
`("PERL5LIB" ":" prefix (,(getenv "PERL5LIB")
,bin)))
;; braker.pl is the entry point so wrap it separately.
(delete (string-append bin "/braker.pl")
(find-files bin "\\.pl$")))
(wrap-script (string-append bin "/braker.pl")
`("PERL5LIB" ":" prefix (,(getenv "PERL5LIB")
,bin))
`("PATH" ":" prefix (;,(assoc-ref inputs "augustus")
;,(assoc-ref inputs "genemark")
,(assoc-ref inputs "bamtools")
,(assoc-ref inputs "samtools")
,(assoc-ref inputs "prothint")
;,(assoc-ref inputs "genomethreader")
,(assoc-ref inputs "spaln")
,(assoc-ref inputs "exonerate")
,(assoc-ref inputs "ncbi-blast")
,(assoc-ref inputs "diamond")
,(assoc-ref inputs "cdbfasta")
;,(assoc-ref inputs "gushr")
;,(assoc-ref inputs "ucsc")
))
;`("GENEMARK_PATH" "=" (,(string-append (assoc-ref inputs "genemark") "/bin")))
;`("AUGUSTUS_BIN_PATH" "=" (,(string-append (assoc-ref inputs "augustus") "/bin")))
;`("AUGUSTUS_AUGUSTUS_SCRIPTS_PATH" "=" (,(string-append (assoc-ref inputs "augustus") "/bin/augustus_scripts")))
`("PYTHON3_PATH" "=" (,(string-append (assoc-ref inputs "python") "/bin")))
`("BAMTOOLS_PATH" "=" (,(string-append (assoc-ref inputs "bamtools") "/bin")))
`("DIAMOND_PATH" "=" (,(string-append (assoc-ref inputs "diamond") "/bin")))
`("BLAST_PATH" "=" (,(string-append (assoc-ref inputs "blast+") "/bin")))
`("PROTHINT_PATH" "=" (,(string-append (assoc-ref inputs "prothint") "/bin")))
`("SAMTOOLS_PATH" "=" (,(string-append (assoc-ref inputs "samtools") "/bin")))
`("CDBTOOLS_PATH" "=" (,(string-append (assoc-ref inputs "cdbfasta") "/bin")))
`("ALIGNMENT_TOOL_PATH" "=" (,(string-append (assoc-ref inputs "spaln") "/bin/spaln")
,(string-append (assoc-ref inputs "exonerate") "/bin/exonerate")))
;`("MAKEHUB_PATH" "=" (,(string-append (assoc-ref inputs "makehub") "/bin")))
)
))))
)))
(let* ((bin (string-append (assoc-ref outputs "out") "/bin"))
       ;; Return the "bin" directory of the input named LABEL.  Executables
       ;; are looked up in that directory, not in the root of the input.
       (input-bin (lambda (label)
                    (string-append (assoc-ref inputs label) "/bin"))))
  ;; Install every Python script, Perl script and Perl module found under
  ;; "scripts" into BIN.
  (for-each
   (cut install-file <> bin)
   (find-files "scripts" "\\.(py|pl|pm)$"))
  ;; The helper scripts only get PERL5LIB; BIN is added to it because the
  ;; .pm files were installed there as well.
  (for-each
   (cut wrap-script <>
        `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB")
                                 ,bin)))
   ;; braker.pl is the entry point so wrap it separately.
   (delete (string-append bin "/braker.pl")
           (find-files bin "\\.pl$")))
  (wrap-script (string-append bin "/braker.pl")
    `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB")
                             ,bin))
    ;; PATH needs the "bin" directory of each tool.
    `("PATH" ":" prefix (,(input-bin "augustus")
                         ;,(input-bin "genemark")
                         ,(input-bin "bamtools")
                         ,(input-bin "samtools")
                         ,(input-bin "prothint")
                         ;,(input-bin "genomethreader")
                         ,(input-bin "spaln")
                         ,(input-bin "exonerate")
                         ,(input-bin "ncbi-blast")
                         ,(input-bin "diamond")
                         ,(input-bin "cdbfasta")
                         ,(input-bin "gushr")
                         ,(input-bin "ucsc")))
    ;; The position in a variable spec must be the symbol `=' (or `prefix' /
    ;; `suffix'); a string "=" does not match any of the accepted forms.
    ;`("GENEMARK_PATH" = (,(input-bin "genemark")))
    `("AUGUSTUS_BIN_PATH" = (,(input-bin "augustus")))
    `("AUGUSTUS_SCRIPTS_PATH" = (,(string-append (assoc-ref inputs "augustus") "/share/augustus")))
    `("PYTHON3_PATH" = (,(input-bin "python")))
    `("BAMTOOLS_PATH" = (,(input-bin "bamtools")))
    `("DIAMOND_PATH" = (,(input-bin "diamond")))
    ;; The blast+ package is listed under the input label "ncbi-blast".
    `("BLAST_PATH" = (,(input-bin "ncbi-blast")))
    `("PROTHINT_PATH" = (,(input-bin "prothint")))
    `("SAMTOOLS_PATH" = (,(input-bin "samtools")))
    `("CDBTOOLS_PATH" = (,(input-bin "cdbfasta")))
    ;; NOTE(review): two values are joined with ":" here and both name an
    ;; executable rather than a directory -- confirm this is what braker.pl
    ;; expects for ALIGNMENT_TOOL_PATH.
    `("ALIGNMENT_TOOL_PATH" = (,(string-append (assoc-ref inputs "spaln") "/bin/spaln")
                               ,(string-append (assoc-ref inputs "exonerate") "/bin/exonerate")))
    ;`("MAKEHUB_PATH" = (,(input-bin "makehub")))
    )))))))
(inputs
`(
("guile" ,guile-3.0) ; for wrap-script
`(("guile" ,guile-3.0) ; for wrap-script
;("augustus" ,augustus)
("augustus" ,augustus-bio)
;("genemark" ,genemark)
("bamtools" ,bamtools)
("samtools" ,samtools)
@ -2498,10 +2495,10 @@ To run the bundled rtg-tools software you will also need java. The
("exonerate" ,exonerate)
("ncbi-blast" ,blast+)
("diamond" ,diamond)
("cbdfasta" ,cbdfasta) ; provides cdbfasta and cdbyank
;("gushr" ,gushr)
("cdbfasta" ,cdbfasta) ; provides cdbfasta and cdbyank
("gushr" ,gushr)
;("makehub" ,makehub)
;("ucsc" ,ucsc-genome-browser) ; provides bin/twoBitInfo and bin/faToTwoBit
("ucsc" ,ucsc-genome-browser) ; provides bin/twoBitInfo and bin/faToTwoBit
("perl" ,perl)
("perl-hash-merge" ,perl-hash-merge)
@ -2510,11 +2507,7 @@ To run the bundled rtg-tools software you will also need java. The
("perl-module-load-conditional" ,perl-module-load-conditional)
("perl-parallel-forkmanager" ,perl-parallel-forkmanager)
("perl-yaml" ,perl-yaml)
("python" ,python)
))
(native-inputs
`(
))
("python" ,python)))
(home-page "https://github.com/Gaius-Augustus/BRAKER")
(synopsis
"Pipeline for fully automated prediction of protein coding gene structures")
@ -2560,7 +2553,10 @@ RNA-Seq data.")
(delete-file "dependencies/spaln_boundary_scorer")))))
(build-system perl-build-system)
(arguments
`(#:tests? #f ; TODO: Test suite fails, or packaging is wrong?
`(#:modules ((srfi srfi-26)
(guix build perl-build-system)
(guix build utils))
#:tests? #f ; TODO: Test suite fails, or packaging is wrong?
#:phases
(modify-phases %standard-phases
(add-after 'unpack 'adjust-source
@ -2573,8 +2569,7 @@ RNA-Seq data.")
"/bin/spaln_boundary_scorer\""))
(("\\$binDir/\\.\\./dependencies/spaln_table\\\"")
(string-append (assoc-ref inputs "spaln")
"/share/spaln/table\""))
)))
"/share/spaln/table\"")))))
(delete 'configure)
(delete 'build)
(replace 'check
@ -2589,21 +2584,20 @@ RNA-Seq data.")
(lambda* (#:key inputs outputs #:allow-other-keys)
(let* ((out (assoc-ref outputs "out"))
(bin (string-append out "/bin")))
(with-directory-excursion "scripts"
(for-each (cut install-file <> bin)
(find-files "bin" "\\.(pl|py|sh)$"))
(for-each
(cut wrap-script <>
`("PERL5LIB" ":" prefix (,(getenv "PERL5LIB"))))
(find-files bin "\\.pl$"))
(for-each
(cut wrap-script <>
`("PATH" ":" prefix (,(assoc-ref inputs "coreutils")
,(assoc-ref inputs "diamond")
;,(assoc-ref inputs "genemark")
,(assoc-ref inputs "grep")
,(assoc-ref inputs "spaln"))))
(find-files bin "\\.py$")))))))))
;; Install the Perl, Python and shell scripts found under "bin".
(for-each (cut install-file <> bin)
          (find-files "bin" "\\.(pl|py|sh)$"))
;; Perl scripts need to find the Perl modules of the build environment.
(for-each
 (cut wrap-script <>
      `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB"))))
 (find-files bin "\\.pl$"))
;; Python scripts get the external tools on PATH.  Executables live in the
;; "bin" subdirectory of each input, so that is the directory to add.
(for-each
 (cut wrap-script <>
      `("PATH" ":" prefix
        ,(map (lambda (label)
                (string-append (assoc-ref inputs label) "/bin"))
              '("coreutils"
                "diamond"
                ;"genemark"
                "grep"
                "spaln"))))
 (find-files bin "\\.py$"))))))))
(inputs
`(("guile" ,guile-3.0) ; for wrap-script
@ -2842,3 +2836,200 @@ are scored based on local alignment quality around their boundaries.")
(supported-systems '("x86_64-linux"))
;; Licensee may use the Product solely for Licensee's own internal research purposes.
(license license:non-copyleft)))
;; Name collision with augustus in games.
(define-public augustus-bio
  (package
    (name "augustus-bio")
    (version "3.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/Gaius-Augustus/Augustus"
                           "/releases/download/v" version
                           "/augustus-" version ".tar.gz"))
       (sha256
        (base32
         "1j0ny6v8v3hsk76w4f4vvcqzpjjn8qfvp5q1mdanglb2g1rp4rr4"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the listed files (presumably pre-built artifacts), every
           ;; PDF, and the ".1" files under mansrc; the man pages are
           ;; generated from the .adoc sources in 'install-manpages.
           (for-each delete-file
                     (append
                      '("auxprogs/aln2wig/aln2wig"
                        "auxprogs/bam2hints/bam2hints"
                        "auxprogs/bam2wig/bam2wig"
                        "auxprogs/compileSpliceCands/compileSpliceCands"
                        "auxprogs/filterBam/data/BAMseek2011July24.jar"
                        "auxprogs/homGeneMapping/src/homGeneMapping"
                        "auxprogs/joingenes/joingenes"
                        "auxprogs/utrrnaseq/Debug/utrrnaseq")
                      (find-files "." "\\.pdf$")
                      (find-files "mansrc" "\\.1$")))
           ;; Whole directories that are removed from the tarball.
           (for-each delete-file-recursively
                     '("bin"
                       "tests/__pycache__"
                       "tests/examples/__pycache__"
                       "tests/examples_test/__pycache__"
                       "tests/utils/__pycache__"))))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "unit_test"
       #:make-flags (list (string-append "CC = " ,(cc-for-target)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; No configure script.
         ;; Point the Makefiles at the headers of the inputs instead of
         ;; /usr/include and pin the reported version.
         (add-after 'unpack 'adjust-sources
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((under (lambda (label suffix)
                            ;; Path SUFFIX below the input named LABEL.
                            (string-append (assoc-ref inputs label) suffix))))
               (substitute* "common.mk"
                 (("AUGVERSION =.*")
                  (string-append "AUGVERSION = ",version "\n")))
               (substitute* '("src/Makefile"
                              "src/unittests/Makefile")
                 (("/usr/include/mysql ")
                  (under "mariadb:dev" "/include/mysql "))
                 (("/usr/include/mysql\\+\\+")
                  (under "mysql++" "/include/mysql++ "))
                 (("/usr/include/lpsolve")
                  (under "lpsolve" "/include/lpsolve")))
               (substitute* '("auxprogs/bam2hints/Makefile"
                              "auxprogs/filterBam/src/Makefile")
                 (("/usr/include/bamtools")
                  (under "bamtools" "/include/bamtools")))
               (substitute* "auxprogs/bam2wig/Makefile"
                 (("/usr/include/htslib")
                  (under "htslib" "/include/htslib"))))))
         ;; Delete the bundled googletest and use the "googletest" input.
         (add-after 'unpack 'remove-googletest
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((googletest-dir (assoc-ref inputs "googletest")))
               (delete-file-recursively "src/googletest")
               (substitute* "src/Makefile"
                 (("unittest: googletest") "unittest:"))
               (substitute* "src/unittests/Makefile"
                 (("\\.\\./googletest/include")
                  (string-append googletest-dir "/include"))
                 (("-L\\.\\./googletest")
                  (string-append "-L" googletest-dir "/lib"))))))
         ;; This mimics 'make install'.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((output   (assoc-ref outputs "out"))
                    (bindir   (string-append output "/bin"))
                    (sharedir (string-append output "/share/augustus")))
               (for-each mkdir-p (list sharedir bindir))
               ;; The contents of all three directories end up directly in
               ;; share/augustus.
               (for-each (lambda (directory)
                           (copy-recursively directory sharedir))
                         '("config" "bin" "scripts"))
               ;; Expose the main programs in bin/ through relative symlinks.
               (for-each
                (lambda (program)
                  (symlink (string-append "../share/augustus/" program)
                           (string-append bindir "/" program)))
                '("augustus" "etraining" "prepareAlign" "fastBlockSearch"
                  "load2db" "getSeq")))))
         ;; Render each .adoc file under mansrc as a man page in
         ;; share/man/man1.
         (add-after 'install 'install-manpages
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((output  (assoc-ref outputs "out"))
                    (man-dir (string-append output "/share/man/man1")))
               (for-each
                (lambda (page)
                  (invoke "asciidoctor"
                          "-a" "docdate=''"
                          "-b" "manpage"
                          (string-append "--destination-dir=" man-dir)
                          page))
                (find-files "mansrc" "\\.adoc$"))))))))
    (inputs
     `(("bamtools" ,bamtools)
       ("boost" ,boost)
       ("gsl" ,gsl)
       ("htslib" ,htslib)
       ("lpsolve" ,lpsolve)
       ("mariadb:lib" ,mariadb "lib")
       ("mysql++" ,mysql++)
       ("perl" ,perl)
       ("python" ,python)
       ("sqlite" ,sqlite)
       ("suitesparse" ,suitesparse)
       ("zlib" ,zlib)))
    (native-inputs
     `(("asciidoctor" ,ruby-asciidoctor)
       ("googletest" ,googletest)
       ("mariadb:dev" ,mariadb "dev")))
    (home-page "http://bioinf.uni-greifswald.de/webaugustus/")
    (synopsis "Genome annotation with AUGUSTUS")
    (description "AUGUSTUS is a gene prediction program. It can be used as an
ab initio program, which means it bases its prediction purely on the sequence.
AUGUSTUS may also incorporate hints on the gene structure coming from extrinsic
sources such as EST, MS/MS, protein alignments and syntenic genomic alignments.")
    (license license:artistic2.0)))
;; TODO: Replace GeMoMa-1.6.2.jar with https://github.com/Jstacs/Jstacs
(define-public gushr
  (let ((commit "ee26d5c7eee97170c6183089d57477b338a7bc4b")
        (revision "1"))
    (package
      (name "gushr")
      (version (git-version "0.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/Gaius-Augustus/GUSHR")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "00alkahjzc6zgzfm7f6zcayjvgcs3kb90fb9v855gpqcxv3y1s40"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; No configure script.
           (delete 'build)
           ;; Override the test.sh file.
           (replace 'check
             (lambda* (#:key tests? parallel-tests? inputs #:allow-other-keys)
               (when tests?
                 (let ((augustus-scripts
                        (string-append (assoc-ref inputs "augustus")
                                       "/share/augustus"))
                       ;; The alignment file comes from the "RNAseq.bam"
                       ;; native input.
                       (alignments (assoc-ref inputs "RNAseq.bam"))
                       (cores (if parallel-tests?
                                  (number->string (parallel-job-count))
                                  "1")))
                   ;; Run gushr.py from within the "example" directory.
                   (with-directory-excursion "example"
                     (invoke "../gushr.py"
                             "--AUGUSTUS_SCRIPTS_PATH" augustus-scripts
                             "--bam" alignments
                             "--gtf" "augustus.gtf"
                             "--genome" "genome.fa"
                             "--outfile_name_stem" "gushr"
                             "--cores" cores))))))
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((output (assoc-ref outputs "out"))
                      (bindir (string-append output "/bin"))
                      (bin-of (lambda (label)
                                ;; "bin" directory of the input named LABEL.
                                (string-append (assoc-ref inputs label)
                                               "/bin"))))
                 ;; The script and the jar are installed side by side.
                 (for-each (lambda (file)
                             (install-file file bindir))
                           '("gushr.py" "GeMoMa-1.6.2.jar"))
                 ;; Put java and samtools on the PATH of the wrapped script.
                 (wrap-script (string-append bindir "/gushr.py")
                   `("PATH" ":" prefix (,(bin-of "java")
                                        ,(bin-of "samtools"))))))))))
      (inputs
       `(("guile" ,guile-3.0)           ; for wrap-script
         ("augustus" ,augustus-bio)
         ("java" ,icedtea-8 "jdk")
         ("python" ,python)
         ("samtools" ,samtools)))
      (native-inputs
       `(("RNAseq.bam"
          ,(origin
             (method url-fetch)
             (uri "http://bioinf.uni-greifswald.de/bioinf/braker/RNAseq.bam")
             (sha256
              (base32
               "1apmgh9irwhfnbpqwvcjasfs524g03i284bxbb8czdkqfsa74w47"))))))
      (home-page "https://github.com/Gaius-Augustus/GUSHR")
      (synopsis "Generating UTRs from SHort Reads")
      (description "Assembly-free construction of UTRs from short read RNA-Seq
data on the basis of coding sequence annotation. This tool has been adapted to
the format needs of AUGUSTUS/BRAKER and employs GeMoMa for generating UTRs from
RNA-Seq coverage data.")
      (license license:gpl3+))))

55
gn/packages/databases.scm

@ -2,6 +2,7 @@
#:use-module (gnu packages)
#:use-module (guix packages)
#:use-module (guix download)
#:use-module ((guix licenses) #:prefix license:)
#:use-module (guix utils)
#:use-module (guix build-system gnu)
#:use-module (guix build-system python)
@ -136,3 +137,57 @@
(inputs
`(("zlib" ,zlib)
,@(package-inputs base))))))
(define-public mysql++
  (package
    (name "mysql++")
    (version "3.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://tangentsoft.com/mysqlpp/releases"
                           "/mysql++-" version ".tar.gz"))
       (sha256
        (base32
         "1kz7l1ngk649cpp2h1cnyqan9px8d50r0dk7kngwrhkcam3br724"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       ;; Headers and libraries come from two separate outputs of mariadb.
       (list (string-append "--with-mysql-include="
                            (assoc-ref %build-inputs "mariadb:dev"))
             (string-append "--with-mysql-lib="
                            (assoc-ref %build-inputs "mariadb:lib")))
       #:phases
       (modify-phases %standard-phases
         ;; It is unclear how to run the test suite so we just invoke the
         ;; compiled binaries which start with 'test_*'.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; Preload the library from the build directory (presumably
               ;; because it is not installed yet when the tests run).
               (setenv "LD_PRELOAD" "./libmysqlpp.so.3")
               ;; 'invoke' raises an error when a program fails, so running
               ;; the tests in sequence stops at the first failure.
               (for-each
                (lambda (test)
                  (invoke (string-append "./" test)))
                '("test_array_index"
                  "test_cpool"
                  "test_datetime"
                  "test_insertpolicy"
                  "test_inttypes"
                  "test_manip"
                  "test_null_comparison"
                  "test_qssqls"
                  "test_qstream"
                  "test_query_copy"
                  "test_sqlstream"
                  "test_ssqls2"
                  "test_string"
                  ;"test_tcp"           ; Requires TCP connection.
                  "test_uds"
                  "test_wnp"))
               ;; Do not let the relative preload leak into later phases,
               ;; where it would apply to every program they run.
               (unsetenv "LD_PRELOAD"))
             #t)))))
    (inputs
     `(("mariadb:dev" ,mariadb "dev")
       ("mariadb:lib" ,mariadb "lib")))
    ;; NOTE(review): the source is fetched from tangentsoft.com while the
    ;; home page uses tangentsoft.net -- confirm which one is canonical.
    (home-page "https://tangentsoft.net/mysqlpp/")
    (synopsis "MySQL C++ library bindings")
    (description "MySQL++ is a complex C++ API for MySQL. The goal of this API
is to make working with Queries as easy as working with other STL containers.")
    (license license:lgpl2.1+)))
Loading…
Cancel
Save