aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEfraim Flashner2022-06-21 12:22:39 +0300
committerEfraim Flashner2022-06-21 12:22:39 +0300
commitb00c31bbe50fb6e11951c086ca79e79a4beafa13 (patch)
treef95bc680a9d5f9603271942716b237e109a21cf5
parent86e0be138336fe2eafbb56ec3891a679b4ed4a0a (diff)
downloadguix-bioinformatics-b00c31bbe50fb6e11951c086ca79e79a4beafa13.tar.gz
gn: Add quast.
-rw-r--r--gn/packages/bioinformatics.scm130
-rw-r--r--quast.patch88
2 files changed, 218 insertions, 0 deletions
diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm
index 2e577ee..8a6de69 100644
--- a/gn/packages/bioinformatics.scm
+++ b/gn/packages/bioinformatics.scm
@@ -31,6 +31,7 @@
#:use-module (gnu packages bioconductor)
#:use-module (gnu packages bioinformatics)
#:use-module (gnu packages boost)
+ #:use-module (gnu packages bootstrap)
#:use-module (gnu packages check)
#:use-module (gnu packages cmake)
#:use-module (gnu packages compression)
@@ -3297,6 +3298,135 @@ their chance of getting selected as minimizers.")
;; Rest of the code is public domain.
(license license:expat)))
+(define-public quast
+ (package
+ (name "quast")
+ (version "5.2.0")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (list (pypi-uri "quast" version) ; Two candidate locations: PyPI and the GitHub release tarball.
+ (string-append "https://github.com/ablab/quast"
+ "/releases/download/quast_" version
+ "/quast-" version ".tar.gz")))
+ (sha256
+ (base32 "1nz0lz7zgrhcirmm3xcn756f91a6bpww9npap3a4l9gsgh413nfc"))
+ (patches (search-patches "quast.patch")) ; Drops setup.py references to the bundled tools removed below.
+ (snippet ; Unbundle third-party code and pre-compiled binaries.
+ #~(begin
+ (use-modules (guix build utils))
+ (with-directory-excursion "quast_libs"
+ (substitute* "run_busco.py" ; Import busco as a top-level module rather than from quast_libs.
+ (("from quast_libs\\.busco import busco") "import busco"))
+ (delete-file-recursively "site_packages/joblib2")
+ (delete-file-recursively "site_packages/joblib3")
+ (delete-file-recursively "site_packages/simplejson")
+ (delete-file-recursively "minimap2") ; Accepts minimap2 >= 2.19
+ ;; These tools are needed at runtime; the bundled copies are removed in favor of the packaged inputs.
+ (delete-file-recursively "bedtools")
+ (delete-file-recursively "bwa")
+ ;; These files are bundled copies of modules from Python itself.
+ (delete-file "site_packages/bz2.py")
+ (delete-file "site_packages/_bz2.py")
+ (delete-file "site_packages/_compression.py")
+ ;; Delete some pre-compiled binaries.
+ (delete-file-recursively "barrnap/binaries/darwin")
+ (delete-file "barrnap/binaries/linux/nhmmer")
+ (delete-file "busco/hmmsearch")
+ (delete-file "sambamba/sambamba_linux")
+ (delete-file "sambamba/sambamba_osx")
+ ;; TODO:
+ ;(delete-file "barrnap/bin/barrnap")
+
+ ;; Genemark is non-free, but available to academic
+ ;; institutions.  Remove the bundled binaries for other platforms (linux_64 is kept).
+ (delete-file-recursively "genemark/linux_32")
+ (delete-file-recursively "genemark/macosx")
+ (delete-file-recursively "genemark-es/linux_32")
+ (delete-file-recursively "genemark-es/macosx"))))))
+ (build-system python-build-system)
+ (arguments
+ (list
+ #:phases
+ #~(modify-phases %standard-phases
+ (add-after 'unpack 'patchelf-genemark ; Fix up the kept pre-built linux_64 genemark binaries.
+ (lambda* (#:key inputs #:allow-other-keys)
+ (let ((patchelf (search-input-file inputs "/bin/patchelf"))
+ (ld-so (search-input-file inputs #$(glibc-dynamic-linker)))
+ (rpath (dirname ; Directory that contains libstdc++.so.6.
+ (search-input-file inputs "/lib/libstdc++.so.6"))))
+ (for-each (lambda (binary) ; Set both the ELF interpreter and the rpath of each binary.
+ (invoke patchelf "--set-interpreter" ld-so binary)
+ (invoke patchelf "--set-rpath" rpath binary))
+ (list "quast_libs/genemark/linux_64/gmhmmp"
+ "quast_libs/genemark/linux_64/probuild"
+ "quast_libs/genemark-es/linux_64/gmhmme3"
+ "quast_libs/genemark-es/linux_64/probuild")))))
+ (add-before 'build 'replace-bundled-binaries ; Hard-code input paths for minimap2, sambamba and bedtools.
+ (lambda* (#:key inputs #:allow-other-keys)
+ (substitute* "quast_libs/ca_utils/misc.py"
+ (("join\\(qconfig.LIBS_LOCATION, 'minimap2'\\)")
+ (string-append "'" (search-input-file inputs "/bin/minimap2") "'")))
+ (substitute* "./quast_libs/ra_utils/misc.py"
+ (("join\\(sambamba_dirpath, fname \\+ platform_suffix\\)")
+ (string-append "'" (search-input-file inputs "/bin/sambamba") "'"))
+ (("join\\(qconfig.LIBS_LOCATION, 'bedtools', 'bin'\\)") ; This one is replaced by a directory, not a file.
+ (string-append
+ "'" (dirname (search-input-file inputs "/bin/bedtools")) "'")))))
+ (add-after 'wrap 'wrap-more ; Prefix PATH with the runtime tools for every installed .py script.
+ (lambda* (#:key inputs outputs #:allow-other-keys)
+ (for-each
+ (lambda (file)
+ (wrap-program file
+ `("PATH" ":" prefix
+ ,(map (lambda (file-name) ; Each name below is looked up in INPUTS with assoc-ref.
+ (string-append (assoc-ref inputs file-name) "/bin"))
+ (list "bedtools"
+ "blast+"
+ "busco"
+ "bwa"
+ "hmmer"
+ "minimap2"
+ "sambamba")))))
+ (find-files (string-append #$output "/bin") "\\.py$"))))
+ (replace 'check ; Run "python setup.py test" with the installed modules on the Python path.
+ (lambda* (#:key tests? inputs outputs #:allow-other-keys)
+ (when tests?
+ (add-installed-pythonpath inputs outputs)
+ (invoke "python" "setup.py" "test"))))
+ (delete 'strip)))) ; Can't strip genemark binaries.
+ (native-inputs
+ (list (list (canonical-package gcc) "lib") patchelf)) ; Presumably gcc:lib supplies the libstdc++.so.6 used by 'patchelf-genemark.
+ (inputs
+ (list python-joblib
+ python-matplotlib
+ python-simplejson
+ ;; And the non-Python packages:
+ ;augustus
+ bash-minimal
+ bedtools
+ blast+
+ busco
+ bwa
+ hmmer
+ minimap2
+ perl
+ sambamba))
+ (home-page "http://quast.sourceforge.net/")
+ (synopsis "Genome assembly evaluation tool")
+ (description "QUAST stands for QUality ASsessment Tool. It evaluates
+genome/metagenome assemblies by computing various metrics. The current QUAST
+toolkit includes the general QUAST tool for genome assemblies, MetaQUAST, the
+extension for metagenomic datasets, QUAST-LG, the extension for large genomes
+(e.g., mammalians), and Icarus, the interactive visualizer for these tools.")
+ (supported-systems '("x86_64-linux")) ; Due to bundled genemark
+ (license
+ (list license:gpl2 ; Main program
+ ;; Genemark (bundled) is free for non-commercial use by academic,
+ ;; government, and non-profit/not-for-profit institutions.
+ (license:non-copyleft
+ "http://topaz.gatech.edu/GeneMark/license_download.cgi")))))
+
;; TODO: Regenerate or remove docs folder.
(define-public python-pixy
(package
diff --git a/quast.patch b/quast.patch
new file mode 100644
index 0000000..31e5460
--- /dev/null
+++ b/quast.patch
@@ -0,0 +1,88 @@
+This patch is mostly adapted from the Debian patches
+https://salsa.debian.org/med-team/quast/-/tree/master/debian/patches
+
+diff --git a/setup.py b/setup.py
+index a982a430..24ab2f5f 100755
+--- a/setup.py
++++ b/setup.py
+@@ -35,10 +35,10 @@ except:
+ exit_with_code=1)
+
+ from quast_libs.glimmer import compile_glimmer
+-from quast_libs.run_busco import download_augustus, download_all_db
+-from quast_libs.search_references_meta import download_blast_binaries, download_blastdb
++from quast_libs.run_busco import download_all_db
++from quast_libs.search_references_meta import download_blastdb
+ from quast_libs.ca_utils.misc import compile_aligner
+-from quast_libs.ra_utils.misc import compile_reads_analyzer_tools, compile_bwa, compile_bedtools, download_gridss
++from quast_libs.ra_utils.misc import compile_reads_analyzer_tools, download_gridss
+
+ name = 'quast'
+ quast_package = qconfig.PACKAGE_NAME
+@@ -60,8 +60,6 @@ if cmd_in(['clean', 'sdist']):
+ logger.info('Cleaning up binary files...')
+ compile_aligner(logger, only_clean=True)
+ compile_glimmer(logger, only_clean=True)
+- compile_bwa(logger, only_clean=True)
+- compile_bedtools(logger, only_clean=True)
+ for fpath in [fn for fn in glob(join(quast_package, '*.pyc'))]: os.remove(fpath)
+ for fpath in [fn for fn in glob(join(quast_package, 'html_saver', '*.pyc'))]: os.remove(fpath)
+ for fpath in [fn for fn in glob(join(quast_package, 'site_packages', '*', '*.pyc'))]: os.remove(fpath)
+@@ -74,10 +72,7 @@ if cmd_in(['clean', 'sdist']):
+ if isdir(name + '.egg-info'):
+ shutil.rmtree(name + '.egg-info')
+ download_gridss(logger, only_clean=True)
+- download_blast_binaries(logger, only_clean=True)
+- download_blastdb(logger, only_clean=True)
+ if qconfig.platform_name != 'macosx':
+- download_augustus(logger, only_clean=True)
+ download_all_db(logger, only_clean=True)
+ logger.info('Done.')
+ sys.exit()
+@@ -168,9 +163,6 @@ if cmd_in(['install', 'develop', 'build', 'build_ext']):
+ logger.info('* Downloading GRIDSS *')
+ if not download_gridss(logger):
+ modules_failed_to_install.append('GRIDSS (affects -1/--reads1 and -2/--reads2 options)')
+- logger.info('* Downloading BLAST *')
+- if not download_blast_binaries(logger):
+- modules_failed_to_install.append('BLAST (affects metaquast.py in without references mode and --find-conserved-genes option)')
+ logger.info('* Downloading SILVA 16S rRNA gene database *')
+ if not download_blastdb(logger):
+ modules_failed_to_install.append('SILVA 16S rRNA gene database (affects metaquast.py in without references mode)')
+@@ -186,20 +178,8 @@ if cmd_in(['install', 'develop', 'build', 'build_ext']):
+ logger.info('')
+
+
+-if qconfig.platform_name == 'macosx':
+- sambamba_files = [join('sambamba', 'sambamba_osx')]
+-else:
+- sambamba_files = [join('sambamba', 'sambamba_linux')]
+-
+-minimap_files = find_package_files('minimap2')
+-bwa_files = [
+- join('bwa', fp) for fp in os.listdir(join(quast_package, 'bwa'))
+- if isfile(join(quast_package, 'bwa', fp)) and fp.startswith('bwa')]
+-bedtools_files = [join('bedtools', 'bin', '*')]
+ full_install_tools = (
+- find_package_files('gridss') +
+- find_package_files('blast') +
+- [join(quast_package, 'busco', 'hmmsearch')]
++ find_package_files('gridss')
+ )
+
+ setup(
+@@ -229,14 +209,10 @@ The tool accepts multiple assemblies, thus is suitable for comparison.''',
+ 'manual.html',
+ ] +
+ find_package_files('html_saver') +
+- minimap_files +
+ find_package_files('genemark/' + qconfig.platform_name) +
+ find_package_files('genemark-es/' + qconfig.platform_name) +
+ find_package_files('genemark-es/lib') +
+ find_package_files('glimmer') +
+- bwa_files +
+- bedtools_files +
+- sambamba_files +
+ (full_install_tools if install_full else [])
+ },
+ include_package_data=True,