From b00c31bbe50fb6e11951c086ca79e79a4beafa13 Mon Sep 17 00:00:00 2001
From: Efraim Flashner
Date: Tue, 21 Jun 2022 12:22:39 +0300
Subject: gn: Add quast.

---
 gn/packages/bioinformatics.scm | 130 +++++++++++++++++++++++++++++++++++++++++
 quast.patch                    |  88 ++++++++++++++++++++++++++++
 2 files changed, 218 insertions(+)
 create mode 100644 quast.patch

diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm
index 2e577ee..8a6de69 100644
--- a/gn/packages/bioinformatics.scm
+++ b/gn/packages/bioinformatics.scm
@@ -31,6 +31,7 @@
   #:use-module (gnu packages bioconductor)
   #:use-module (gnu packages bioinformatics)
   #:use-module (gnu packages boost)
+  #:use-module (gnu packages bootstrap)
   #:use-module (gnu packages check)
   #:use-module (gnu packages cmake)
   #:use-module (gnu packages compression)
@@ -3297,6 +3298,135 @@ their chance of getting selected as minimizers.")
     ;; Rest of the code is public domain.
     (license license:expat)))
 
+(define-public quast
+  (package
+    (name "quast")
+    (version "5.2.0")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (list (pypi-uri "quast" version)
+                  (string-append "https://github.com/ablab/quast"
+                                 "/releases/download/quast_" version
+                                 "/quast-" version ".tar.gz")))
+       (sha256
+        (base32 "1nz0lz7zgrhcirmm3xcn756f91a6bpww9npap3a4l9gsgh413nfc"))
+       (patches (search-patches "quast.patch"))
+       (snippet
+        #~(begin
+            (use-modules (guix build utils))
+            (with-directory-excursion "quast_libs"
+              (substitute* "run_busco.py" ; import the packaged busco module
+                (("from quast_libs\\.busco import busco") "import busco"))
+              (delete-file-recursively "site_packages/joblib2")
+              (delete-file-recursively "site_packages/joblib3")
+              (delete-file-recursively "site_packages/simplejson")
+              (delete-file-recursively "minimap2") ; Accepts minimap2 >= 2.19
+              ;; Needed at runtime; the bedtools and bwa inputs are used instead.
+              (delete-file-recursively "bedtools")
+              (delete-file-recursively "bwa")
+              ;; These files are from Python itself.
+              (delete-file "site_packages/bz2.py")
+              (delete-file "site_packages/_bz2.py")
+              (delete-file "site_packages/_compression.py")
+              ;; Delete some pre-compiled binaries.
+              (delete-file-recursively "barrnap/binaries/darwin")
+              (delete-file "barrnap/binaries/linux/nhmmer")
+              (delete-file "busco/hmmsearch")
+              (delete-file "sambamba/sambamba_linux")
+              (delete-file "sambamba/sambamba_osx")
+              ;; TODO:
+              ;(delete-file "barrnap/bin/barrnap")
+
+              ;; Genemark is non-free, but available to academic institutions.
+              ;; Remove the bundled 32-bit Linux and macOS binaries.
+              (delete-file-recursively "genemark/linux_32")
+              (delete-file-recursively "genemark/macosx")
+              (delete-file-recursively "genemark-es/linux_32")
+              (delete-file-recursively "genemark-es/macosx"))))))
+    (build-system python-build-system)
+    (arguments
+     (list
+      #:phases
+      #~(modify-phases %standard-phases
+          (add-after 'unpack 'patchelf-genemark ; use our ld.so and libstdc++
+            (lambda* (#:key inputs #:allow-other-keys)
+              (let ((patchelf (search-input-file inputs "/bin/patchelf"))
+                    (ld-so (search-input-file inputs #$(glibc-dynamic-linker)))
+                    (rpath (dirname
+                            (search-input-file inputs "/lib/libstdc++.so.6"))))
+                (for-each (lambda (binary)
+                            (invoke patchelf "--set-interpreter" ld-so binary)
+                            (invoke patchelf "--set-rpath" rpath binary))
+                          (list "quast_libs/genemark/linux_64/gmhmmp"
+                                "quast_libs/genemark/linux_64/probuild"
+                                "quast_libs/genemark-es/linux_64/gmhmme3"
+                                "quast_libs/genemark-es/linux_64/probuild")))))
+          (add-before 'build 'replace-bundled-binaries ; hard-code input paths
+            (lambda* (#:key inputs #:allow-other-keys)
+              (substitute* "quast_libs/ca_utils/misc.py"
+                (("join\\(qconfig.LIBS_LOCATION, 'minimap2'\\)")
+                 (string-append "'" (search-input-file inputs "/bin/minimap2") "'")))
+              (substitute* "./quast_libs/ra_utils/misc.py"
+                (("join\\(sambamba_dirpath, fname \\+ platform_suffix\\)")
+                 (string-append "'" (search-input-file inputs "/bin/sambamba") "'"))
+                (("join\\(qconfig.LIBS_LOCATION, 'bedtools', 'bin'\\)")
+                 (string-append
+                  "'" (dirname (search-input-file inputs "/bin/bedtools")) "'")))))
+          (add-after 'wrap 'wrap-more   ; put the external tools on PATH
+            (lambda* (#:key inputs #:allow-other-keys)
+              (for-each
+               (lambda (file)
+                 (wrap-program file
+                   `("PATH" ":" prefix
+                     ,(map (lambda (file-name)
+                             (string-append (assoc-ref inputs file-name) "/bin"))
+                           (list "bedtools"
+                                 "blast+"
+                                 "busco"
+                                 "bwa"
+                                 "hmmer"
+                                 "minimap2"
+                                 "sambamba")))))
+               (find-files (string-append #$output "/bin") "\\.py$"))))
+          (replace 'check
+            (lambda* (#:key tests? inputs outputs #:allow-other-keys)
+              (when tests?
+                (add-installed-pythonpath inputs outputs)
+                (invoke "python" "setup.py" "test"))))
+          (delete 'strip))))            ; Can't strip genemark binaries.
+    (native-inputs (list patchelf))     ; only needed by 'patchelf-genemark
+    (inputs
+     (list python-joblib
+           python-matplotlib
+           python-simplejson
+           ;; And the non-python packages:
+           ;augustus
+           bash-minimal
+           bedtools
+           blast+
+           busco
+           bwa
+           (list (canonical-package gcc) "lib") ; libstdc++, genemark's RPATH
+           hmmer
+           minimap2
+           perl
+           sambamba))
+    (home-page "http://quast.sourceforge.net/")
+    (synopsis "Genome assembly evaluation tool")
+    (description "QUAST stands for QUality ASsessment Tool. It evaluates
+genome/metagenome assemblies by computing various metrics. The current QUAST
+toolkit includes the general QUAST tool for genome assemblies, MetaQUAST, the
+extension for metagenomic datasets, QUAST-LG, the extension for large genomes
+(e.g., mammalians), and Icarus, the interactive visualizer for these tools.")
+    (supported-systems '("x86_64-linux")) ; Due to bundled genemark
+    (license
+     (list license:gpl2                 ; Main program
+           ;; Genemark (bundled) is free for non-commercial use by academic,
+           ;; government, and non-profit/not-for-profit institutions.
+           (license:non-copyleft
+            "http://topaz.gatech.edu/GeneMark/license_download.cgi")))))
+
 ;; TODO: Regenerate or remove docs folder.
 (define-public python-pixy
   (package
diff --git a/quast.patch b/quast.patch
new file mode 100644
index 0000000..31e5460
--- /dev/null
+++ b/quast.patch
@@ -0,0 +1,88 @@
+Remove setup.py references to bundled bwa, bedtools, BLAST, augustus, minimap2, sambamba and hmmsearch.
+Mostly adapted from the Debian patches: https://salsa.debian.org/med-team/quast/-/tree/master/debian/patches
+
+diff --git a/setup.py b/setup.py
+index a982a430..24ab2f5f 100755
+--- a/setup.py
++++ b/setup.py
+@@ -35,10 +35,10 @@ except:
+                  exit_with_code=1)
+ 
+ from quast_libs.glimmer import compile_glimmer
+-from quast_libs.run_busco import download_augustus, download_all_db
+-from quast_libs.search_references_meta import download_blast_binaries, download_blastdb
++from quast_libs.run_busco import download_all_db
++from quast_libs.search_references_meta import download_blastdb
+ from quast_libs.ca_utils.misc import compile_aligner
+-from quast_libs.ra_utils.misc import compile_reads_analyzer_tools, compile_bwa, compile_bedtools, download_gridss
++from quast_libs.ra_utils.misc import compile_reads_analyzer_tools, download_gridss
+ 
+ name = 'quast'
+ quast_package = qconfig.PACKAGE_NAME
+@@ -60,8 +60,6 @@ if cmd_in(['clean', 'sdist']):
+     logger.info('Cleaning up binary files...')
+     compile_aligner(logger, only_clean=True)
+     compile_glimmer(logger, only_clean=True)
+-    compile_bwa(logger, only_clean=True)
+-    compile_bedtools(logger, only_clean=True)
+     for fpath in [fn for fn in glob(join(quast_package, '*.pyc'))]: os.remove(fpath)
+     for fpath in [fn for fn in glob(join(quast_package, 'html_saver', '*.pyc'))]: os.remove(fpath)
+     for fpath in [fn for fn in glob(join(quast_package, 'site_packages', '*', '*.pyc'))]: os.remove(fpath)
+@@ -74,10 +72,7 @@ if cmd_in(['clean', 'sdist']):
+     if isdir(name + '.egg-info'):
+         shutil.rmtree(name + '.egg-info')
+     download_gridss(logger, only_clean=True)
+-    download_blast_binaries(logger, only_clean=True)
+-    download_blastdb(logger, only_clean=True)
+     if qconfig.platform_name != 'macosx':
+-        download_augustus(logger, only_clean=True)
+         download_all_db(logger, only_clean=True)
+     logger.info('Done.')
+     sys.exit()
+@@ -168,9 +163,6 @@ if cmd_in(['install', 'develop', 'build', 'build_ext']):
+         logger.info('* Downloading GRIDSS *')
+         if not download_gridss(logger):
+             modules_failed_to_install.append('GRIDSS (affects -1/--reads1 and -2/--reads2 options)')
+-        logger.info('* Downloading BLAST *')
+-        if not download_blast_binaries(logger):
+-            modules_failed_to_install.append('BLAST (affects metaquast.py in without references mode and --find-conserved-genes option)')
+         logger.info('* Downloading SILVA 16S rRNA gene database *')
+         if not download_blastdb(logger):
+             modules_failed_to_install.append('SILVA 16S rRNA gene database (affects metaquast.py in without references mode)')
+@@ -186,20 +178,8 @@ if cmd_in(['install', 'develop', 'build', 'build_ext']):
+     logger.info('')
+ 
+ 
+-if qconfig.platform_name == 'macosx':
+-    sambamba_files = [join('sambamba', 'sambamba_osx')]
+-else:
+-    sambamba_files = [join('sambamba', 'sambamba_linux')]
+-
+-minimap_files = find_package_files('minimap2')
+-bwa_files = [
+-    join('bwa', fp) for fp in os.listdir(join(quast_package, 'bwa'))
+-    if isfile(join(quast_package, 'bwa', fp)) and fp.startswith('bwa')]
+-bedtools_files = [join('bedtools', 'bin', '*')]
+ full_install_tools = (
+-    find_package_files('gridss') +
+-    find_package_files('blast') +
+-    [join(quast_package, 'busco', 'hmmsearch')]
++    find_package_files('gridss')
+ )
+ 
+ setup(
+@@ -229,14 +209,10 @@ The tool accepts multiple assemblies, thus is suitable for comparison.''',
+                 'manual.html',
+             ] +
+             find_package_files('html_saver') +
+-            minimap_files +
+             find_package_files('genemark/' + qconfig.platform_name) +
+             find_package_files('genemark-es/' + qconfig.platform_name) +
+             find_package_files('genemark-es/lib') +
+             find_package_files('glimmer') +
+-            bwa_files +
+-            bedtools_files +
+-            sambamba_files +
+             (full_install_tools if install_full else [])
+     },
+     include_package_data=True,
-- 
cgit v1.2.3