about summary refs log tree commit diff
diff options
context:
space:
mode:
author Efraim Flashner 2022-06-21 12:22:39 +0300
committer Efraim Flashner 2022-06-21 12:22:39 +0300
commit b00c31bbe50fb6e11951c086ca79e79a4beafa13 (patch)
tree f95bc680a9d5f9603271942716b237e109a21cf5
parent 86e0be138336fe2eafbb56ec3891a679b4ed4a0a (diff)
download guix-bioinformatics-b00c31bbe50fb6e11951c086ca79e79a4beafa13.tar.gz
gn: Add quast.
-rw-r--r-- gn/packages/bioinformatics.scm 130
-rw-r--r-- quast.patch 88
2 files changed, 218 insertions, 0 deletions
diff --git a/gn/packages/bioinformatics.scm b/gn/packages/bioinformatics.scm
index 2e577ee..8a6de69 100644
--- a/gn/packages/bioinformatics.scm
+++ b/gn/packages/bioinformatics.scm
@@ -31,6 +31,7 @@
   #:use-module (gnu packages bioconductor)
   #:use-module (gnu packages bioinformatics)
   #:use-module (gnu packages boost)
+  #:use-module (gnu packages bootstrap)
   #:use-module (gnu packages check)
   #:use-module (gnu packages cmake)
   #:use-module (gnu packages compression)
@@ -3297,6 +3298,135 @@ their chance of getting selected as minimizers.")
     ;; Rest of the code is public domain.
     (license license:expat)))
 
+(define-public quast
+  (package
+    (name "quast")
+    (version "5.2.0")
+    (source
+      (origin
+        (method url-fetch)
+        (uri (list (pypi-uri "quast" version)
+                   (string-append "https://github.com/ablab/quast"
+                                  "/releases/download/quast_" version
+                                  "/quast-" version ".tar.gz")))
+        (sha256
+         (base32 "1nz0lz7zgrhcirmm3xcn756f91a6bpww9npap3a4l9gsgh413nfc"))
+        (patches (search-patches "quast.patch"))
+        (snippet
+         #~(begin
+             (use-modules (guix build utils))
+             (with-directory-excursion "quast_libs"
+               (substitute* "run_busco.py"
+                 (("from quast_libs\\.busco import busco") "import busco"))
+               (delete-file-recursively "site_packages/joblib2")
+               (delete-file-recursively "site_packages/joblib3")
+               (delete-file-recursively "site_packages/simplejson")
+               (delete-file-recursively "minimap2")     ; Accepts minimap2 >= 2.19
+               ;; These tools are needed at runtime; use the packaged versions.
+               (delete-file-recursively "bedtools")
+               (delete-file-recursively "bwa")
+               ;; These files are from python itself
+               (delete-file "site_packages/bz2.py")
+               (delete-file "site_packages/_bz2.py")
+               (delete-file "site_packages/_compression.py")
+               ;; Delete some pre-compiled binaries
+               (delete-file-recursively "barrnap/binaries/darwin")
+               (delete-file "barrnap/binaries/linux/nhmmer")
+               (delete-file "busco/hmmsearch")
+               (delete-file "sambamba/sambamba_linux")
+               (delete-file "sambamba/sambamba_osx")
+               ;; TODO:
+               ;(delete-file "barrnap/bin/barrnap")
+
+               ;; Genemark is non-free, but available to academic
+               ;; institutions.  Remove some of the bundled binaries.
+               (delete-file-recursively "genemark/linux_32")
+               (delete-file-recursively "genemark/macosx")
+               (delete-file-recursively "genemark-es/linux_32")
+               (delete-file-recursively "genemark-es/macosx"))))))
+    (build-system python-build-system)
+    (arguments
+     (list
+       #:phases
+       #~(modify-phases %standard-phases
+           (add-after 'unpack 'patchelf-genemark
+             (lambda* (#:key inputs #:allow-other-keys)
+               (let ((patchelf (search-input-file inputs "/bin/patchelf"))
+                     (ld-so    (search-input-file inputs #$(glibc-dynamic-linker)))
+                     (rpath    (dirname
+                                 (search-input-file inputs "/lib/libstdc++.so.6"))))
+                 (for-each (lambda (binary)
+                             (invoke patchelf "--set-interpreter" ld-so binary)
+                             (invoke patchelf "--set-rpath" rpath binary))
+                           (list "quast_libs/genemark/linux_64/gmhmmp"
+                                 "quast_libs/genemark/linux_64/probuild"
+                                 "quast_libs/genemark-es/linux_64/gmhmme3"
+                                 "quast_libs/genemark-es/linux_64/probuild")))))
+           (add-before 'build 'replace-bundled-binaries
+             (lambda* (#:key inputs #:allow-other-keys)
+               (substitute* "quast_libs/ca_utils/misc.py"
+                 (("join\\(qconfig.LIBS_LOCATION, 'minimap2'\\)")
+                  (string-append "'" (search-input-file inputs "/bin/minimap2") "'")))
+               (substitute* "./quast_libs/ra_utils/misc.py"
+                 (("join\\(sambamba_dirpath, fname \\+ platform_suffix\\)")
+                  (string-append "'" (search-input-file inputs "/bin/sambamba") "'"))
+                 (("join\\(qconfig.LIBS_LOCATION, 'bedtools', 'bin'\\)")
+                  (string-append
+                    "'" (dirname (search-input-file inputs "/bin/bedtools")) "'")))))
+           (add-after 'wrap 'wrap-more
+             (lambda* (#:key inputs outputs #:allow-other-keys)
+               (for-each
+                 (lambda (file)
+                   (wrap-program file
+                     `("PATH" ":" prefix
+                       ,(map (lambda (file-name)
+                               (string-append (assoc-ref inputs file-name) "/bin"))
+                             (list "bedtools"
+                                   "blast+"
+                                   "busco"
+                                   "bwa"
+                                   "hmmer"
+                                   "minimap2"
+                                   "sambamba")))))
+                 (find-files (string-append #$output "/bin") "\\.py$"))))
+           (replace 'check
+             (lambda* (#:key tests? inputs outputs #:allow-other-keys)
+               (when tests?
+                 (add-installed-pythonpath inputs outputs)
+                 (invoke "python" "setup.py" "test"))))
+           (delete 'strip))))       ; Can't strip genemark binaries.
+    (native-inputs
+     (list (list (canonical-package gcc) "lib") patchelf))
+    (inputs
+     (list python-joblib
+           python-matplotlib
+           python-simplejson
+           ;; And the non-python packages:
+           ;augustus
+           bash-minimal
+           bedtools
+           blast+
+           busco
+           bwa
+           hmmer
+           minimap2
+           perl
+           sambamba))
+    (home-page "http://quast.sourceforge.net/")
+    (synopsis "Genome assembly evaluation tool")
+    (description "QUAST stands for QUality ASsessment Tool.  It evaluates
+genome/metagenome assemblies by computing various metrics.  The current QUAST
+toolkit includes the general QUAST tool for genome assemblies, MetaQUAST, the
+extension for metagenomic datasets, QUAST-LG, the extension for large genomes
+(e.g., mammalians), and Icarus, the interactive visualizer for these tools.")
+    (supported-systems '("x86_64-linux"))   ; Due to bundled genemark
+    (license
+      (list license:gpl2    ; Main program
+            ;; Genemark (bundled) is free for non-commercial use by academic,
+            ;; government, and non-profit/not-for-profit institutions.
+            (license:non-copyleft
+              "http://topaz.gatech.edu/GeneMark/license_download.cgi")))))
+
 ;; TODO: Regenerate or remove docs folder.
 (define-public python-pixy
   (package
diff --git a/quast.patch b/quast.patch
new file mode 100644
index 0000000..31e5460
--- /dev/null
+++ b/quast.patch
@@ -0,0 +1,88 @@
+This patch is mostly adapted from the Debian patches
+https://salsa.debian.org/med-team/quast/-/tree/master/debian/patches
+
+diff --git a/setup.py b/setup.py
+index a982a430..24ab2f5f 100755
+--- a/setup.py
++++ b/setup.py
+@@ -35,10 +35,10 @@ except:
+                  exit_with_code=1)
+ 
+ from quast_libs.glimmer import compile_glimmer
+-from quast_libs.run_busco import download_augustus, download_all_db
+-from quast_libs.search_references_meta import download_blast_binaries, download_blastdb
++from quast_libs.run_busco import download_all_db
++from quast_libs.search_references_meta import download_blastdb
+ from quast_libs.ca_utils.misc import compile_aligner
+-from quast_libs.ra_utils.misc import compile_reads_analyzer_tools, compile_bwa, compile_bedtools, download_gridss
++from quast_libs.ra_utils.misc import compile_reads_analyzer_tools, download_gridss
+ 
+ name = 'quast'
+ quast_package = qconfig.PACKAGE_NAME
+@@ -60,8 +60,6 @@ if cmd_in(['clean', 'sdist']):
+     logger.info('Cleaning up binary files...')
+     compile_aligner(logger, only_clean=True)
+     compile_glimmer(logger, only_clean=True)
+-    compile_bwa(logger, only_clean=True)
+-    compile_bedtools(logger, only_clean=True)
+     for fpath in [fn for fn in glob(join(quast_package, '*.pyc'))]: os.remove(fpath)
+     for fpath in [fn for fn in glob(join(quast_package, 'html_saver', '*.pyc'))]: os.remove(fpath)
+     for fpath in [fn for fn in glob(join(quast_package, 'site_packages', '*', '*.pyc'))]: os.remove(fpath)
+@@ -74,10 +72,7 @@ if cmd_in(['clean', 'sdist']):
+         if isdir(name + '.egg-info'):
+             shutil.rmtree(name + '.egg-info')
+         download_gridss(logger, only_clean=True)
+-        download_blast_binaries(logger, only_clean=True)
+-        download_blastdb(logger, only_clean=True)
+         if qconfig.platform_name != 'macosx':
+-            download_augustus(logger, only_clean=True)
+             download_all_db(logger, only_clean=True)
+         logger.info('Done.')
+         sys.exit()
+@@ -168,9 +163,6 @@ if cmd_in(['install', 'develop', 'build', 'build_ext']):
+         logger.info('* Downloading GRIDSS *')
+         if not download_gridss(logger):
+             modules_failed_to_install.append('GRIDSS (affects -1/--reads1 and -2/--reads2 options)')
+-        logger.info('* Downloading BLAST *')
+-        if not download_blast_binaries(logger):
+-            modules_failed_to_install.append('BLAST (affects metaquast.py in without references mode and --find-conserved-genes option)')
+         logger.info('* Downloading SILVA 16S rRNA gene database *')
+         if not download_blastdb(logger):
+             modules_failed_to_install.append('SILVA 16S rRNA gene database (affects metaquast.py in without references mode)')
+@@ -186,20 +178,8 @@ if cmd_in(['install', 'develop', 'build', 'build_ext']):
+     logger.info('')
+ 
+ 
+-if qconfig.platform_name == 'macosx':
+-    sambamba_files = [join('sambamba', 'sambamba_osx')]
+-else:
+-    sambamba_files = [join('sambamba', 'sambamba_linux')]
+-
+-minimap_files = find_package_files('minimap2')
+-bwa_files = [
+-    join('bwa', fp) for fp in os.listdir(join(quast_package, 'bwa'))
+-    if isfile(join(quast_package, 'bwa', fp)) and fp.startswith('bwa')]
+-bedtools_files = [join('bedtools', 'bin', '*')]
+ full_install_tools = (
+-    find_package_files('gridss') +
+-    find_package_files('blast') +
+-    [join(quast_package, 'busco', 'hmmsearch')]
++    find_package_files('gridss')
+ )
+ 
+ setup(
+@@ -229,14 +209,10 @@ The tool accepts multiple assemblies, thus is suitable for comparison.''',
+             'manual.html',
+             ] +
+             find_package_files('html_saver') +
+-            minimap_files +
+             find_package_files('genemark/' + qconfig.platform_name) +
+             find_package_files('genemark-es/' + qconfig.platform_name) +
+             find_package_files('genemark-es/lib') +
+             find_package_files('glimmer') +
+-            bwa_files +
+-            bedtools_files +
+-            sambamba_files +
+            (full_install_tools if install_full else [])
+     },
+     include_package_data=True,