diff options
author | Muriithi Frederick Muriuki | 2021-08-30 07:00:38 +0300 |
---|---|---|
committer | Muriithi Frederick Muriuki | 2021-08-30 07:00:38 +0300 |
commit | 983acfdfc523677b4d7501287a000b7fd52a2c39 (patch) | |
tree | 92547cd368fd0f024d4aedf92b1c3dc62417e65d | |
parent | c045122908d36bba4ca197f3f67e89d80958f38f (diff) | |
download | genenetwork3-983acfdfc523677b4d7501287a000b7fd52a2c39.tar.gz |
Implement module for interfacing with rust-qtlreaper
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi
* gn3/computations/heatmap.py: move `generate_traits_file` function to new
module
* gn3/computations/qtlreaper.py: new module to interface with the
`rust-qtlreaper` utility.
* gn3/settings.py: Provide setting for the path to the `rust-qtlreaper`
utility
* qtlfilesexport.py: Move `random_string` function to new module. Update to
use functions in new module.
Provide a module with functions to be used to interface with
`rust-qtlreaper`. This module essentially contains all the functions that
are needed to build the files needed for, and to run the qtlreaper utility.
-rw-r--r-- | gn3/computations/heatmap.py | 8 | ||||
-rw-r--r-- | gn3/computations/qtlreaper.py | 88 | ||||
-rw-r--r-- | gn3/settings.py | 3 | ||||
-rw-r--r-- | qtlfilesexport.py | 10 |
4 files changed, 92 insertions, 17 deletions
diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py index 3e96ed2..dcd64b1 100644 --- a/gn3/computations/heatmap.py +++ b/gn3/computations/heatmap.py @@ -230,11 +230,3 @@ def retrieve_strains_and_values(orders, strainlist, traits_data_list): values = [] return rets - -def generate_traits_file(strains, trait_values, traits_filename): - header = "Traits\t{}\n".format("\t".join(strains)) - data = [header] + [ - "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t])) - for i,t in enumerate(trait_values)] - with open(traits_filename, "w") as outfile: - outfile.writelines(data) diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py new file mode 100644 index 0000000..49d363b --- /dev/null +++ b/gn3/computations/qtlreaper.py @@ -0,0 +1,88 @@ +""" +This module contains functions to interact with the `qtlreaper` utility for +computation of QTLs. +""" +import os +import random +import string +import subprocess +from gn3.settings import TMPDIR, REAPER_COMMAND + +def random_string(length): + """Generate a random string of length `length`.""" + return "".join( + random.choices( + string.ascii_letters + string.digits, k=length)) + +def generate_traits_file(strains, trait_values, traits_filename): + """ + Generate a traits file for use with `qtlreaper`. + + PARAMETERS: + strains: A list of strains to use as the headers for the various columns. + trait_values: A list of lists of values for each trait and strain. + traits_filename: The tab-separated value to put the values in for + computation of QTLs. + """ + header = "Traits\t{}\n".format("\t".join(strains)) + data = [header] + [ + "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t])) + for i, t in enumerate(trait_values)] + with open(traits_filename, "w") as outfile: + outfile.writelines(data) + +def create_output_directory(path: str): + """Create the output directory at `path` if it does not exist.""" + try: + os.mkdir(path) + except OSError: + pass + +def run_reaper( + genotype_filename: str, traits_filename: str, + other_options: tuple = ("--n_permutations", 1000), + separate_nperm_output: bool = False, + output_dir: str = TMPDIR): + """ + Run the QTLReaper command to compute the QTLs. + + PARAMETERS: + genotype_filename: The complete path to a genotype file to use in the QTL + computation. + traits_filename: A path to a file previously generated with the + `generate_traits_file` function in this module, to be used in the QTL + computation. + other_options: Other options to pass to the `qtlreaper` command to modify + the QTL computations. + separate_nperm_output: A flag indicating whether or not to provide a + separate output for the permutations computation. The default is False, + which means by default, no separate output file is created. + output_dir: A path to the directory where the outputs are put + + RETURNS: + The function returns a tuple of the main output file, and the output file + for the permutation computations. If the `separate_nperm_output` is `False`, + the second value in the tuple returned is `None`. + + RAISES: + The function will raise a `subprocess.CalledProcessError` exception in case + of any errors running the `qtlreaper` command. + """ + create_output_directory(output_dir) + output_filename = "{}/qtlreaper/main_output_{}.txt".format( + output_dir, random_string(10)) + output_list = ["--main_output", output_filename] + if separate_nperm_output: + permu_output_filename = "{}/qtlreaper/permu_output_{}.txt".format( + output_dir, random_string(10)) + output_list = output_list + ["--permu_output", permu_output_filename] + else: + permu_output_filename = None + + command_list = [ + REAPER_COMMAND, "--geno", genotype_filename, + *other_options, # this splices the `other_options` list here + "--traits", traits_filename, "--main_output", output_filename] + + subprocess.run(command_list, check=True) + return (output_filename, permu_output_filename) diff --git a/gn3/settings.py b/gn3/settings.py index f4866d5..d137370 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -24,3 +24,6 @@ GN2_BASE_URL = "http://www.genenetwork.org/" # biweight script BIWEIGHT_RSCRIPT = "~/genenetwork3/scripts/calculate_biweight.R" + +# qtlreaper command +REAPER_COMMAND = "{}/bin/qtlreaper".format(os.environ.get("GUIX_ENVIRONMENT")) diff --git a/qtlfilesexport.py b/qtlfilesexport.py index 2e7c9c2..0543dc9 100644 --- a/qtlfilesexport.py +++ b/qtlfilesexport.py @@ -7,16 +7,14 @@ Run with: replacing the variables in the angled brackets with the appropriate values """ -import random -import string from gn3.computations.slink import slink from gn3.db_utils import database_connector from gn3.computations.heatmap import export_trait_data from gn3.db.traits import retrieve_trait_data, retrieve_trait_info +from gn3.computations.qtlreaper import random_string, generate_traits_file from gn3.computations.heatmap import ( cluster_traits, compute_heatmap_order, - generate_traits_file, retrieve_strains_and_values) TMPDIR = "tmp/qtltests" @@ -35,11 +33,6 @@ def trait_fullnames(): "UCLA_BXDBXH_CARTILAGE_V2::ILM4200064", "UCLA_BXDBXH_CARTILAGE_V2::ILM3140463"] -def random_string(length): - return "".join( - random.choices( - string.ascii_letters + string.digits, k=length)) - def main(): """entrypoint function""" conn = database_connector()[0] @@ -56,7 +49,6 @@ def main(): strains_and_values = retrieve_strains_and_values( orders, strains, exported_traits_data_list) strains_values = strains_and_values[0][1] - strains_values2 = strains_and_values[1][1] trait_values = [t[2] for t in strains_and_values] traits_filename = "{}/traits_test_file_{}.txt".format( TMPDIR, random_string(10)) |