aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMuriithi Frederick Muriuki2021-08-30 07:00:38 +0300
committerMuriithi Frederick Muriuki2021-08-30 07:00:38 +0300
commit983acfdfc523677b4d7501287a000b7fd52a2c39 (patch)
tree92547cd368fd0f024d4aedf92b1c3dc62417e65d
parentc045122908d36bba4ca197f3f67e89d80958f38f (diff)
downloadgenenetwork3-983acfdfc523677b4d7501287a000b7fd52a2c39.tar.gz
Implement module for interfacing with rust-qtlreaper
Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/computations/heatmap.py: move `generate_traits_file` function to new module * gn3/computations/qtlreaper.py: new module to interface with the `rust-qtlreaper` utility. * gn3/settings.py: Provide setting for the path to the `rust-qtlreaper` utility * qtlfilesexport.py: Move `random_string` function to new module. Update to use functions in new module. Provide a module with functions to be used to interface with `rust-qtlreaper`. This module essentially contains all the functions that are needed to build the files needed for, and to run the qtlreaper utility.
-rw-r--r--gn3/computations/heatmap.py8
-rw-r--r--gn3/computations/qtlreaper.py88
-rw-r--r--gn3/settings.py3
-rw-r--r--qtlfilesexport.py10
4 files changed, 92 insertions, 17 deletions
diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index 3e96ed2..dcd64b1 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -230,11 +230,3 @@ def retrieve_strains_and_values(orders, strainlist, traits_data_list):
values = []
return rets
-
-def generate_traits_file(strains, trait_values, traits_filename):
- header = "Traits\t{}\n".format("\t".join(strains))
- data = [header] + [
- "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
- for i,t in enumerate(trait_values)]
- with open(traits_filename, "w") as outfile:
- outfile.writelines(data)
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
new file mode 100644
index 0000000..49d363b
--- /dev/null
+++ b/gn3/computations/qtlreaper.py
@@ -0,0 +1,88 @@
+"""
+This module contains functions to interact with the `qtlreaper` utility for
+computation of QTLs.
+"""
+import os
+import random
+import string
+import subprocess
+from gn3.settings import TMPDIR, REAPER_COMMAND
+
+def random_string(length):
+ """Generate a random string of length `length`."""
+ return "".join(
+ random.choices(
+ string.ascii_letters + string.digits, k=length))
+
+def generate_traits_file(strains, trait_values, traits_filename):
+ """
+ Generate a traits file for use with `qtlreaper`.
+
+ PARAMETERS:
+ strains: A list of strains to use as the headers for the various columns.
+ trait_values: A list of lists of values for each trait and strain.
+ traits_filename: The tab-separated value to put the values in for
+ computation of QTLs.
+ """
+ header = "Traits\t{}\n".format("\t".join(strains))
+ data = [header] + [
+ "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
+ for i, t in enumerate(trait_values)]
+ with open(traits_filename, "w") as outfile:
+ outfile.writelines(data)
+
+def create_output_directory(path: str):
+ """Create the output directory at `path` if it does not exist."""
+ try:
+ os.mkdir(path)
+ except OSError:
+ pass
+
+def run_reaper(
+ genotype_filename: str, traits_filename: str,
+ other_options: tuple = ("--n_permutations", 1000),
+ separate_nperm_output: bool = False,
+ output_dir: str = TMPDIR):
+ """
+ Run the QTLReaper command to compute the QTLs.
+
+ PARAMETERS:
+ genotype_filename: The complete path to a genotype file to use in the QTL
+ computation.
+ traits_filename: A path to a file previously generated with the
+ `generate_traits_file` function in this module, to be used in the QTL
+ computation.
+ other_options: Other options to pass to the `qtlreaper` command to modify
+ the QTL computations.
+ separate_nperm_output: A flag indicating whether or not to provide a
+ separate output for the permutations computation. The default is False,
+ which means by default, no separate output file is created.
+ output_dir: A path to the directory where the outputs are put
+
+ RETURNS:
+ The function returns a tuple of the main output file, and the output file
+ for the permutation computations. If the `separate_nperm_output` is `False`,
+ the second value in the tuple returned is `None`.
+
+ RAISES:
+ The function will raise a `subprocess.CalledProcessError` exception in case
+ of any errors running the `qtlreaper` command.
+ """
+ create_output_directory(output_dir)
+ output_filename = "{}/qtlreaper/main_output_{}.txt".format(
+ output_dir, random_string(10))
+ output_list = ["--main_output", output_filename]
+ if separate_nperm_output:
+ permu_output_filename = "{}/qtlreaper/permu_output_{}.txt".format(
+ output_dir, random_string(10))
+ output_list = output_list + ["--permu_output", permu_output_filename]
+ else:
+ permu_output_filename = None
+
+ command_list = [
+ REAPER_COMMAND, "--geno", genotype_filename,
+ *other_options, # this splices the `other_options` list here
+ "--traits", traits_filename, "--main_output", output_filename]
+
+ subprocess.run(command_list, check=True)
+ return (output_filename, permu_output_filename)
diff --git a/gn3/settings.py b/gn3/settings.py
index f4866d5..d137370 100644
--- a/gn3/settings.py
+++ b/gn3/settings.py
@@ -24,3 +24,6 @@ GN2_BASE_URL = "http://www.genenetwork.org/"
# biweight script
BIWEIGHT_RSCRIPT = "~/genenetwork3/scripts/calculate_biweight.R"
+
+# qtlreaper command
+REAPER_COMMAND = "{}/bin/qtlreaper".format(os.environ.get("GUIX_ENVIRONMENT"))
diff --git a/qtlfilesexport.py b/qtlfilesexport.py
index 2e7c9c2..0543dc9 100644
--- a/qtlfilesexport.py
+++ b/qtlfilesexport.py
@@ -7,16 +7,14 @@ Run with:
replacing the variables in the angled brackets with the appropriate values
"""
-import random
-import string
from gn3.computations.slink import slink
from gn3.db_utils import database_connector
from gn3.computations.heatmap import export_trait_data
from gn3.db.traits import retrieve_trait_data, retrieve_trait_info
+from gn3.computations.qtlreaper import random_string, generate_traits_file
from gn3.computations.heatmap import (
cluster_traits,
compute_heatmap_order,
- generate_traits_file,
retrieve_strains_and_values)
TMPDIR = "tmp/qtltests"
@@ -35,11 +33,6 @@ def trait_fullnames():
"UCLA_BXDBXH_CARTILAGE_V2::ILM4200064",
"UCLA_BXDBXH_CARTILAGE_V2::ILM3140463"]
-def random_string(length):
- return "".join(
- random.choices(
- string.ascii_letters + string.digits, k=length))
-
def main():
"""entrypoint function"""
conn = database_connector()[0]
@@ -56,7 +49,6 @@ def main():
strains_and_values = retrieve_strains_and_values(
orders, strains, exported_traits_data_list)
strains_values = strains_and_values[0][1]
- strains_values2 = strains_and_values[1][1]
trait_values = [t[2] for t in strains_and_values]
traits_filename = "{}/traits_test_file_{}.txt".format(
TMPDIR, random_string(10))