aboutsummaryrefslogtreecommitdiff
path: root/gn3
diff options
context:
space:
mode:
Diffstat (limited to 'gn3')
-rw-r--r--gn3/computations/heatmap.py68
-rw-r--r--gn3/computations/qtlreaper.py92
-rw-r--r--gn3/db/genotypes.py69
-rw-r--r--gn3/settings.py7
4 files changed, 229 insertions, 7 deletions
diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
index 3c35029..e0ff05b 100644
--- a/gn3/computations/heatmap.py
+++ b/gn3/computations/heatmap.py
@@ -149,22 +149,22 @@ def heatmap_data(formd, search_result, conn: Any):
def __retrieve_traitlist_and_datalist(threshold, fullname):
trait = retrieve_trait_info(threshold, fullname, conn)
- return (
- trait,
- export_trait_data(retrieve_trait_data(trait, conn), strainlist))
+ return (trait, retrieve_trait_data(trait, conn))
traits_details = [
__retrieve_traitlist_and_datalist(threshold, fullname)
for fullname in search_result]
- traits_list = map(lambda x: x[0], traits_details)
- traits_data_list = map(lambda x: x[1], traits_details)
+ traits_list = tuple(x[0] for x in traits_details)
+ traits_data_list = [x[1] for x in traits_details]
+ exported_traits_data_list = tuple(
+ export_trait_data(td, strainlist) for td in traits_data_list)
return {
"target_description_checked": formd.formdata.getvalue(
"targetDescriptionCheck", ""),
"cluster_checked": cluster_checked,
"slink_data": (
- slink(cluster_traits(traits_data_list))
+ slink(cluster_traits(exported_traits_data_list))
if cluster_checked else False),
"sessionfile": formd.formdata.getvalue("session"),
"genotype": genotype,
@@ -173,5 +173,59 @@ def heatmap_data(formd, search_result, conn: Any):
"ppolar": formd.ppolar,
"mpolar":formd.mpolar,
"traits_list": traits_list,
- "traits_data_list": traits_data_list
+ "traits_data_list": traits_data_list,
+ "exported_traits_data_list": exported_traits_data_list
}
+
+def compute_heatmap_order(
+ slink_data, xoffset: int = 40, neworder: tuple = tuple()):
+ """
+ Compute the data used for drawing the heatmap proper from `slink_data`.
+
+ This function tries to reproduce the creation and update of the `neworder`
+ variable in
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L120
+ and in the `web.webqtl.heatmap.Heatmap.draw` function in GN1
+ """
+ d_1 = (0, 0, 0) # returned from self.draw in lines 391 and 399. This is just a placeholder
+
+ def __order_maker(norder, slnk_dt):
+ if isinstance(slnk_dt[0], int) and isinstance(slnk_dt[1], int):
+ return norder + (
+ (xoffset+20, slnk_dt[0]), (xoffset + 40, slnk_dt[1]))
+
+ if isinstance(slnk_dt[0], int):
+ return norder + ((xoffset + 20, slnk_dt[0]), )
+
+ if isinstance(slnk_dt[1], int):
+ return norder + ((xoffset + d_1[0] + 20, slnk_dt[1]), )
+
+ return __order_maker(__order_maker(norder, slnk_dt[0]), slnk_dt[1])
+
+ return __order_maker(neworder, slink_data)
+
+def retrieve_strains_and_values(orders, strainlist, traits_data_list):
+ """
+ Get the strains and their corresponding values from `strainlist` and
+ `traits_data_list`.
+
+ This migrates the code in
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L215-221
+ """
+ # This feels nasty! There's a lot of mutation of values here, that might
+ # indicate something untoward in the design of this function and its
+ # dependents ==> Review
+ strains = []
+ values = []
+ rets = []
+ for order in orders:
+ temp_val = traits_data_list[order[1]]
+ for i, strain in enumerate(strainlist):
+ if temp_val[i] is not None:
+ strains.append(strain)
+ values.append(temp_val[i])
+ rets.append([order, strains[:], values[:]])
+ strains = []
+ values = []
+
+ return rets
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
new file mode 100644
index 0000000..c058e14
--- /dev/null
+++ b/gn3/computations/qtlreaper.py
@@ -0,0 +1,92 @@
+"""
+This module contains functions to interact with the `qtlreaper` utility for
+computation of QTLs.
+"""
+import os
+import random
+import string
+import subprocess
+from gn3.settings import TMPDIR, REAPER_COMMAND
+
+def random_string(length):
+ """Generate a random string of length `length`."""
+ return "".join(
+ random.choices(
+ string.ascii_letters + string.digits, k=length))
+
+def generate_traits_file(strains, trait_values, traits_filename):
+ """
+ Generate a traits file for use with `qtlreaper`.
+
+ PARAMETERS:
+ strains: A list of strains to use as the headers for the various columns.
+ trait_values: A list of lists of values for each trait and strain.
+ traits_filename: The tab-separated value to put the values in for
+ computation of QTLs.
+ """
+ header = "Trait\t{}\n".format("\t".join(strains))
+ data = [header] + [
+ "T{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
+ for i, t in enumerate(trait_values[:-1])] + [
+ "T{}\t{}".format(len(trait_values), "\t".join([str(i) for i in t]))
+ for t in trait_values[-1:]]
+ with open(traits_filename, "w") as outfile:
+ outfile.writelines(data)
+
+def create_output_directory(path: str):
+ """Create the output directory at `path` if it does not exist."""
+ try:
+ os.mkdir(path)
+ except OSError:
+ pass
+
+def run_reaper(
+ genotype_filename: str, traits_filename: str,
+ other_options: tuple = ("--n_permutations", "1000"),
+ separate_nperm_output: bool = False,
+ output_dir: str = TMPDIR):
+ """
+ Run the QTLReaper command to compute the QTLs.
+
+ PARAMETERS:
+ genotype_filename: The complete path to a genotype file to use in the QTL
+ computation.
+ traits_filename: A path to a file previously generated with the
+ `generate_traits_file` function in this module, to be used in the QTL
+ computation.
+ other_options: Other options to pass to the `qtlreaper` command to modify
+ the QTL computations.
+ separate_nperm_output: A flag indicating whether or not to provide a
+ separate output for the permutations computation. The default is False,
+ which means by default, no separate output file is created.
+ output_dir: A path to the directory where the outputs are put
+
+ RETURNS:
+ The function returns a tuple of the main output file, and the output file
+ for the permutation computations. If the `separate_nperm_output` is `False`,
+ the second value in the tuple returned is `None`.
+
+ RAISES:
+ The function will raise a `subprocess.CalledProcessError` exception in case
+ of any errors running the `qtlreaper` command.
+ """
+ create_output_directory("{}/qtlreaper".format(output_dir))
+ output_filename = "{}/qtlreaper/main_output_{}.txt".format(
+ output_dir, random_string(10))
+ output_list = ["--main_output", output_filename]
+ if separate_nperm_output:
+ permu_output_filename = "{}/qtlreaper/permu_output_{}.txt".format(
+ output_dir, random_string(10))
+ output_list = output_list + ["--permu_output", permu_output_filename]
+ else:
+ permu_output_filename = None
+
+ command_list = [
+ REAPER_COMMAND, "--geno", genotype_filename,
+ *other_options, # this splices the `other_options` list here
+ "--traits", traits_filename,
+ *output_list # this splices the `output_list` list here
+ ]
+
+ subprocess.run(command_list, check=True)
+ return (output_filename, permu_output_filename)
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
new file mode 100644
index 0000000..610ddde
--- /dev/null
+++ b/gn3/db/genotypes.py
@@ -0,0 +1,69 @@
+"""Genotype utilities"""
+
+import os
+import gzip
+from gn3.settings import GENOTYPE_FILES
+
+def build_genotype_file(
+ geno_name: str, base_dir: str = GENOTYPE_FILES,
+ extension: str = "geno"):
+ """Build the absolute path for the genotype file."""
+ return "{}/{}.{}".format(os.path.abspath(base_dir), geno_name, extension)
+
+def load_genotype_samples(genotype_filename: str, file_type: str = "geno"):
+ """
+ Load sample of strains from genotype files.
+
+ DESCRIPTION:
+ Traits can contain a varied number of strains, some of which do not exist in
+ certain genotypes. In order to compute QTLs, GEMMAs, etc, we need to ensure
+ to pick only those strains that exist in the genotype under consideration
+ for the traits used in the computation.
+
+ This function loads a list of samples from the genotype files for use in
+ filtering out unusable strains.
+
+
+ PARAMETERS:
+ genotype_filename: The absolute path to the genotype file to load the
+ samples from.
+ file_type: The type of file. Currently supported values are 'geno' and
+ 'plink'.
+ """
+ file_type_fns = {
+ "geno": __load_genotype_samples_from_geno,
+ "plink": __load_genotype_samples_from_plink
+ }
+ return file_type_fns[file_type](genotype_filename)
+
+def __load_genotype_samples_from_geno(genotype_filename: str):
+ """
+ Helper function for `load_genotype_samples` function.
+
+ Loads samples from '.geno' files.
+ """
+ gzipped_filename = "{}.gz".format(genotype_filename)
+ if os.path.isfile(gzipped_filename):
+ genofile = gzip.open(gzipped_filename)
+ else:
+ genofile = open(genotype_filename)
+
+ for row in genofile:
+ line = row.strip()
+ if (not line) or (line.startswith(("#", "@"))):
+ continue
+ break
+
+ headers = line.split("\t")
+ if headers[3] == "Mb":
+ return headers[4:]
+ return headers[3:]
+
+def __load_genotype_samples_from_plink(genotype_filename: str):
+ """
+ Helper function for `load_genotype_samples` function.
+
+ Loads samples from '.plink' files.
+ """
+ genofile = open(genotype_filename)
+ return [line.split(" ")[1] for line in genofile]
diff --git a/gn3/settings.py b/gn3/settings.py
index f4866d5..a08f846 100644
--- a/gn3/settings.py
+++ b/gn3/settings.py
@@ -24,3 +24,10 @@ GN2_BASE_URL = "http://www.genenetwork.org/"
# biweight script
BIWEIGHT_RSCRIPT = "~/genenetwork3/scripts/calculate_biweight.R"
+
+# qtlreaper command
+REAPER_COMMAND = "{}/bin/qtlreaper".format(os.environ.get("GUIX_ENVIRONMENT"))
+
+# genotype files
+GENOTYPE_FILES = os.environ.get(
+ "GENOTYPE_FILES", "{}/genotype_files/genotype".format(os.environ.get("HOME")))