author    Alexander Kabui    2021-06-20 09:11:03 +0300
committer Alexander Kabui    2021-06-20 09:11:03 +0300
commit    123ad47af288d6b94f0354a0abd5bc669bc988d4 (patch)
tree      999eb41c1fab3d443215c0e4a8533abecf64f9af /gn3/computations
parent    75801d83c8302b48051d413490e6ce2a0b8ff01f (diff)
parent    d653a635d0efd2291754c18f51d31f91a1c0a25c (diff)
download  genenetwork3-123ad47af288d6b94f0354a0abd5bc669bc988d4.tar.gz
merge main
Diffstat (limited to 'gn3/computations')
-rw-r--r--  gn3/computations/correlations.py   26
-rw-r--r--  gn3/computations/rqtl.py          103
2 files changed, 116 insertions, 13 deletions
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index eae7ae4..bc738a7 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -69,8 +69,8 @@ pearson,spearman and biweight mid correlation return value is rho and p_value
"spearman": scipy.stats.spearmanr
}
use_corr_method = corr_mapping.get(corr_method, "spearman")
- corr_coeffient, p_val = use_corr_method(primary_values, target_values)
- return (corr_coeffient, p_val)
+ corr_coefficient, p_val = use_corr_method(primary_values, target_values)
+ return (corr_coefficient, p_val)
def compute_sample_r_correlation(trait_name, corr_method, trait_vals,
@@ -85,13 +85,13 @@ def compute_sample_r_correlation(trait_name, corr_method, trait_vals,
if num_overlap > 5:
- (corr_coeffient, p_value) =\
+ (corr_coefficient, p_value) =\
compute_corr_coeff_p_value(primary_values=sanitized_traits_vals,
target_values=sanitized_target_vals,
corr_method=corr_method)
- if corr_coeffient is not None:
- return (trait_name, corr_coeffient, p_value, num_overlap)
+ if corr_coefficient is not None:
+ return (trait_name, corr_coefficient, p_value, num_overlap)
return None
@@ -145,10 +145,10 @@ def compute_all_sample_correlation(this_trait,
for sample_correlation in results:
if sample_correlation is not None:
- (trait_name, corr_coeffient, p_value,
+ (trait_name, corr_coefficient, p_value,
num_overlap) = sample_correlation
corr_result = {
- "corr_coeffient": corr_coeffient,
+ "corr_coefficient": corr_coefficient,
"p_value": p_value,
"num_overlap": num_overlap
}
@@ -156,7 +156,7 @@ def compute_all_sample_correlation(this_trait,
corr_results.append({trait_name: corr_result})
return sorted(
corr_results,
- key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coeffient"]))
+ key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coefficient"]))
def benchmark_compute_all_sample(this_trait,
@@ -179,12 +179,12 @@ def benchmark_compute_all_sample(this_trait,
trait_vals=this_vals,
target_samples_vals=target_vals)
if sample_correlation is not None:
- (trait_name, corr_coeffient,
+ (trait_name, corr_coefficient,
p_value, num_overlap) = sample_correlation
else:
continue
corr_result = {
- "corr_coeffient": corr_coeffient,
+ "corr_coefficient": corr_coefficient,
"p_value": p_value,
"num_overlap": num_overlap
}
@@ -200,20 +200,20 @@ def tissue_correlation_for_trait(
compute_corr_p_value: Callable = compute_corr_coeff_p_value) -> dict:
"""Given a primary tissue values for a trait and the target tissues values
compute the correlation_cooeff and p value the input required are arrays
- output -> List containing Dicts with corr_coefficient value,P_value and
+ output -> List containing Dicts with corr_coefficient value, P_value and
also the tissue number, where len(primary) == len(target)
"""
# ax: TODO assert that the length of one target tissue == primary_tissue
- (tissue_corr_coeffient,
+ (tissue_corr_coefficient,
p_value) = compute_corr_p_value(primary_values=primary_tissue_vals,
target_values=target_tissues_values,
corr_method=corr_method)
tiss_corr_result = {trait_id: {
- "tissue_corr": tissue_corr_coeffient,
+ "tissue_corr": tissue_corr_coefficient,
"tissue_number": len(primary_tissue_vals),
"tissue_p_val": p_value}}
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
new file mode 100644
index 0000000..0433b3f
--- /dev/null
+++ b/gn3/computations/rqtl.py
@@ -0,0 +1,103 @@
+"""Procedures related rqtl computations"""
+import os
+from typing import Dict, List, Union
+
+import numpy as np
+
+from flask import current_app
+
+from gn3.commands import compose_rqtl_cmd
+from gn3.computations.gemma import generate_hash_of_string
+from gn3.fs_helpers import get_hash_of_files
+
+def generate_rqtl_cmd(rqtl_wrapper_cmd: str,
+ rqtl_wrapper_kwargs: Dict,
+ rqtl_wrapper_bool_kwargs: list) -> Dict:
+ """Given the base rqtl_wrapper command and
+dict of keyword arguments, return the full rqtl_wrapper command and an
+output filename generated from a hash of the genotype and phenotype files
+
+ """
+
+ # Generate a hash from contents of the genotype and phenotype files
+ _hash = get_hash_of_files(
+ [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p"]])
+
+ # Append to hash a hash of keyword arguments
+ _hash += generate_hash_of_string(
+ ",".join([f"{k}:{v}" for k, v in rqtl_wrapper_kwargs.items() if k not in ["g", "p"]]))
+
+ # Append to hash a hash of boolean keyword arguments
+ _hash += generate_hash_of_string(
+ ",".join(rqtl_wrapper_bool_kwargs))
+
+ # Temporarily substitute forward-slashes in hash with underscores
+ _hash = _hash.replace("/", "_")
+
+ _output_filename = f"{_hash}-output.csv"
+ rqtl_wrapper_kwargs["filename"] = _output_filename
+
+ return {
+ "output_file":
+ _output_filename,
+ "rqtl_cmd":
+ compose_rqtl_cmd(rqtl_wrapper_cmd=rqtl_wrapper_cmd,
+ rqtl_wrapper_kwargs=rqtl_wrapper_kwargs,
+ rqtl_wrapper_bool_kwargs=rqtl_wrapper_bool_kwargs)
+ }
+
+
+def process_rqtl_output(file_name: str) -> List:
+ """Given an output file name, read in R/qtl results and return
+ a List of marker objects
+
+ """
+ marker_obs = []
+ # Later I should probably redo this using csv.reader to avoid the
+ # awkwardness with removing quotes with [1:-1]
+ with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+ "output", file_name), "r") as the_file:
+ for line in the_file:
+ line_items = line.split(",")
+ if line_items[1][1:-1] == "chr" or not line_items:
+ # Skip header line
+ continue
+
+ # Convert chr to int if possible
+ the_chr: Union[int, str]
+ try:
+ the_chr = int(line_items[1][1:-1])
+ except ValueError:
+ the_chr = line_items[1][1:-1]
+ this_marker = {
+ "name": line_items[0][1:-1],
+ "chr": the_chr,
+ "cM": float(line_items[2]),
+ "Mb": float(line_items[2]),
+ "lod_score": float(line_items[3])
+ }
+ marker_obs.append(this_marker)
+
+ return marker_obs
+
+
+def process_perm_output(file_name: str):
+ """Given base filename, read in R/qtl permutation output and calculate
+ suggestive and significant thresholds
+
+ """
+ perm_results = []
+ with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+ "output", "PERM_" + file_name), "r") as the_file:
+ for i, line in enumerate(the_file):
+ if i == 0:
+ # Skip header line
+ continue
+
+ line_items = line.split(",")
+ perm_results.append(float(line_items[1]))
+
+ suggestive = np.percentile(np.array(perm_results), 67)
+ significant = np.percentile(np.array(perm_results), 95)
+
+ return perm_results, suggestive, significant
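
For context, a minimal sketch (not part of this commit) of the row format process_rqtl_output expects and of the thresholds computed by process_perm_output; the marker name and LOD values below are invented.

import numpy as np

# One quoted row as R's write.csv would emit it: "name","chr",cM,LOD
line = '"rsm10000000001","1",24.72,3.21\n'
name, chrom, pos, lod = line.split(",")

try:                              # chromosome becomes an int when possible,
    the_chr = int(chrom[1:-1])    # otherwise the quoted string (e.g. "X")
except ValueError:
    the_chr = chrom[1:-1]

marker = {
    "name": name[1:-1],           # [1:-1] strips the surrounding quotes
    "chr": the_chr,
    "cM": float(pos),
    "Mb": float(pos),             # the wrapper reuses the cM column for Mb
    "lod_score": float(lod),
}

# Suggestive and significant thresholds from permutation LOD scores,
# as computed by process_perm_output:
perm_results = [2.1, 2.4, 2.8, 3.0, 3.3, 3.7, 4.1]
suggestive = np.percentile(np.array(perm_results), 67)
significant = np.percentile(np.array(perm_results), 95)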