diff options
Diffstat (limited to 'gn3/computations')
-rw-r--r-- | gn3/computations/correlations.py | 26 | ||||
-rw-r--r-- | gn3/computations/rqtl.py | 103 |
2 files changed, 116 insertions, 13 deletions
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index eae7ae4..bc738a7 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -69,8 +69,8 @@ pearson,spearman and biweight mid correlation return value is rho and p_value "spearman": scipy.stats.spearmanr } use_corr_method = corr_mapping.get(corr_method, "spearman") - corr_coeffient, p_val = use_corr_method(primary_values, target_values) - return (corr_coeffient, p_val) + corr_coefficient, p_val = use_corr_method(primary_values, target_values) + return (corr_coefficient, p_val) def compute_sample_r_correlation(trait_name, corr_method, trait_vals, @@ -85,13 +85,13 @@ def compute_sample_r_correlation(trait_name, corr_method, trait_vals, if num_overlap > 5: - (corr_coeffient, p_value) =\ + (corr_coefficient, p_value) =\ compute_corr_coeff_p_value(primary_values=sanitized_traits_vals, target_values=sanitized_target_vals, corr_method=corr_method) - if corr_coeffient is not None: - return (trait_name, corr_coeffient, p_value, num_overlap) + if corr_coefficient is not None: + return (trait_name, corr_coefficient, p_value, num_overlap) return None @@ -145,10 +145,10 @@ def compute_all_sample_correlation(this_trait, for sample_correlation in results: if sample_correlation is not None: - (trait_name, corr_coeffient, p_value, + (trait_name, corr_coefficient, p_value, num_overlap) = sample_correlation corr_result = { - "corr_coeffient": corr_coeffient, + "corr_coefficient": corr_coefficient, "p_value": p_value, "num_overlap": num_overlap } @@ -156,7 +156,7 @@ def compute_all_sample_correlation(this_trait, corr_results.append({trait_name: corr_result}) return sorted( corr_results, - key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coeffient"])) + key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coefficient"])) def benchmark_compute_all_sample(this_trait, @@ -179,12 +179,12 @@ def benchmark_compute_all_sample(this_trait, trait_vals=this_vals, target_samples_vals=target_vals) if sample_correlation is not None: - (trait_name, corr_coeffient, + (trait_name, corr_coefficient, p_value, num_overlap) = sample_correlation else: continue corr_result = { - "corr_coeffient": corr_coeffient, + "corr_coefficient": corr_coefficient, "p_value": p_value, "num_overlap": num_overlap } @@ -200,20 +200,20 @@ def tissue_correlation_for_trait( compute_corr_p_value: Callable = compute_corr_coeff_p_value) -> dict: """Given a primary tissue values for a trait and the target tissues values compute the correlation_cooeff and p value the input required are arrays - output -> List containing Dicts with corr_coefficient value,P_value and + output -> List containing Dicts with corr_coefficient value, P_value and also the tissue numbers is len(primary) == len(target) """ # ax :todo assertion that length one one target tissue ==primary_tissue - (tissue_corr_coeffient, + (tissue_corr_coefficient, p_value) = compute_corr_p_value(primary_values=primary_tissue_vals, target_values=target_tissues_values, corr_method=corr_method) tiss_corr_result = {trait_id: { - "tissue_corr": tissue_corr_coeffient, + "tissue_corr": tissue_corr_coefficient, "tissue_number": len(primary_tissue_vals), "tissue_p_val": p_value}} diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py new file mode 100644 index 0000000..0433b3f --- /dev/null +++ b/gn3/computations/rqtl.py @@ -0,0 +1,103 @@ +"""Procedures related rqtl computations""" +import os +from typing import Dict, List, Union + +import numpy as np + +from flask import current_app + +from gn3.commands import compose_rqtl_cmd +from gn3.computations.gemma import generate_hash_of_string +from gn3.fs_helpers import get_hash_of_files + +def generate_rqtl_cmd(rqtl_wrapper_cmd: str, + rqtl_wrapper_kwargs: Dict, + rqtl_wrapper_bool_kwargs: list) -> Dict: + """Given the base rqtl_wrapper command and +dict of keyword arguments, return the full rqtl_wrapper command and an +output filename generated from a hash of the genotype and phenotype files + + """ + + # Generate a hash from contents of the genotype and phenotype files + _hash = get_hash_of_files( + [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p"]]) + + # Append to hash a hash of keyword arguments + _hash += generate_hash_of_string( + ",".join([f"{k}:{v}" for k, v in rqtl_wrapper_kwargs.items() if k not in ["g", "p"]])) + + # Append to hash a hash of boolean keyword arguments + _hash += generate_hash_of_string( + ",".join(rqtl_wrapper_bool_kwargs)) + + # Temporarily substitute forward-slashes in hash with underscores + _hash = _hash.replace("/", "_") + + _output_filename = f"{_hash}-output.csv" + rqtl_wrapper_kwargs["filename"] = _output_filename + + return { + "output_file": + _output_filename, + "rqtl_cmd": + compose_rqtl_cmd(rqtl_wrapper_cmd=rqtl_wrapper_cmd, + rqtl_wrapper_kwargs=rqtl_wrapper_kwargs, + rqtl_wrapper_bool_kwargs=rqtl_wrapper_bool_kwargs) + } + + +def process_rqtl_output(file_name: str) -> List: + """Given an output file name, read in R/qtl results and return + a List of marker objects + + """ + marker_obs = [] + # Later I should probably redo this using csv.read to avoid the + # awkwardness with removing quotes with [1:-1] + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + for line in the_file: + line_items = line.split(",") + if line_items[1][1:-1] == "chr" or not line_items: + # Skip header line + continue + + # Convert chr to int if possible + the_chr: Union[int, str] + try: + the_chr = int(line_items[1][1:-1]) + except ValueError: + the_chr = line_items[1][1:-1] + this_marker = { + "name": line_items[0][1:-1], + "chr": the_chr, + "cM": float(line_items[2]), + "Mb": float(line_items[2]), + "lod_score": float(line_items[3]) + } + marker_obs.append(this_marker) + + return marker_obs + + +def process_perm_output(file_name: str): + """Given base filename, read in R/qtl permutation output and calculate + suggestive and significant thresholds + + """ + perm_results = [] + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "PERM_" + file_name), "r") as the_file: + for i, line in enumerate(the_file): + if i == 0: + # Skip header line + continue + + line_items = line.split(",") + perm_results.append(float(line_items[1])) + + suggestive = np.percentile(np.array(perm_results), 67) + significant = np.percentile(np.array(perm_results), 95) + + return perm_results, suggestive, significant |