diff options
author | Alexander Kabui | 2021-06-20 09:11:03 +0300 |
---|---|---|
committer | Alexander Kabui | 2021-06-20 09:11:03 +0300 |
commit | 123ad47af288d6b94f0354a0abd5bc669bc988d4 (patch) | |
tree | 999eb41c1fab3d443215c0e4a8533abecf64f9af /gn3 | |
parent | 75801d83c8302b48051d413490e6ce2a0b8ff01f (diff) | |
parent | d653a635d0efd2291754c18f51d31f91a1c0a25c (diff) | |
download | genenetwork3-123ad47af288d6b94f0354a0abd5bc669bc988d4.tar.gz |
merge main
Diffstat (limited to 'gn3')
-rw-r--r-- | gn3/api/rqtl.py | 58 | ||||
-rw-r--r-- | gn3/app.py | 2 | ||||
-rw-r--r-- | gn3/commands.py | 14 | ||||
-rw-r--r-- | gn3/computations/correlations.py | 26 | ||||
-rw-r--r-- | gn3/computations/rqtl.py | 103 | ||||
-rw-r--r-- | gn3/settings.py | 1 |
6 files changed, 191 insertions, 13 deletions
diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py new file mode 100644 index 0000000..ebb746c --- /dev/null +++ b/gn3/api/rqtl.py @@ -0,0 +1,58 @@ +"""Endpoints for running the rqtl cmd""" +import os + +from flask import Blueprint +from flask import current_app +from flask import jsonify +from flask import request + +from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output +from gn3.computations.gemma import do_paths_exist + +rqtl = Blueprint("rqtl", __name__) + +@rqtl.route("/compute", methods=["POST"]) +def compute(): + """Given at least a geno_file and pheno_file, generate and +run the rqtl_wrapper script and return the results as JSON + + """ + genofile = request.form['geno_file'] + phenofile = request.form['pheno_file'] + + if not do_paths_exist([genofile, phenofile]): + raise FileNotFoundError + + # Split kwargs by those with values and boolean ones that just convert to True/False + kwargs = ["model", "method", "nperm", "scale", "control_marker"] + boolean_kwargs = ["addcovar", "interval", "pstrata"] + all_kwargs = kwargs + boolean_kwargs + + rqtl_kwargs = {"geno": genofile, "pheno": phenofile} + rqtl_bool_kwargs = [] + for kwarg in all_kwargs: + if kwarg in request.form: + if kwarg in kwargs: + rqtl_kwargs[kwarg] = request.form[kwarg] + if kwarg in boolean_kwargs: + rqtl_bool_kwargs.append(kwarg) + + rqtl_cmd = generate_rqtl_cmd( + rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"), + rqtl_wrapper_kwargs=rqtl_kwargs, + rqtl_wrapper_bool_kwargs=rqtl_bool_kwargs + ) + + rqtl_output = {} + if not os.path.isfile(os.path.join(current_app.config.get("TMPDIR"), + "output", rqtl_cmd.get('output_file'))): + os.system(rqtl_cmd.get('rqtl_cmd')) + + rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) + + rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file')) + if int(rqtl_kwargs['nperm']) > 0: + rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \ + process_perm_output(rqtl_cmd.get('output_file')) + + return jsonify(rqtl_output) @@ -5,6 +5,7 @@ from typing import Dict from typing import Union from flask import Flask from gn3.api.gemma import gemma +from gn3.api.rqtl import rqtl from gn3.api.general import general from gn3.api.correlation import correlation from gn3.api.data_entry import data_entry @@ -28,6 +29,7 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask: app.config.from_pyfile(config) app.register_blueprint(general, url_prefix="/api/") app.register_blueprint(gemma, url_prefix="/api/gemma") + app.register_blueprint(rqtl, url_prefix="/api/rqtl") app.register_blueprint(correlation, url_prefix="/api/correlation") app.register_blueprint(data_entry, url_prefix="/api/dataentry") return app diff --git a/gn3/commands.py b/gn3/commands.py index 4b0d62d..14bd295 100644 --- a/gn3/commands.py +++ b/gn3/commands.py @@ -30,6 +30,20 @@ def compose_gemma_cmd(gemma_wrapper_cmd: str = "gemma-wrapper", cmd += " ".join([f"{arg}" for arg in gemma_args]) return cmd +def compose_rqtl_cmd(rqtl_wrapper_cmd: str, + rqtl_wrapper_kwargs: Dict, + rqtl_wrapper_bool_kwargs: list) -> str: + """Compose a valid R/qtl command given the correct input""" + # Add kwargs with values + cmd = f"Rscript { rqtl_wrapper_cmd } " + " ".join( + [f"--{key} {val}" for key, val in rqtl_wrapper_kwargs.items()]) + + # Add boolean kwargs (kwargs that are either on or off, like --interval) + if rqtl_wrapper_bool_kwargs: + cmd += " " + cmd += " ".join([f"--{val}" for val in rqtl_wrapper_bool_kwargs]) + + return cmd def queue_cmd(conn: Redis, job_queue: str, diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index eae7ae4..bc738a7 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -69,8 +69,8 @@ pearson,spearman and biweight mid correlation return value is rho and p_value "spearman": scipy.stats.spearmanr } use_corr_method = corr_mapping.get(corr_method, "spearman") - corr_coeffient, p_val = use_corr_method(primary_values, target_values) - return (corr_coeffient, p_val) + corr_coefficient, p_val = use_corr_method(primary_values, target_values) + return (corr_coefficient, p_val) def compute_sample_r_correlation(trait_name, corr_method, trait_vals, @@ -85,13 +85,13 @@ def compute_sample_r_correlation(trait_name, corr_method, trait_vals, if num_overlap > 5: - (corr_coeffient, p_value) =\ + (corr_coefficient, p_value) =\ compute_corr_coeff_p_value(primary_values=sanitized_traits_vals, target_values=sanitized_target_vals, corr_method=corr_method) - if corr_coeffient is not None: - return (trait_name, corr_coeffient, p_value, num_overlap) + if corr_coefficient is not None: + return (trait_name, corr_coefficient, p_value, num_overlap) return None @@ -145,10 +145,10 @@ def compute_all_sample_correlation(this_trait, for sample_correlation in results: if sample_correlation is not None: - (trait_name, corr_coeffient, p_value, + (trait_name, corr_coefficient, p_value, num_overlap) = sample_correlation corr_result = { - "corr_coeffient": corr_coeffient, + "corr_coefficient": corr_coefficient, "p_value": p_value, "num_overlap": num_overlap } @@ -156,7 +156,7 @@ def compute_all_sample_correlation(this_trait, corr_results.append({trait_name: corr_result}) return sorted( corr_results, - key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coeffient"])) + key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coefficient"])) def benchmark_compute_all_sample(this_trait, @@ -179,12 +179,12 @@ def benchmark_compute_all_sample(this_trait, trait_vals=this_vals, target_samples_vals=target_vals) if sample_correlation is not None: - (trait_name, corr_coeffient, + (trait_name, corr_coefficient, p_value, num_overlap) = sample_correlation else: continue corr_result = { - "corr_coeffient": corr_coeffient, + "corr_coefficient": corr_coefficient, "p_value": p_value, "num_overlap": num_overlap } @@ -200,20 +200,20 @@ def tissue_correlation_for_trait( compute_corr_p_value: Callable = compute_corr_coeff_p_value) -> dict: """Given a primary tissue values for a trait and the target tissues values compute the correlation_cooeff and p value the input required are arrays - output -> List containing Dicts with corr_coefficient value,P_value and + output -> List containing Dicts with corr_coefficient value, P_value and also the tissue numbers is len(primary) == len(target) """ # ax :todo assertion that length one one target tissue ==primary_tissue - (tissue_corr_coeffient, + (tissue_corr_coefficient, p_value) = compute_corr_p_value(primary_values=primary_tissue_vals, target_values=target_tissues_values, corr_method=corr_method) tiss_corr_result = {trait_id: { - "tissue_corr": tissue_corr_coeffient, + "tissue_corr": tissue_corr_coefficient, "tissue_number": len(primary_tissue_vals), "tissue_p_val": p_value}} diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py new file mode 100644 index 0000000..0433b3f --- /dev/null +++ b/gn3/computations/rqtl.py @@ -0,0 +1,103 @@ +"""Procedures related rqtl computations""" +import os +from typing import Dict, List, Union + +import numpy as np + +from flask import current_app + +from gn3.commands import compose_rqtl_cmd +from gn3.computations.gemma import generate_hash_of_string +from gn3.fs_helpers import get_hash_of_files + +def generate_rqtl_cmd(rqtl_wrapper_cmd: str, + rqtl_wrapper_kwargs: Dict, + rqtl_wrapper_bool_kwargs: list) -> Dict: + """Given the base rqtl_wrapper command and +dict of keyword arguments, return the full rqtl_wrapper command and an +output filename generated from a hash of the genotype and phenotype files + + """ + + # Generate a hash from contents of the genotype and phenotype files + _hash = get_hash_of_files( + [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p"]]) + + # Append to hash a hash of keyword arguments + _hash += generate_hash_of_string( + ",".join([f"{k}:{v}" for k, v in rqtl_wrapper_kwargs.items() if k not in ["g", "p"]])) + + # Append to hash a hash of boolean keyword arguments + _hash += generate_hash_of_string( + ",".join(rqtl_wrapper_bool_kwargs)) + + # Temporarily substitute forward-slashes in hash with underscores + _hash = _hash.replace("/", "_") + + _output_filename = f"{_hash}-output.csv" + rqtl_wrapper_kwargs["filename"] = _output_filename + + return { + "output_file": + _output_filename, + "rqtl_cmd": + compose_rqtl_cmd(rqtl_wrapper_cmd=rqtl_wrapper_cmd, + rqtl_wrapper_kwargs=rqtl_wrapper_kwargs, + rqtl_wrapper_bool_kwargs=rqtl_wrapper_bool_kwargs) + } + + +def process_rqtl_output(file_name: str) -> List: + """Given an output file name, read in R/qtl results and return + a List of marker objects + + """ + marker_obs = [] + # Later I should probably redo this using csv.read to avoid the + # awkwardness with removing quotes with [1:-1] + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", file_name), "r") as the_file: + for line in the_file: + line_items = line.split(",") + if line_items[1][1:-1] == "chr" or not line_items: + # Skip header line + continue + + # Convert chr to int if possible + the_chr: Union[int, str] + try: + the_chr = int(line_items[1][1:-1]) + except ValueError: + the_chr = line_items[1][1:-1] + this_marker = { + "name": line_items[0][1:-1], + "chr": the_chr, + "cM": float(line_items[2]), + "Mb": float(line_items[2]), + "lod_score": float(line_items[3]) + } + marker_obs.append(this_marker) + + return marker_obs + + +def process_perm_output(file_name: str): + """Given base filename, read in R/qtl permutation output and calculate + suggestive and significant thresholds + + """ + perm_results = [] + with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"), + "output", "PERM_" + file_name), "r") as the_file: + for i, line in enumerate(the_file): + if i == 0: + # Skip header line + continue + + line_items = line.split(",") + perm_results.append(float(line_items[1])) + + suggestive = np.percentile(np.array(perm_results), 67) + significant = np.percentile(np.array(perm_results), 95) + + return perm_results, suggestive, significant diff --git a/gn3/settings.py b/gn3/settings.py index bde856d..770ba3d 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -6,6 +6,7 @@ import os BCRYPT_SALT = "$2b$12$mxLvu9XRLlIaaSeDxt8Sle" # Change this! DATA_DIR = "" GEMMA_WRAPPER_CMD = os.environ.get("GEMMA_WRAPPER", "gemma-wrapper") +RQTL_WRAPPER_CMD = os.environ.get("RQTL_WRAPPER") CACHEDIR = "" REDIS_URI = "redis://localhost:6379/0" REDIS_JOB_QUEUE = "GN3::job-queue" |