diff options
author | zsloan | 2021-06-18 17:33:09 -0500 |
---|---|---|
committer | GitHub | 2021-06-18 17:33:09 -0500 |
commit | d653a635d0efd2291754c18f51d31f91a1c0a25c (patch) | |
tree | 309ea800da204f721e92ebc1c725144eab939d0f /gn3 | |
parent | c553b52e140de1d7e5ed49f07bae2f4a120266f8 (diff) | |
parent | f7becfa11ca857104ecc1b668b4bd3d0a721083c (diff) | |
download | genenetwork3-d653a635d0efd2291754c18f51d31f91a1c0a25c.tar.gz |
Merge pull request #13 from zsloan/feature/add_rqtl_endpoints
Feature/add rqtl endpoints
Diffstat (limited to 'gn3')
-rw-r--r-- | gn3/api/rqtl.py | 58 | ||||
-rw-r--r-- | gn3/app.py | 2 | ||||
-rw-r--r-- | gn3/commands.py | 14 | ||||
-rw-r--r-- | gn3/computations/rqtl.py | 103 | ||||
-rw-r--r-- | gn3/settings.py | 1 |
5 files changed, 178 insertions, 0 deletions
diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py
new file mode 100644
index 0000000..ebb746c
--- /dev/null
+++ b/gn3/api/rqtl.py
@@ -0,0 +1,68 @@
+"""Endpoints for running the rqtl cmd"""
+import os
+
+from flask import Blueprint
+from flask import current_app
+from flask import jsonify
+from flask import request
+
+from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output
+from gn3.computations.gemma import do_paths_exist
+
+rqtl = Blueprint("rqtl", __name__)
+
+
+@rqtl.route("/compute", methods=["POST"])
+def compute():
+    """Run the rqtl_wrapper script for the posted form and return JSON.
+
+    The form must contain ``geno_file`` and ``pheno_file``.  The optional
+    fields ``model``, ``method``, ``nperm``, ``scale`` and ``control_marker``
+    are forwarded to the wrapper with their values; ``addcovar``,
+    ``interval`` and ``pstrata`` are forwarded as bare switches when present.
+
+    The wrapper is only run when its output file does not exist yet.  The
+    response always has a ``results`` list; ``perm_results``, ``suggestive``
+    and ``significant`` are added when ``nperm`` is greater than zero.
+
+    Raises FileNotFoundError when do_paths_exist rejects the two input paths.
+
+    """
+    genofile = request.form['geno_file']
+    phenofile = request.form['pheno_file']
+
+    if not do_paths_exist([genofile, phenofile]):
+        raise FileNotFoundError
+
+    # Split kwargs by those with values and boolean ones that just convert to True/False
+    value_kwargs = ("model", "method", "nperm", "scale", "control_marker")
+    boolean_kwargs = ("addcovar", "interval", "pstrata")
+
+    rqtl_kwargs = {"geno": genofile, "pheno": phenofile}
+    rqtl_kwargs.update(
+        (kwarg, request.form[kwarg]) for kwarg in value_kwargs if kwarg in request.form)
+    rqtl_bool_kwargs = [kwarg for kwarg in boolean_kwargs if kwarg in request.form]
+
+    rqtl_cmd = generate_rqtl_cmd(
+        rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"),
+        rqtl_wrapper_kwargs=rqtl_kwargs,
+        rqtl_wrapper_bool_kwargs=rqtl_bool_kwargs
+    )
+    output_file = rqtl_cmd.get('output_file')
+
+    # The output name is derived from a hash of the inputs, so an existing file
+    # is reused instead of running the wrapper again.  The "/tmp" fallback
+    # matches the one used when the output is read back.
+    output_path = os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+                               "output", output_file)
+    if not os.path.isfile(output_path):
+        os.system(rqtl_cmd.get('rqtl_cmd'))
+
+    rqtl_output = {'results': process_rqtl_output(output_file)}
+
+    # nperm is optional in the form; treat a missing value as "no permutations"
+    if int(rqtl_kwargs.get('nperm', 0)) > 0:
+        rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \
+            process_perm_output(output_file)
+
+    return jsonify(rqtl_output)
diff --git a/gn3/app.py b/gn3/app.py
--- a/gn3/app.py
+++ b/gn3/app.py
@@ -5,6 +5,7 @@ from typing import Dict
 from typing import Union
 from flask import Flask
 from gn3.api.gemma import gemma
+from gn3.api.rqtl import rqtl
 from gn3.api.general import general
 from gn3.api.correlation import correlation
 from gn3.api.data_entry import data_entry
@@ -28,6 +29,7 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask:
             app.config.from_pyfile(config)
     app.register_blueprint(general, url_prefix="/api/")
     app.register_blueprint(gemma, url_prefix="/api/gemma")
+    app.register_blueprint(rqtl, url_prefix="/api/rqtl")
     app.register_blueprint(correlation, url_prefix="/api/correlation")
     app.register_blueprint(data_entry, url_prefix="/api/dataentry")
     return app
diff --git a/gn3/commands.py b/gn3/commands.py
index 4b0d62d..14bd295 100644
--- a/gn3/commands.py
+++ b/gn3/commands.py
@@ -30,6 +30,35 @@ def compose_gemma_cmd(gemma_wrapper_cmd: str = "gemma-wrapper",
         cmd += " ".join([f"{arg}" for arg in gemma_args])
     return cmd
 
+
+def compose_rqtl_cmd(rqtl_wrapper_cmd: str,
+                     rqtl_wrapper_kwargs: Dict,
+                     rqtl_wrapper_bool_kwargs: list) -> str:
+    """Compose the shell command line that runs the R/qtl wrapper script.
+
+    The command is "Rscript <rqtl_wrapper_cmd>" followed by one
+    "--key value" pair per item of rqtl_wrapper_kwargs and then one bare
+    "--flag" per entry of rqtl_wrapper_bool_kwargs (kwargs that are either
+    on or off, like --interval).
+
+    Every value is passed through shlex.quote, so a value containing
+    whitespace or shell metacharacters stays a single argument when the
+    command is handed to a shell.
+
+    """
+    # Local import: only this hunk of the module is changed by the patch
+    from shlex import quote  # pylint: disable=import-outside-toplevel
+
+    # Add kwargs with values
+    cmd = f"Rscript {rqtl_wrapper_cmd} " + " ".join(
+        f"--{key} {quote(str(val))}" for key, val in rqtl_wrapper_kwargs.items())
+
+    # Add boolean kwargs
+    if rqtl_wrapper_bool_kwargs:
+        cmd += " " + " ".join(f"--{flag}" for flag in rqtl_wrapper_bool_kwargs)
+
+    return cmd
+
 
 def queue_cmd(conn: Redis,
               job_queue: str,
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
new file mode 100644
index 0000000..0433b3f
--- /dev/null
+++ b/gn3/computations/rqtl.py
@@ -0,0 +1,122 @@
+"""Procedures related to rqtl computations"""
+import csv
+import os
+from typing import Dict, List, Tuple, Union
+
+import numpy as np
+
+from flask import current_app
+
+from gn3.commands import compose_rqtl_cmd
+from gn3.computations.gemma import generate_hash_of_string
+from gn3.fs_helpers import get_hash_of_files
+
+# Keyword names under which the genotype and phenotype file paths may be
+# passed.  The /api/rqtl/compute endpoint uses "geno"/"pheno"; the short
+# "g"/"p" spellings are accepted as well.
+_FILE_KWARGS = ("g", "p", "geno", "pheno")
+
+
+def generate_rqtl_cmd(rqtl_wrapper_cmd: str,
+                      rqtl_wrapper_kwargs: Dict,
+                      rqtl_wrapper_bool_kwargs: list) -> Dict:
+    """Given the base rqtl_wrapper command and dict of keyword arguments,
+    return the full rqtl_wrapper command and an output filename.
+
+    The filename is built from a hash of the genotype and phenotype files,
+    a hash of the remaining keyword arguments and a hash of the boolean
+    keyword arguments.  The returned dict has the keys "output_file" and
+    "rqtl_cmd"; rqtl_wrapper_kwargs itself is not modified.
+
+    """
+    input_files = [
+        val for key, val in rqtl_wrapper_kwargs.items() if key in _FILE_KWARGS]
+    other_kwargs = [
+        f"{key}:{val}" for key, val in rqtl_wrapper_kwargs.items()
+        if key not in _FILE_KWARGS]
+
+    # Generate a hash from the genotype and phenotype files
+    _hash = get_hash_of_files(input_files)
+
+    # Append to hash a hash of keyword arguments
+    _hash += generate_hash_of_string(",".join(other_kwargs))
+
+    # Append to hash a hash of boolean keyword arguments
+    _hash += generate_hash_of_string(",".join(rqtl_wrapper_bool_kwargs))
+
+    # Temporarily substitute forward-slashes in hash with underscores
+    _hash = _hash.replace("/", "_")
+
+    _output_filename = f"{_hash}-output.csv"
+
+    return {
+        "output_file":
+        _output_filename,
+        "rqtl_cmd":
+        compose_rqtl_cmd(
+            rqtl_wrapper_cmd=rqtl_wrapper_cmd,
+            # Pass a copy so that the caller's dict is left untouched
+            rqtl_wrapper_kwargs={**rqtl_wrapper_kwargs,
+                                 "filename": _output_filename},
+            rqtl_wrapper_bool_kwargs=rqtl_wrapper_bool_kwargs)
+    }
+
+
+def process_rqtl_output(file_name: str) -> List:
+    """Given an output file name, read in R/qtl results and return
+    a List of marker objects
+
+    The file is read from <TMPDIR>/output/<file_name>, with TMPDIR taken
+    from the app config ("/tmp" when unset).  Each marker is a dict with
+    the keys "name", "chr", "cM", "Mb" and "lod_score"; "chr" is an int
+    when the value parses as one and a str otherwise.
+
+    """
+    output_path = os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+                               "output", file_name)
+    marker_obs = []
+    with open(output_path, "r", newline="") as the_file:
+        # csv.reader strips the quotes around the text columns
+        for line_items in csv.reader(the_file):
+            # Skip blank lines and the header line
+            if not line_items or line_items[1] == "chr":
+                continue
+
+            # Convert chr to int if possible
+            the_chr: Union[int, str]
+            try:
+                the_chr = int(line_items[1])
+            except ValueError:
+                the_chr = line_items[1]
+
+            marker_obs.append({
+                "name": line_items[0],
+                "chr": the_chr,
+                # "cM" and "Mb" are both read from the same column
+                "cM": float(line_items[2]),
+                "Mb": float(line_items[2]),
+                "lod_score": float(line_items[3])
+            })
+
+    return marker_obs
+
+
+def process_perm_output(file_name: str) -> Tuple[List[float], float, float]:
+    """Given base filename, read in R/qtl permutation output and calculate
+    suggestive and significant thresholds
+
+    The file is read from <TMPDIR>/output/PERM_<file_name>.  Returns the
+    permutation values, the suggestive threshold (their 67th percentile)
+    and the significant threshold (their 95th percentile).
+
+    """
+    perm_path = os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+                             "output", "PERM_" + file_name)
+    with open(perm_path, "r", newline="") as the_file:
+        rows = csv.reader(the_file)
+        next(rows, None)  # Skip header line
+        perm_results = [float(row[1]) for row in rows if row]
+
+    suggestive, significant = np.percentile(np.array(perm_results), [67, 95])
+
+    return perm_results, suggestive, significant
diff --git a/gn3/settings.py b/gn3/settings.py
index 2057ce1..ecfd502 100644
--- a/gn3/settings.py
+++ b/gn3/settings.py
@@ -6,6 +6,7 @@ import os
 BCRYPT_SALT = "$2b$12$mxLvu9XRLlIaaSeDxt8Sle"  # Change this!
 DATA_DIR = ""
 GEMMA_WRAPPER_CMD = os.environ.get("GEMMA_WRAPPER", "gemma-wrapper")
+RQTL_WRAPPER_CMD = os.environ.get("RQTL_WRAPPER")
 CACHEDIR = ""
 REDIS_URI = "redis://localhost:6379/0"
 REDIS_JOB_QUEUE = "GN3::job-queue"