aboutsummaryrefslogtreecommitdiff
path: root/gn3
diff options
context:
space:
mode:
authorzsloan2021-06-18 17:33:09 -0500
committerGitHub2021-06-18 17:33:09 -0500
commitd653a635d0efd2291754c18f51d31f91a1c0a25c (patch)
tree309ea800da204f721e92ebc1c725144eab939d0f /gn3
parentc553b52e140de1d7e5ed49f07bae2f4a120266f8 (diff)
parentf7becfa11ca857104ecc1b668b4bd3d0a721083c (diff)
downloadgenenetwork3-d653a635d0efd2291754c18f51d31f91a1c0a25c.tar.gz
Merge pull request #13 from zsloan/feature/add_rqtl_endpoints
Feature/add rqtl endpoints
Diffstat (limited to 'gn3')
-rw-r--r--gn3/api/rqtl.py58
-rw-r--r--gn3/app.py2
-rw-r--r--gn3/commands.py14
-rw-r--r--gn3/computations/rqtl.py103
-rw-r--r--gn3/settings.py1
5 files changed, 178 insertions, 0 deletions
diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py
new file mode 100644
index 0000000..ebb746c
--- /dev/null
+++ b/gn3/api/rqtl.py
@@ -0,0 +1,58 @@
+"""Endpoints for running the rqtl cmd"""
+import os
+
+from flask import Blueprint
+from flask import current_app
+from flask import jsonify
+from flask import request
+
+from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output
+from gn3.computations.gemma import do_paths_exist
+
+rqtl = Blueprint("rqtl", __name__)
+
+@rqtl.route("/compute", methods=["POST"])
+def compute():
+ """Given at least a geno_file and pheno_file, generate and
+run the rqtl_wrapper script and return the results as JSON
+
+ """
+ genofile = request.form['geno_file']
+ phenofile = request.form['pheno_file']
+
+ if not do_paths_exist([genofile, phenofile]):
+ raise FileNotFoundError
+
+ # Split kwargs by those with values and boolean ones that just convert to True/False
+ kwargs = ["model", "method", "nperm", "scale", "control_marker"]
+ boolean_kwargs = ["addcovar", "interval", "pstrata"]
+ all_kwargs = kwargs + boolean_kwargs
+
+ rqtl_kwargs = {"geno": genofile, "pheno": phenofile}
+ rqtl_bool_kwargs = []
+ for kwarg in all_kwargs:
+ if kwarg in request.form:
+ if kwarg in kwargs:
+ rqtl_kwargs[kwarg] = request.form[kwarg]
+ if kwarg in boolean_kwargs:
+ rqtl_bool_kwargs.append(kwarg)
+
+ rqtl_cmd = generate_rqtl_cmd(
+ rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"),
+ rqtl_wrapper_kwargs=rqtl_kwargs,
+ rqtl_wrapper_bool_kwargs=rqtl_bool_kwargs
+ )
+
+ rqtl_output = {}
+ if not os.path.isfile(os.path.join(current_app.config.get("TMPDIR"),
+ "output", rqtl_cmd.get('output_file'))):
+ os.system(rqtl_cmd.get('rqtl_cmd'))
+
+ rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file'))
+
+ rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file'))
+ if int(rqtl_kwargs['nperm']) > 0:
+ rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \
+ process_perm_output(rqtl_cmd.get('output_file'))
+
+ return jsonify(rqtl_output)
diff --git a/gn3/app.py b/gn3/app.py
index dc89f55..046b5de 100644
--- a/gn3/app.py
+++ b/gn3/app.py
@@ -5,6 +5,7 @@ from typing import Dict
from typing import Union
from flask import Flask
from gn3.api.gemma import gemma
+from gn3.api.rqtl import rqtl
from gn3.api.general import general
from gn3.api.correlation import correlation
from gn3.api.data_entry import data_entry
@@ -28,6 +29,7 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask:
app.config.from_pyfile(config)
app.register_blueprint(general, url_prefix="/api/")
app.register_blueprint(gemma, url_prefix="/api/gemma")
+ app.register_blueprint(rqtl, url_prefix="/api/rqtl")
app.register_blueprint(correlation, url_prefix="/api/correlation")
app.register_blueprint(data_entry, url_prefix="/api/dataentry")
return app
diff --git a/gn3/commands.py b/gn3/commands.py
index 4b0d62d..14bd295 100644
--- a/gn3/commands.py
+++ b/gn3/commands.py
@@ -30,6 +30,20 @@ def compose_gemma_cmd(gemma_wrapper_cmd: str = "gemma-wrapper",
cmd += " ".join([f"{arg}" for arg in gemma_args])
return cmd
+def compose_rqtl_cmd(rqtl_wrapper_cmd: str,
+ rqtl_wrapper_kwargs: Dict,
+ rqtl_wrapper_bool_kwargs: list) -> str:
+ """Compose a valid R/qtl command given the correct input"""
+ # Add kwargs with values
+ cmd = f"Rscript { rqtl_wrapper_cmd } " + " ".join(
+ [f"--{key} {val}" for key, val in rqtl_wrapper_kwargs.items()])
+
+ # Add boolean kwargs (kwargs that are either on or off, like --interval)
+ if rqtl_wrapper_bool_kwargs:
+ cmd += " "
+ cmd += " ".join([f"--{val}" for val in rqtl_wrapper_bool_kwargs])
+
+ return cmd
def queue_cmd(conn: Redis,
job_queue: str,
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
new file mode 100644
index 0000000..0433b3f
--- /dev/null
+++ b/gn3/computations/rqtl.py
@@ -0,0 +1,103 @@
+"""Procedures related rqtl computations"""
+import os
+from typing import Dict, List, Union
+
+import numpy as np
+
+from flask import current_app
+
+from gn3.commands import compose_rqtl_cmd
+from gn3.computations.gemma import generate_hash_of_string
+from gn3.fs_helpers import get_hash_of_files
+
+def generate_rqtl_cmd(rqtl_wrapper_cmd: str,
+ rqtl_wrapper_kwargs: Dict,
+ rqtl_wrapper_bool_kwargs: list) -> Dict:
+ """Given the base rqtl_wrapper command and
+dict of keyword arguments, return the full rqtl_wrapper command and an
+output filename generated from a hash of the genotype and phenotype files
+
+ """
+
+ # Generate a hash from contents of the genotype and phenotype files
+ _hash = get_hash_of_files(
+ [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p"]])
+
+ # Append to hash a hash of keyword arguments
+ _hash += generate_hash_of_string(
+ ",".join([f"{k}:{v}" for k, v in rqtl_wrapper_kwargs.items() if k not in ["g", "p"]]))
+
+ # Append to hash a hash of boolean keyword arguments
+ _hash += generate_hash_of_string(
+ ",".join(rqtl_wrapper_bool_kwargs))
+
+ # Temporarily substitute forward-slashes in hash with underscores
+ _hash = _hash.replace("/", "_")
+
+ _output_filename = f"{_hash}-output.csv"
+ rqtl_wrapper_kwargs["filename"] = _output_filename
+
+ return {
+ "output_file":
+ _output_filename,
+ "rqtl_cmd":
+ compose_rqtl_cmd(rqtl_wrapper_cmd=rqtl_wrapper_cmd,
+ rqtl_wrapper_kwargs=rqtl_wrapper_kwargs,
+ rqtl_wrapper_bool_kwargs=rqtl_wrapper_bool_kwargs)
+ }
+
+
+def process_rqtl_output(file_name: str) -> List:
+ """Given an output file name, read in R/qtl results and return
+ a List of marker objects
+
+ """
+ marker_obs = []
+ # Later I should probably redo this using csv.read to avoid the
+ # awkwardness with removing quotes with [1:-1]
+ with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+ "output", file_name), "r") as the_file:
+ for line in the_file:
+ line_items = line.split(",")
+ if line_items[1][1:-1] == "chr" or not line_items:
+ # Skip header line
+ continue
+
+ # Convert chr to int if possible
+ the_chr: Union[int, str]
+ try:
+ the_chr = int(line_items[1][1:-1])
+ except ValueError:
+ the_chr = line_items[1][1:-1]
+ this_marker = {
+ "name": line_items[0][1:-1],
+ "chr": the_chr,
+ "cM": float(line_items[2]),
+ "Mb": float(line_items[2]),
+ "lod_score": float(line_items[3])
+ }
+ marker_obs.append(this_marker)
+
+ return marker_obs
+
+
+def process_perm_output(file_name: str):
+ """Given base filename, read in R/qtl permutation output and calculate
+ suggestive and significant thresholds
+
+ """
+ perm_results = []
+ with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+ "output", "PERM_" + file_name), "r") as the_file:
+ for i, line in enumerate(the_file):
+ if i == 0:
+ # Skip header line
+ continue
+
+ line_items = line.split(",")
+ perm_results.append(float(line_items[1]))
+
+ suggestive = np.percentile(np.array(perm_results), 67)
+ significant = np.percentile(np.array(perm_results), 95)
+
+ return perm_results, suggestive, significant
diff --git a/gn3/settings.py b/gn3/settings.py
index 2057ce1..ecfd502 100644
--- a/gn3/settings.py
+++ b/gn3/settings.py
@@ -6,6 +6,7 @@ import os
BCRYPT_SALT = "$2b$12$mxLvu9XRLlIaaSeDxt8Sle" # Change this!
DATA_DIR = ""
GEMMA_WRAPPER_CMD = os.environ.get("GEMMA_WRAPPER", "gemma-wrapper")
+RQTL_WRAPPER_CMD = os.environ.get("RQTL_WRAPPER")
CACHEDIR = ""
REDIS_URI = "redis://localhost:6379/0"
REDIS_JOB_QUEUE = "GN3::job-queue"