merge main

author: Alexander Kabui 2021-06-20 09:11:03 +0300
committer: Alexander Kabui 2021-06-20 09:11:03 +0300
commit: 123ad47af288d6b94f0354a0abd5bc669bc988d4 (patch)
tree: 999eb41c1fab3d443215c0e4a8533abecf64f9af /gn3
parent: 75801d83c8302b48051d413490e6ce2a0b8ff01f (diff)
parent: d653a635d0efd2291754c18f51d31f91a1c0a25c (diff)
download: genenetwork3-123ad47af288d6b94f0354a0abd5bc669bc988d4.tar.gz
6 files changed, 191 insertions, 13 deletions
diff --git a/gn3/api/rqtl.py b/gn3/api/rqtl.py
new file mode 100644
index 0000000..ebb746c
--- /dev/null
+++ b/gn3/api/rqtl.py
@@ -0,0 +1,58 @@
+"""Endpoints for running the rqtl cmd"""
+import os
+
+from flask import Blueprint
+from flask import current_app
+from flask import jsonify
+from flask import request
+
+from gn3.computations.rqtl import generate_rqtl_cmd, process_rqtl_output, process_perm_output
+from gn3.computations.gemma import do_paths_exist
+
+rqtl = Blueprint("rqtl", __name__)
+
+@rqtl.route("/compute", methods=["POST"])
+def compute():
+    """Given at least a geno_file and pheno_file, generate and
+run the rqtl_wrapper script and return the results as JSON
+
+    """
+    genofile = request.form['geno_file']
+    phenofile = request.form['pheno_file']
+
+    if not do_paths_exist([genofile, phenofile]):
+        raise FileNotFoundError
+
+    # Split kwargs by those with values and boolean ones that just convert to True/False
+    kwargs = ["model", "method", "nperm", "scale", "control_marker"]
+    boolean_kwargs = ["addcovar", "interval", "pstrata"]
+    all_kwargs = kwargs + boolean_kwargs
+
+    rqtl_kwargs = {"geno": genofile, "pheno": phenofile}
+    rqtl_bool_kwargs = []
+    for kwarg in all_kwargs:
+        if kwarg in request.form:
+            if kwarg in kwargs:
+                rqtl_kwargs[kwarg] = request.form[kwarg]
+            if kwarg in boolean_kwargs:
+                rqtl_bool_kwargs.append(kwarg)
+
+    rqtl_cmd = generate_rqtl_cmd(
+        rqtl_wrapper_cmd=current_app.config.get("RQTL_WRAPPER_CMD"),
+        rqtl_wrapper_kwargs=rqtl_kwargs,
+        rqtl_wrapper_bool_kwargs=rqtl_bool_kwargs
+    )
+
+    rqtl_output = {}
+    if not os.path.isfile(os.path.join(current_app.config.get("TMPDIR"),
+                                       "output", rqtl_cmd.get('output_file'))):
+        os.system(rqtl_cmd.get('rqtl_cmd'))
+
+    rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file'))
+
+    rqtl_output['results'] = process_rqtl_output(rqtl_cmd.get('output_file'))
+    if int(rqtl_kwargs['nperm']) > 0:
+        rqtl_output['perm_results'], rqtl_output['suggestive'], rqtl_output['significant'] = \
+        process_perm_output(rqtl_cmd.get('output_file'))
+
+    return jsonify(rqtl_output)
diff --git a/gn3/app.py b/gn3/app.py
index dc89f55..046b5de 100644
--- a/gn3/app.py
+++ b/gn3/app.py
@@ -5,6 +5,7 @@ from typing import Dict
 from typing import Union
 from flask import Flask
 from gn3.api.gemma import gemma
+from gn3.api.rqtl import rqtl
 from gn3.api.general import general
 from gn3.api.correlation import correlation
 from gn3.api.data_entry import data_entry
@@ -28,6 +29,7 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask:
             app.config.from_pyfile(config)
     app.register_blueprint(general, url_prefix="/api/")
     app.register_blueprint(gemma, url_prefix="/api/gemma")
+    app.register_blueprint(rqtl, url_prefix="/api/rqtl")
     app.register_blueprint(correlation, url_prefix="/api/correlation")
     app.register_blueprint(data_entry, url_prefix="/api/dataentry")
     return app
diff --git a/gn3/commands.py b/gn3/commands.py
index 4b0d62d..14bd295 100644
--- a/gn3/commands.py
+++ b/gn3/commands.py
@@ -30,6 +30,20 @@ def compose_gemma_cmd(gemma_wrapper_cmd: str = "gemma-wrapper",
         cmd += " ".join([f"{arg}" for arg in gemma_args])
     return cmd
 
+def compose_rqtl_cmd(rqtl_wrapper_cmd: str,
+                     rqtl_wrapper_kwargs: Dict,
+                     rqtl_wrapper_bool_kwargs: list) -> str:
+    """Compose a valid R/qtl command given the correct input"""
+    # Add kwargs with values
+    cmd = f"Rscript { rqtl_wrapper_cmd } " + " ".join(
+        [f"--{key} {val}" for key, val in rqtl_wrapper_kwargs.items()])
+
+    # Add boolean kwargs (kwargs that are either on or off, like --interval)
+    if rqtl_wrapper_bool_kwargs:
+        cmd += " "
+        cmd += " ".join([f"--{val}" for val in rqtl_wrapper_bool_kwargs])
+
+    return cmd
 
 def queue_cmd(conn: Redis,
               job_queue: str,
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index eae7ae4..bc738a7 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -69,8 +69,8 @@ pearson,spearman and biweight mid correlation return value is rho and p_value
         "spearman": scipy.stats.spearmanr
     }
     use_corr_method = corr_mapping.get(corr_method, "spearman")
-    corr_coeffient, p_val = use_corr_method(primary_values, target_values)
-    return (corr_coeffient, p_val)
+    corr_coefficient, p_val = use_corr_method(primary_values, target_values)
+    return (corr_coefficient, p_val)
 
 
 def compute_sample_r_correlation(trait_name, corr_method, trait_vals,
@@ -85,13 +85,13 @@ def compute_sample_r_correlation(trait_name, corr_method, trait_vals,
 
     if num_overlap > 5:
 
-        (corr_coeffient, p_value) =\
+        (corr_coefficient, p_value) =\
             compute_corr_coeff_p_value(primary_values=sanitized_traits_vals,
                                        target_values=sanitized_target_vals,
                                        corr_method=corr_method)
 
-        if corr_coeffient is not None:
-            return (trait_name, corr_coeffient, p_value, num_overlap)
+        if corr_coefficient is not None:
+            return (trait_name, corr_coefficient, p_value, num_overlap)
     return None
 
 
@@ -145,10 +145,10 @@ def compute_all_sample_correlation(this_trait,
 
         for sample_correlation in results:
             if sample_correlation is not None:
-                (trait_name, corr_coeffient, p_value,
+                (trait_name, corr_coefficient, p_value,
                  num_overlap) = sample_correlation
                 corr_result = {
-                    "corr_coeffient": corr_coeffient,
+                    "corr_coefficient": corr_coefficient,
                     "p_value": p_value,
                     "num_overlap": num_overlap
                 }
@@ -156,7 +156,7 @@ def compute_all_sample_correlation(this_trait,
                 corr_results.append({trait_name: corr_result})
     return sorted(
         corr_results,
-        key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coeffient"]))
+        key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coefficient"]))
 
 
 def benchmark_compute_all_sample(this_trait,
@@ -179,12 +179,12 @@ def benchmark_compute_all_sample(this_trait,
             trait_vals=this_vals,
             target_samples_vals=target_vals)
         if sample_correlation is not None:
-            (trait_name, corr_coeffient,
+            (trait_name, corr_coefficient,
              p_value, num_overlap) = sample_correlation
         else:
             continue
         corr_result = {
-            "corr_coeffient": corr_coeffient,
+            "corr_coefficient": corr_coefficient,
             "p_value": p_value,
             "num_overlap": num_overlap
         }
@@ -200,20 +200,20 @@ def tissue_correlation_for_trait(
         compute_corr_p_value: Callable = compute_corr_coeff_p_value) -> dict:
     """Given a primary tissue values for a trait and the target tissues values
     compute the correlation_cooeff and p value the input required are arrays
-    output -> List containing Dicts with corr_coefficient value,P_value and
+    output -> List containing Dicts with corr_coefficient value, P_value and
     also the tissue numbers is len(primary) == len(target)
 
     """
 
     # ax :todo assertion that length one one target tissue ==primary_tissue
 
-    (tissue_corr_coeffient,
+    (tissue_corr_coefficient,
      p_value) = compute_corr_p_value(primary_values=primary_tissue_vals,
                                      target_values=target_tissues_values,
                                      corr_method=corr_method)
 
     tiss_corr_result = {trait_id: {
-        "tissue_corr": tissue_corr_coeffient,
+        "tissue_corr": tissue_corr_coefficient,
         "tissue_number": len(primary_tissue_vals),
         "tissue_p_val": p_value}}
 
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
new file mode 100644
index 0000000..0433b3f
--- /dev/null
+++ b/gn3/computations/rqtl.py
@@ -0,0 +1,103 @@
+"""Procedures related rqtl computations"""
+import os
+from typing import Dict, List, Union
+
+import numpy as np
+
+from flask import current_app
+
+from gn3.commands import compose_rqtl_cmd
+from gn3.computations.gemma import generate_hash_of_string
+from gn3.fs_helpers import get_hash_of_files
+
+def generate_rqtl_cmd(rqtl_wrapper_cmd: str,
+                      rqtl_wrapper_kwargs: Dict,
+                      rqtl_wrapper_bool_kwargs: list) -> Dict:
+    """Given the base rqtl_wrapper command and
+dict of keyword arguments, return the full rqtl_wrapper command and an
+output filename generated from a hash of the genotype and phenotype files
+
+    """
+
+    # Generate a hash from contents of the genotype and phenotype files
+    _hash = get_hash_of_files(
+        [v for k, v in rqtl_wrapper_kwargs.items() if k in ["g", "p"]])
+
+    # Append to hash a hash of keyword arguments
+    _hash += generate_hash_of_string(
+        ",".join([f"{k}:{v}" for k, v in rqtl_wrapper_kwargs.items() if k not in ["g", "p"]]))
+
+    # Append to hash a hash of boolean keyword arguments
+    _hash += generate_hash_of_string(
+        ",".join(rqtl_wrapper_bool_kwargs))
+
+    # Temporarily substitute forward-slashes in hash with underscores
+    _hash = _hash.replace("/", "_")
+
+    _output_filename = f"{_hash}-output.csv"
+    rqtl_wrapper_kwargs["filename"] = _output_filename
+
+    return {
+        "output_file":
+        _output_filename,
+        "rqtl_cmd":
+        compose_rqtl_cmd(rqtl_wrapper_cmd=rqtl_wrapper_cmd,
+                         rqtl_wrapper_kwargs=rqtl_wrapper_kwargs,
+                         rqtl_wrapper_bool_kwargs=rqtl_wrapper_bool_kwargs)
+    }
+
+
+def process_rqtl_output(file_name: str) -> List:
+    """Given an output file name, read in R/qtl results and return
+    a List of marker objects
+
+    """
+    marker_obs = []
+    # Later I should probably redo this using csv.read to avoid the
+    # awkwardness with removing quotes with [1:-1]
+    with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+                           "output", file_name), "r") as the_file:
+        for line in the_file:
+            line_items = line.split(",")
+            if line_items[1][1:-1] == "chr" or not line_items:
+                # Skip header line
+                continue
+
+            # Convert chr to int if possible
+            the_chr: Union[int, str]
+            try:
+                the_chr = int(line_items[1][1:-1])
+            except ValueError:
+                the_chr = line_items[1][1:-1]
+            this_marker = {
+                "name": line_items[0][1:-1],
+                "chr": the_chr,
+                "cM": float(line_items[2]),
+                "Mb": float(line_items[2]),
+                "lod_score": float(line_items[3])
+            }
+            marker_obs.append(this_marker)
+
+    return marker_obs
+
+
+def process_perm_output(file_name: str):
+    """Given base filename, read in R/qtl permutation output and calculate
+    suggestive and significant thresholds
+
+    """
+    perm_results = []
+    with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
+                           "output", "PERM_" + file_name), "r") as the_file:
+        for i, line in enumerate(the_file):
+            if i == 0:
+                # Skip header line
+                continue
+
+            line_items = line.split(",")
+            perm_results.append(float(line_items[1]))
+
+    suggestive = np.percentile(np.array(perm_results), 67)
+    significant = np.percentile(np.array(perm_results), 95)
+
+    return perm_results, suggestive, significant
diff --git a/gn3/settings.py b/gn3/settings.py
index bde856d..770ba3d 100644
--- a/gn3/settings.py
+++ b/gn3/settings.py
@@ -6,6 +6,7 @@ import os
 BCRYPT_SALT = "$2b$12$mxLvu9XRLlIaaSeDxt8Sle"  # Change this!
 DATA_DIR = ""
 GEMMA_WRAPPER_CMD = os.environ.get("GEMMA_WRAPPER", "gemma-wrapper")
+RQTL_WRAPPER_CMD = os.environ.get("RQTL_WRAPPER")
 CACHEDIR = ""
 REDIS_URI = "redis://localhost:6379/0"
 REDIS_JOB_QUEUE = "GN3::job-queue"
author	Alexander Kabui	2021-06-20 09:11:03 +0300
committer	Alexander Kabui	2021-06-20 09:11:03 +0300
commit	123ad47af288d6b94f0354a0abd5bc669bc988d4 (patch)
tree	999eb41c1fab3d443215c0e4a8533abecf64f9af /gn3
parent	75801d83c8302b48051d413490e6ce2a0b8ff01f (diff)
parent	d653a635d0efd2291754c18f51d31f91a1c0a25c (diff)
download	genenetwork3-123ad47af288d6b94f0354a0abd5bc669bc988d4.tar.gz