about summary refs log tree commit diff
path: root/wqflask
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2021-12-06 17:58:28 +0300
committer zsloan 2022-01-29 00:42:44 -0600
commit a01033438a85e3097e4ade30e9a208fd5109d4e1 (patch)
tree 256aeec8bb4cdaa9101581709568b5506838f791 /wqflask
parent b9f16a3b28c4d996ac91befa5a919a9e0a7ac981 (diff)
download genenetwork2-a01033438a85e3097e4ade30e9a208fd5109d4e1.tar.gz
Add "Partial Correlations" UI elements on collections page
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* Add UI elements to trigger the partial correlations feature
* Connect partial correlation view to the application
* Point to correct application element for gunicorn
Diffstat (limited to 'wqflask')
-rw-r--r-- wqflask/wqflask/__init__.py 1
-rw-r--r-- wqflask/wqflask/partial_correlations_views.py 263
-rw-r--r-- wqflask/wqflask/templates/collections/view.html 17
-rw-r--r-- wqflask/wqflask/templates/partial_correlations.html 199
4 files changed, 480 insertions, 0 deletions
diff --git a/wqflask/wqflask/__init__.py b/wqflask/wqflask/__init__.py
index 05e040ed..ab8b9e66 100644
--- a/wqflask/wqflask/__init__.py
+++ b/wqflask/wqflask/__init__.py
@@ -98,3 +98,4 @@ from wqflask import user_login
 from wqflask import user_session
 
 import wqflask.views
+import wqflask.partial_correlations_views
diff --git a/wqflask/wqflask/partial_correlations_views.py b/wqflask/wqflask/partial_correlations_views.py
new file mode 100644
index 00000000..bee0a033
--- /dev/null
+++ b/wqflask/wqflask/partial_correlations_views.py
@@ -0,0 +1,263 @@
+from typing import Union, Tuple
+
+import MySQLdb
+from gn3.db.traits import retrieve_trait_info
+from flask import flash, request, current_app, render_template
+from gn3.computations.partial_correlations import partial_correlations_entry
+
+from wqflask import app
+
+def parse_trait(trait_str: str) -> Union[dict, None]:
+    """
+    Convert a `:::`-delimited trait string into a trait dictionary.
+
+    The string must hold exactly five fields, in this order: name, dataset,
+    symbol, description and data_hmac. Whitespace around each field is
+    stripped. Returns `None` when the number of fields is not five.
+    """
+    fields = ("name", "dataset", "symbol", "description", "data_hmac")
+    values = [piece.strip() for piece in trait_str.split(":::")]
+    if len(values) != len(fields):
+        return None
+    return dict(zip(fields, values))
+
+def process_step_select_primary(
+        primary_trait: dict, control_traits: Tuple[dict, ...],
+        target_traits: Tuple[dict, ...],
+        traits_list: Tuple[dict, ...], corr_method: str) -> Tuple[
+            str, dict, Tuple[dict, ...], Tuple[dict, ...], Tuple[dict, ...],
+            str]:
+    """
+    Handle submission of the "select-primary" step.
+
+    With no primary trait, an error is flashed and the state is returned
+    unchanged, still on "select-primary". Otherwise the next step is
+    "select-controls" and the primary trait is dropped from `traits_list`
+    (matched on `data_hmac`).
+
+    Returns the tuple (step, primary_trait, control_traits, target_traits,
+    traits_list, corr_method).
+    """
+    if primary_trait is not None:
+        remaining = tuple(
+            item for item in traits_list
+            if item["data_hmac"] != primary_trait["data_hmac"])
+        return (
+            "select-controls", primary_trait, control_traits, target_traits,
+            remaining, corr_method)
+
+    flash("You must select a primary trait", "alert-danger")
+    return (
+        "select-primary", primary_trait, control_traits, target_traits,
+        traits_list, corr_method)
+
+def process_step_select_controls(
+        primary_trait: dict, control_traits: Tuple[dict, ...],
+        target_traits: Tuple[dict, ...],
+        traits_list: Tuple[dict, ...], corr_method: str) -> Tuple[
+            str, dict, Tuple[dict, ...], Tuple[dict, ...], Tuple[dict, ...],
+            str]:
+    """
+    Handle submission of the "select-controls" step.
+
+    Between one and three control traits are required. Outside that range an
+    error is flashed and the state is returned unchanged, still on
+    "select-controls". Otherwise the next step is "select-targets" and the
+    primary and control traits are removed from `traits_list` (matched on
+    `data_hmac`).
+
+    Returns the tuple (step, primary_trait, control_traits, target_traits,
+    traits_list, corr_method).
+    """
+    if not 1 <= len(control_traits) <= 3:
+        flash(
+            ("You must select a minimum of one control trait, up to a maximum "
+             "of three control traits."),
+            "alert-danger")
+        return (
+            "select-controls", primary_trait, control_traits, target_traits,
+            traits_list, corr_method)
+
+    chosen = {primary_trait["data_hmac"]}
+    chosen.update(item["data_hmac"] for item in control_traits)
+    candidates = tuple(
+        item for item in traits_list if item["data_hmac"] not in chosen)
+    return (
+        "select-targets", primary_trait, control_traits, target_traits,
+        candidates, corr_method)
+
+def process_step_select_targets(
+        primary_trait: dict, control_traits: Tuple[dict, ...],
+        target_traits: Tuple[dict, ...],
+        traits_list: Tuple[dict, ...], corr_method: str) -> Tuple[
+            str, dict, Tuple[dict, ...], Tuple[dict, ...], Tuple[dict, ...],
+            str]:
+    """
+    Handle submission of the "select-targets" step.
+
+    At least one target trait is required. With none, an error is flashed and
+    the state is returned unchanged, still on "select-targets". Otherwise the
+    next step is "select-corr-method" and the primary, control and target
+    traits are removed from `traits_list` (matched on `data_hmac`).
+
+    Returns the tuple (step, primary_trait, control_traits, target_traits,
+    traits_list, corr_method).
+    """
+    if not target_traits:
+        flash(
+            "You must select at least one target trait.", "alert-danger")
+        return (
+            "select-targets", primary_trait, control_traits, target_traits,
+            traits_list, corr_method)
+
+    chosen = {primary_trait["data_hmac"]}
+    chosen.update(
+        item["data_hmac"] for item in (control_traits + target_traits))
+    unselected = tuple(
+        item for item in traits_list if item["data_hmac"] not in chosen)
+    return (
+        "select-corr-method", primary_trait, control_traits, target_traits,
+        unselected, corr_method)
+
+def process_step_select_corr_method(
+        primary_trait: dict, control_traits: Tuple[dict, ...],
+        target_traits: Tuple[dict, ...],
+        traits_list: Tuple[dict, ...], corr_method: str) -> Tuple[
+            str, dict, Tuple[dict, ...], Tuple[dict, ...], Tuple[dict, ...],
+            str]:
+    """
+    Handle submission of the "select-corr-method" step.
+
+    `corr_method` is compared case-insensitively against the supported
+    methods. When it is missing (`None`) or unknown, an error is flashed and
+    the state is returned unchanged, still on "select-corr-method". Otherwise
+    the next step is "run-correlation" and the primary, control and target
+    traits are removed from `traits_list` (matched on `data_hmac`).
+
+    Returns the tuple (step, primary_trait, control_traits, target_traits,
+    traits_list, corr_method).
+    """
+    methods = (
+        "genetic correlation, pearson's r",
+        "genetic correlation, spearman's rho",
+        "sgo literature correlation",
+        "tissue correlation, pearson's r",
+        "tissue correlation, spearman's rho")
+    # The caller passes `form.get("method")`, which is None when the field is
+    # absent; treat that as an unknown method instead of raising on `.lower()`.
+    if (corr_method or "").lower() not in methods:
+        flash(
+            "Selected method is unknown.", "alert-danger")
+        return (
+            "select-corr-method", primary_trait, control_traits, target_traits,
+            traits_list, corr_method)
+
+    hmacs = (primary_trait["data_hmac"],) + tuple(
+        trait["data_hmac"] for trait in (control_traits + target_traits))
+    return (
+        "run-correlation", primary_trait, control_traits, target_traits,
+        tuple(
+            trait for trait in traits_list if trait["data_hmac"] not in hmacs),
+        corr_method)
+
+def process_step(
+        step: str, primary_trait: dict, control_traits: Tuple[dict, ...],
+        target_traits: Tuple[dict, ...], traits_list: Tuple[dict, ...],
+        corr_method: str) -> Tuple[
+            str, dict, Tuple[dict, ...], Tuple[dict, ...], Tuple[dict, ...],
+            str]:
+    """
+    Dispatch the submitted form state to the processor for `step`.
+
+    A missing/empty `step` is treated as "select-primary". A `step` value
+    that has no processor is also handled as "select-primary" rather than
+    raising `KeyError`.
+
+    Returns whatever the selected processor returns: the tuple (step,
+    primary_trait, control_traits, target_traits, traits_list, corr_method).
+    """
+    processor_functions = {
+        "select-primary": process_step_select_primary,
+        "select-controls": process_step_select_controls,
+        "select-targets": process_step_select_targets,
+        "select-corr-method": process_step_select_corr_method
+    }
+    # `step` comes straight from the request form, so it may be None or any
+    # arbitrary string; both cases restart the workflow at the first step.
+    processor = processor_functions.get(step, process_step_select_primary)
+    return processor(
+        primary_trait, control_traits, target_traits, traits_list, corr_method)
+
+def sequence_of_traits(trait_strs) -> Tuple[dict, ...]:
+    """
+    Parse each string in `trait_strs` with `parse_trait`.
+
+    Every string is stripped before parsing. Strings that `parse_trait`
+    rejects (returns `None` for) are left out of the resulting tuple.
+    """
+    parsed = (parse_trait(raw.strip()) for raw in trait_strs)
+    return tuple(trait for trait in parsed if trait is not None)
+
+def publish_target_dabases(conn, group, threshold):
+    """
+    Fetch the `PublishFreeze` databases belonging to the group `group`.
+
+    Only rows whose `public` value is greater than `threshold` are selected.
+    Returns a tuple of dicts with the keys "description" (the `FullName`
+    column) and "value" (the `Name` column); the tuple is empty when the query
+    yields nothing.
+    """
+    query = (
+        "SELECT PublishFreeze.FullName,PublishFreeze.Name "
+        "FROM PublishFreeze, InbredSet "
+        "WHERE PublishFreeze.InbredSetId = InbredSet.Id "
+        "AND InbredSet.Name = %(group)s "
+        "AND PublishFreeze.public > %(threshold)s")
+    params = {"group": group, "threshold": threshold}
+    with conn.cursor() as cursor:
+        cursor.execute(query, params)
+        rows = cursor.fetchall()
+
+    if not rows:
+        return tuple()
+    return tuple(dict(zip(("description", "value"), row)) for row in rows)
+
+def geno_target_databases(conn, group, threshold):
+    """
+    Fetch the `GenoFreeze` databases belonging to the group `group`.
+
+    Only rows whose `public` value is greater than `threshold` are selected.
+    Returns a tuple of dicts with the keys "description" (the `FullName`
+    column) and "value" (the `Name` column); the tuple is empty when the query
+    yields nothing.
+    """
+    query = (
+        "SELECT GenoFreeze.FullName,GenoFreeze.Name "
+        "FROM GenoFreeze, InbredSet "
+        "WHERE GenoFreeze.InbredSetId = InbredSet.Id "
+        "AND InbredSet.Name = %(group)s "
+        "AND GenoFreeze.public > %(threshold)s")
+    params = {"group": group, "threshold": threshold}
+    with conn.cursor() as cursor:
+        cursor.execute(query, params)
+        rows = cursor.fetchall()
+
+    if not rows:
+        return tuple()
+    return tuple(dict(zip(("description", "value"), row)) for row in rows)
+
+def probeset_target_databases(conn, group, threshold):
+    """
+    Fetch the `ProbeSetFreeze` databases for `group`, grouped by tissue.
+
+    All tissues are read first (ordered by name), then every dataset whose
+    `public` value is greater than `threshold` and whose group name matches
+    `group` as a prefix.
+
+    Returns a tuple with one single-key dict per tissue, in tissue-name
+    order: `{tissue_name: (dataset, ...)}`, where each dataset is a dict with
+    the keys "value" and "description". Tissues without datasets map to an
+    empty tuple. An empty tuple is returned when there are no tissues or no
+    matching datasets at all.
+    """
+    tissues_query = "SELECT Id, Name FROM Tissue order by Name"
+    datasets_query = (
+        "SELECT ProbeFreeze.TissueId, ProbeSetFreeze.FullName, ProbeSetFreeze.Name "
+        "FROM ProbeSetFreeze, ProbeFreeze, InbredSet "
+        "WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id "
+        "AND ProbeFreeze.TissueId IN %(tissue_ids)s "
+        "AND ProbeSetFreeze.public > %(threshold)s "
+        "AND ProbeFreeze.InbredSetId = InbredSet.Id "
+        "AND InbredSet.Name like %(group)s "
+        "ORDER BY ProbeSetFreeze.CreateTime desc, ProbeSetFreeze.AvgId")
+    with conn.cursor() as cursor:
+        cursor.execute(tissues_query)
+        tissue_res = cursor.fetchall()
+        if not tissue_res:
+            return tuple()
+
+        # NOTE(review): the doubled "%%" presumably reaches MySQL as two
+        # wildcards, which LIKE treats like one -- confirm whether a single
+        # "%" was intended.
+        cursor.execute(
+            datasets_query, {
+                "tissue_ids": tuple(row[0] for row in tissue_res),
+                "group": f"{group}%%",
+                "threshold": threshold
+            })
+        db_res = cursor.fetchall()
+
+    if not db_res:
+        return tuple()
+
+    # Bucket the datasets by tissue in a single pass instead of rescanning
+    # every dataset row for every tissue; query order is kept per bucket.
+    by_tissue = {}
+    for tissue_id, description, value in db_res:
+        by_tissue.setdefault(tissue_id, []).append(
+            {"value": value, "description": description})
+
+    return tuple(
+        {tissue_name: tuple(by_tissue.get(tissue_id, ()))}
+        for tissue_id, tissue_name in tissue_res)
+
+def target_databases(conn, step, trait, threshold):
+    """
+    Retrieve the possible target databases for the group of `trait`.
+
+    Nothing is looked up (and `None` is returned) unless `step` is
+    "select-corr-method". Otherwise the trait's group is read from
+    `retrieve_trait_info` and the publish, geno and probeset target databases
+    of that group are returned concatenated, in that order.
+    """
+    if step != "select-corr-method":
+        return None
+
+    full_name = f"{trait['dataset']}::{trait['name']}"
+    group = retrieve_trait_info(threshold, full_name, conn)["group"]
+
+    publish_dbs = publish_target_dabases(conn, group, threshold)
+    geno_dbs = geno_target_databases(conn, group, threshold)
+    probeset_dbs = probeset_target_databases(conn, group, threshold)
+    return publish_dbs + geno_dbs + probeset_dbs
+
+def pcorrelations(conn, values):
+    """
+    Run the partial correlations computation once the workflow is complete.
+
+    Returns `None` unless `values["step"]` is "run-correlation". Otherwise
+    the primary and control traits are passed to `partial_correlations_entry`
+    as "dataset::name" strings, together with the "method", "criteria" and
+    "target_db" entries of `values`, and its result is returned.
+    """
+    if values["step"] != "run-correlation":
+        return None
+
+    primary = values["primary_trait"]
+    primary_name = f"{primary['dataset']}::{primary['name']}"
+    control_names = tuple(
+        f"{control['dataset']}::{control['name']}"
+        for control in values["control_traits"])
+    return partial_correlations_entry(
+        conn, primary_name, control_names,
+        values["method"], values["criteria"], values["target_db"])
+
+@app.route("/partial_correlations", methods=("POST",))
+def partial_correlations():
+    """
+    Drive the multi-step partial correlations form.
+
+    The submitted state (step, primary/control/target traits, remaining
+    traits and method) is validated and advanced by `process_step`. Target
+    databases are looked up for the "select-corr-method" step and the
+    computation is run for the "run-correlation" step; the outcome is
+    rendered with the "partial_correlations.html" template.
+    """
+    form = request.form
+
+    # Trait selections arrive either as repeated checkbox fields
+    # ("control_traits[]"/"target_traits[]") or as a single "|||"-joined
+    # hidden field carried over from an earlier step.
+    args_dict = dict(zip(
+        ("step", "primary_trait", "control_traits", "target_traits",
+         "traits_list", "method"),
+        process_step(
+            form.get("step", None),
+            parse_trait(form.get("primary_trait", "")),
+            sequence_of_traits(
+                form.getlist("control_traits[]") or
+                form.get("control_traits", "").split("|||")),
+            sequence_of_traits(
+                form.getlist("target_traits[]") or
+                form.get("target_traits", "").split("|||")),
+            sequence_of_traits(form.get("traits_list", "").split("|||")),
+            form.get("method"))))
+
+    if args_dict["step"] == "run-correlation":
+        args_dict = {
+            **args_dict, "target_db": form.get("target_db"),
+            # `type=int` makes a non-numeric value fall back to the default
+            # instead of raising ValueError.
+            "criteria": form.get("criteria", 500, type=int)}
+
+    conn = MySQLdb.Connect(
+        db=current_app.config.get("DB_NAME"),
+        user=current_app.config.get("DB_USER"),
+        passwd=current_app.config.get("DB_PASS"),
+        host=current_app.config.get("DB_HOST"))
+    try:
+        target_dbs = target_databases(
+            conn, args_dict["step"], args_dict["primary_trait"], 0)
+        corr_results = pcorrelations(conn, args_dict)
+
+        # Rendered while the connection is still open, in case the results
+        # are only consumed during rendering.
+        return render_template(
+            "partial_correlations.html", **args_dict, target_dbs=target_dbs,
+            corr_results=corr_results)
+    finally:
+        # Always release the connection, including on errors.
+        conn.close()
diff --git a/wqflask/wqflask/templates/collections/view.html b/wqflask/wqflask/templates/collections/view.html
index 0ded66a6..f4270b67 100644
--- a/wqflask/wqflask/templates/collections/view.html
+++ b/wqflask/wqflask/templates/collections/view.html
@@ -34,6 +34,23 @@
           </form>
         </div>
 
+	{# Entry point of the partial correlations workflow: POSTs every trait of
+	   the collection as name:::dataset:::symbol:::description:::data_hmac,
+	   with each trait terminated by "|||". No "step" field is sent. #}
+	<form id="partial-correlations-form"
+	      method="POST"
+	      action="{{url_for('partial_correlations')}}">
+	  <input type="hidden"
+		 id="collection_uuid"
+		 value="{{uc.uc_id}}" />
+	  <input type="hidden"
+		 name="traits_list"
+		 value="{% for this_trait in trait_obs %}{{ this_trait.name }}:::{{ this_trait.dataset.name }}:::{{this_trait.symbol}}:::{{this_trait.description_display}}:::{{data_hmac('{}:{}'.format(this_trait.name, this_trait.dataset.name))}}|||{% endfor %}" />
+	  <button id="run-partial-correlations"
+		  class="btn btn-primary"
+		  title="Run partial correlations"
+		  type="submit">
+	    Partial Correlations
+	  </button>
+	</form>
+
         <div>
             <br />
 	    <form id="heatmaps_form">
diff --git a/wqflask/wqflask/templates/partial_correlations.html b/wqflask/wqflask/templates/partial_correlations.html
new file mode 100644
index 00000000..b61f7fc4
--- /dev/null
+++ b/wqflask/wqflask/templates/partial_correlations.html
@@ -0,0 +1,199 @@
+{%extends "base.html"%}
+
+{%block content%}
+{# Multi-step partial correlations form. Each POST carries the current step
+   and the selections made so far as hidden fields. A trait is serialised as
+   name:::dataset:::symbol:::description:::data_hmac and lists of traits are
+   joined with "|||". #}
+<div class="container">
+  <form id="partial-correlations-form"
+	method="POST"
+	action="{{url_for('partial_correlations')}}">
+    {# Validation messages flashed by the view #}
+    {%with messages = get_flashed_messages(with_categories=true)%}
+    {%if messages:%}
+    <ul class=flashes>
+      {%for category, message in messages:%}
+      <li class="{{category}}">{{message}}</li>
+      {%endfor%}
+    </ul>
+    {%endif%}
+    {%endwith%}
+
+    {# State carried across steps: current step and traits still selectable #}
+    <input type="hidden" name="step" id="step-indicator" value="{{step}}" />
+    <input type="hidden"
+	   name="traits_list"
+	   value="{% for trait in traits_list %}{{trait['name']}}:::{{trait['dataset']}}:::{{trait['symbol']}}:::{{trait['description']}}:::{{trait['data_hmac']}}|||{% endfor %}">
+
+    {# Selections made in earlier steps: carried forward and summarised #}
+    {%if primary_trait:%}
+    <input type="hidden"
+	   name="primary_trait"
+	   value="{{primary_trait['name']}}:::{{primary_trait['dataset']}}:::{{primary_trait['symbol']}}:::{{primary_trait['description']}}:::{{primary_trait['data_hmac']}}"
+	   id="trait_{{primary_trait['data_hmac']}}" />
+    <p>
+      Primary Trait: {{primary_trait["name"]}}&nbsp;-&nbsp;
+      {{primary_trait["symbol"]}}&nbsp;-&nbsp;{{primary_trait["description"]}}
+    </p>
+    {%endif%}
+
+    {%if control_traits:%}
+    <input type="hidden"
+	   name="control_traits"
+	   value="{%for trait in control_traits:%}{{trait['name']}}:::{{trait['dataset']}}:::{{trait['symbol']}}:::{{trait['description']}}:::{{trait['data_hmac']}}|||{%endfor%}" />
+    <p>
+      Control Traits:
+      <ul>
+	{%for trait in control_traits:%}
+	<li>
+	  {{trait["name"]}}&nbsp;-&nbsp;{{trait["symbol"]}}&nbsp;-&nbsp;
+	  {{trait["description"]}}</li>
+	{%endfor%}
+      </ul>
+    </p>
+    {%endif%}
+
+    {%if target_traits:%}
+    {# The separator must be "|||": the view splits this field on "|||" #}
+    <input type="hidden"
+	   name="target_traits"
+	   value="{%for trait in target_traits:%}{{trait['name']}}:::{{trait['dataset']}}:::{{trait['symbol']}}:::{{trait['description']}}:::{{trait['data_hmac']}}|||{%endfor%}" />
+    <p>
+      Target Traits:
+      <ul>
+	{%for trait in target_traits:%}
+	<li>
+	  {{trait["name"]}}&nbsp;-&nbsp;{{trait["symbol"]}}&nbsp;-&nbsp;
+	  {{trait["description"]}}</li>
+	{%endfor%}
+      </ul>
+    </p>
+    {%endif%}
+
+
+
+    {# Step 1: choose exactly one primary trait #}
+    {%if step == "select-primary":%}
+    <p>Please select the primary trait (X)</p>
+    {%for trait in traits_list:%}
+    <div class="form-group">
+      <input type="radio"
+	     name="primary_trait"
+	     value="{{trait['name']}}:::{{trait['dataset']}}:::{{trait['symbol']}}:::{{trait['description']}}:::{{trait['data_hmac']}}"
+	     id="trait_{{trait['data_hmac']}}" />
+      <label for="trait_{{trait['data_hmac']}}">
+	{{trait["name"]}} - {{trait["symbol"]}} - {{trait["description"]}}
+      </label>
+    </div>
+    {%endfor%}
+    <button type="submit" class="btn btn-primary">
+      Next: Select Control Traits
+    </button>
+    {%endif%}
+
+
+
+    {# Step 2: choose the control traits #}
+    {%if step == "select-controls":%}
+
+    <p>Select a maximum of three (3) control traits (Z)</p>
+    {%for trait in traits_list:%}
+    <div class="form-group">
+      <input type="checkbox"
+	     name="control_traits[]"
+	     value="{{trait['name']}}:::{{trait['dataset']}}:::{{trait['symbol']}}:::{{trait['description']}}:::{{trait['data_hmac']}}"
+	     id="trait_{{trait['data_hmac']}}" />
+      <label for="trait_{{trait['data_hmac']}}">
+	{{trait["name"]}} - {{trait["symbol"]}} - {{trait["description"]}}
+      </label>
+    </div>
+    {%endfor%}
+    <button type="submit" class="btn btn-primary">
+      Next: Select Target Traits
+    </button>
+    {%endif%}
+
+
+
+    {# Step 3: choose the target traits; every remaining trait is pre-checked #}
+    {%if step == "select-targets":%}
+    <p>Select at least one target trait (Y)</p>
+    {%for trait in traits_list:%}
+    <div class="form-group">
+      <input type="checkbox"
+	     name="target_traits[]"
+	     value="{{trait['name']}}:::{{trait['dataset']}}:::{{trait['symbol']}}:::{{trait['description']}}:::{{trait['data_hmac']}}"
+	     checked="checked"
+	     id="trait_{{trait['data_hmac']}}" />
+      <label for="trait_{{trait['data_hmac']}}">
+	{{trait["name"]}} - {{trait["symbol"]}} - {{trait["description"]}}
+      </label>
+    </div>
+    {%endfor%}
+    <button type="submit" class="btn btn-primary">
+      Next: Select Correlation Method
+    </button>
+    {%endif%}
+
+
+
+    {# Step 4: choose target database, correlation method and result count #}
+    {%if step == "select-corr-method":%}
+    <div class="form-group">
+      <label for="target-db-input">Choose Database</label>
+      <select id="target-db-input" required="required" name="target_db">
+	{# Items with a "description" key are plain options; any other item
+	   maps a group label to a list of options #}
+	{%if target_dbs:%}
+	{%for item in target_dbs:%}
+	{%if "description" in item.keys():%}
+        <option value="{{item['value']}}">{{item['description']}}</option>
+	{%else:%}
+	{%for group, opts in item.items()%}
+	{%if opts | length > 0:%}
+	<optgroup label="{{group}} ------">
+	  {%for item2 in opts:%}
+	  <option value="{{item2['value']}}">{{item2['description']}}</option>
+	  {%endfor%}
+	</optgroup>
+	{%endif%}
+	{%endfor%}
+	{%endif%}
+	{%endfor%}
+	{%endif%}
+      </select>
+    </div>
+
+    <div class="form-group">
+      <label for="corr-method-input">Compute</label>
+      <select id="corr-method-input" required="required" name="method">
+	<option value="Genetic Correlation, Pearson's r">
+	  Genetic Correlation, Pearson's r</option>
+	<option value="Genetic Correlation, Spearman's rho">
+	  Genetic Correlation, Spearman's rho</option>
+	<option value="SGO Literature Correlation">
+	  SGO Literature Correlation</option>
+	<option value="Tissue Correlation, Pearson's r">
+	  Tissue Correlation, Pearson's r</option>
+	<option value="Tissue Correlation, Spearman's rho">
+	  Tissue Correlation, Spearman's rho</option>
+      </select>
+    </div>
+
+    <div class="form-group">
+      <label for="criteria-input">Return</label>
+      <select id="criteria-input" required="required" name="criteria" size="1">
+	<option value="100">top 100</option>
+	<option value="200">top 200</option>
+	<option value="500" selected="selected">top 500</option>
+	<option value="1000">top 1000</option>
+	<option value="2000">top 2000</option>
+	<option value="5000">top 5000</option>
+	<option value="10000">top 10000</option>
+	<option value="15000">top 15000</option>
+	<option value="20000">top 20000</option>
+      </select>
+    </div>
+
+    <button type="submit" class="btn btn-primary">
+      Run Partial Correlation
+    </button>
+    {%endif%}
+
+    {# Final step: echo the chosen options and print the raw results #}
+    {%if step == "run-correlation":%}
+    <input type="hidden" name="selected_method" value="{{method}}" />
+    <input type="hidden" name="selected_target_db" value="{{target_db}}" />
+    <input type="hidden" name="selected_criteria" value="{{criteria}}" />
+
+    {{corr_results}}
+    {%endif%}
+  </form>
+</div>
+{%endblock%}