aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author zsloan 2019-03-18 17:03:30 -0500
committer zsloan 2019-03-18 17:03:30 -0500
commit d515061c4878b448f5b866e32eee7e37301ecdb7 (patch)
tree f88efd6fb87044065d68ff1602a8d57692de5f1f
parent f8f0a2aecd14b7a45172d67d7eac3c9c2ac3618f (diff)
download genenetwork2-d515061c4878b448f5b866e32eee7e37301ecdb7.tar.gz
- Added PCA traits to the correlation matrix. You can't yet change their names or add them to a collection from the correlation matrix page, but you can click them and access them from the trait page. I'll add the option to access them from the correlation matrix page as well, but adding the option to change their names might be trickier, since the trait names are currently used as their Redis keys. I need a better way of passing the Redis key around so that it can be stored in collections, but this is tricky without changing the structure in ElasticSearch.
- Fixed the way temp traits work so that you can use them with various functions, such as mapping, correlations, network graph, etc.
- Fixed an appearance issue where the network graph options were too wide if a trait name was too long.
-rw-r--r-- wqflask/base/data_set.py 3
-rw-r--r-- wqflask/base/trait.py 40
-rw-r--r-- wqflask/utility/helper_functions.py 5
-rw-r--r-- wqflask/wqflask/collect.py 5
-rw-r--r-- wqflask/wqflask/correlation/show_corr_results.py 2
-rw-r--r-- wqflask/wqflask/correlation_matrix/show_corr_matrix.py 37
-rw-r--r-- wqflask/wqflask/marker_regression/display_mapping_results.py 11
-rw-r--r-- wqflask/wqflask/templates/correlation_matrix.html 7
-rw-r--r-- wqflask/wqflask/templates/network_graph.html 15
9 files changed, 95 insertions, 30 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index ca6621e9..beb2a8a2 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -277,7 +277,6 @@ class DatasetGroup(object):
"""
def __init__(self, dataset, name=None):
"""This sets self.group and self.group_id"""
- #logger.debug("DATASET NAME2:", dataset.name)
if name == None:
self.name, self.id, self.genetic_type = fetchone(dataset.query_for_group)
else:
@@ -500,7 +499,7 @@ class DataSet(object):
self.setup()
if self.type == "Temp": #Need to supply group name as input if temp trait
- self.group = DatasetGroup(self, group_name) # sets self.group and self.group_id and gets genotype
+ self.group = DatasetGroup(self, name=group_name) # sets self.group and self.group_id and gets genotype
else:
self.check_confidentiality()
self.retrieve_other_names()
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 79aa196f..0689e950 100644
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -6,6 +6,9 @@ import codecs
from htmlgen import HTMLgen2 as HT
+import redis
+Redis = redis.StrictRedis()
+
from base import webqtlConfig
from base.webqtlCaseData import webqtlCaseData
from base.data_set import create_dataset
@@ -35,13 +38,15 @@ class GeneralTrait(object):
def __init__(self, get_qtl_info=False, get_sample_info=True, **kw):
# xor assertion
assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name";
+ self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc.
if kw.get('dataset_name'):
- self.dataset = create_dataset(kw.get('dataset_name'))
- #print(" in GeneralTrait created dataset:", self.dataset)
+ if kw.get('dataset_name') == "Temp":
+ temp_group = self.name.split("_")[2]
+ self.dataset = create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = temp_group)
+ else:
+ self.dataset = create_dataset(kw.get('dataset_name'))
else:
self.dataset = kw.get('dataset')
- self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc.
- #print("THE NAME IS:", self.name)
self.cellid = kw.get('cellid')
self.identification = kw.get('identification', 'un-named trait')
self.haveinfo = kw.get('haveinfo', False)
@@ -73,8 +78,8 @@ class GeneralTrait(object):
# So we could add a simple if statement to short-circuit this if necessary
if self.dataset.type != "Temp":
self = retrieve_trait_info(self, self.dataset, get_qtl_info=get_qtl_info)
- if get_sample_info != False:
- self = retrieve_sample_data(self, self.dataset)
+ if get_sample_info != False:
+ self = retrieve_sample_data(self, self.dataset)
def export_informative(self, include_variance=0):
"""
@@ -154,18 +159,27 @@ def retrieve_sample_data(trait, dataset, samplelist=None):
if samplelist == None:
samplelist = []
- results = dataset.retrieve_sample_data(trait.name)
+ if dataset.type == "Temp":
+ results = Redis.get(trait.name).split()
+ else:
+ results = dataset.retrieve_sample_data(trait.name)
# Todo: is this necessary? If not remove
trait.data.clear()
- all_samples_ordered = dataset.group.all_samples_ordered()
-
if results:
- for item in results:
- name, value, variance, num_cases, name2 = item
- if not samplelist or (samplelist and name in samplelist):
- trait.data[name] = webqtlCaseData(*item) #name, value, variance, num_cases)
+ if dataset.type == "Temp":
+ all_samples_ordered = dataset.group.all_samples_ordered()
+ for i, item in enumerate(results):
+ try:
+ trait.data[all_samples_ordered[i]] = webqtlCaseData(all_samples_ordered[i], float(item))
+ except:
+ pass
+ else:
+ for item in results:
+ name, value, variance, num_cases, name2 = item
+ if not samplelist or (samplelist and name in samplelist):
+ trait.data[name] = webqtlCaseData(*item) #name, value, variance, num_cases)
return trait
@app.route("/trait/get_sample_data")
diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py
index 1c8dad10..6980af4e 100644
--- a/wqflask/utility/helper_functions.py
+++ b/wqflask/utility/helper_functions.py
@@ -45,7 +45,10 @@ def get_trait_db_obs(self, trait_db_list):
data = data.strip()
assert hmac==user_manager.actual_hmac_creation(data), "Data tampering?"
trait_name, dataset_name = data.split(":")
- dataset_ob = data_set.create_dataset(dataset_name)
+ if dataset_name == "Temp":
+ dataset_ob = data_set.create_dataset(dataset_name=dataset_name, dataset_type="Temp", group_name=trait_name.split("_")[2])
+ else:
+ dataset_ob = data_set.create_dataset(dataset_name)
trait_ob = GeneralTrait(dataset=dataset_ob,
name=trait_name,
cellid=None)
diff --git a/wqflask/wqflask/collect.py b/wqflask/wqflask/collect.py
index eb0e2726..6e1ac592 100644
--- a/wqflask/wqflask/collect.py
+++ b/wqflask/wqflask/collect.py
@@ -233,7 +233,10 @@ def collections_new():
collection_id = params['existing_collection'].split(":")[0]
collection_name = params['existing_collection'].split(":")[1]
if g.user_session.logged_in:
- unprocessed_traits = Redis.get(params['hash'])
+ if "hash" in params:
+ unprocessed_traits = Redis.get(params['hash'])
+ else:
+ unprocessed_traits = params['traits']
traits = list(process_traits(unprocessed_traits))
g.user_session.add_traits_to_collection(collection_id, traits)
return redirect(url_for('view_collection', uc_id=collection_id))
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index c15c3579..0c6b8a2b 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -93,7 +93,7 @@ class CorrelationResults(object):
with Bench("Doing correlations"):
if start_vars['dataset'] == "Temp":
self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group'])
- self.trait_id = "Temp"
+ self.trait_id = start_vars['trait_id']
self.this_trait = GeneralTrait(dataset=self.dataset,
name=self.trait_id,
cellid=None)
diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
index 4bb4d65d..007e8e47 100644
--- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
+++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
@@ -26,6 +26,7 @@ import sys
import string
import cPickle
import os
+import datetime
import time
import pp
import math
@@ -45,6 +46,9 @@ from pprint import pformat as pf
from htmlgen import HTMLgen2 as HT
import reaper
+import redis
+Redis = redis.StrictRedis()
+
from utility.THCell import THCell
from utility.TDCell import TDCell
from base.trait import GeneralTrait
@@ -59,8 +63,10 @@ from MySQLdb import escape_string as escape
from pprint import pformat as pf
-from flask import Flask, g
+from flask import Flask, g, url_for
+import utility.logger
+logger = utility.logger.getLogger(__name__ )
class CorrelationMatrix(object):
@@ -111,6 +117,7 @@ class CorrelationMatrix(object):
self.corr_results = []
self.pca_corr_results = []
self.trait_data_array = []
+ self.shared_samples_list = self.all_sample_list
for trait_db in self.trait_list:
this_trait = trait_db[0]
this_db = trait_db[1]
@@ -138,6 +145,8 @@ class CorrelationMatrix(object):
target_vals = []
for index, sample in enumerate(target_samples):
if (sample in this_sample_data) and (sample in target_sample_data):
+ if sample not in self.shared_samples_list:
+ self.shared_samples_list.remove(sample)
sample_value = this_sample_data[sample].value
target_sample_value = target_sample_data[sample].value
this_trait_vals.append(sample_value)
@@ -174,6 +183,7 @@ class CorrelationMatrix(object):
try:
self.pca_works = "True"
+ self.pca_trait_ids = []
pca = self.calculate_pca(range(len(self.traits)), corr_eigen_value, corr_eigen_vectors)
self.loadings_array = self.process_loadings()
except:
@@ -201,9 +211,6 @@ class CorrelationMatrix(object):
cellid=None)
self.trait_list.append((trait_ob, dataset_ob))
- #print("trait_list:", self.trait_list)
-
-
def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors):
base = importr('base')
stats = importr('stats')
@@ -224,9 +231,27 @@ class CorrelationMatrix(object):
pca_traits = []
for i, vector in enumerate(trait_array_vectors):
if corr_eigen_value[i-1] < 100.0/len(self.trait_list):
- pca_traits.append(vector*-1.0)
+ pca_traits.append((vector*-1.0).tolist())
+
+ this_group_name = self.trait_list[0][1].group.name
+ temp_dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = this_group_name)
+ temp_dataset.group.get_samplelist()
+ for i, pca_trait in enumerate(pca_traits):
+ trait_id = "PCA" + str(i+1) + "_" + temp_dataset.group.species + "_" + this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
+ this_vals_string = ""
+ position = 0
+ for sample in temp_dataset.group.all_samples_ordered():
+ if sample in self.shared_samples_list:
+ this_vals_string += str(pca_trait[position])
+ this_vals_string += " "
+ position += 1
+ else:
+ this_vals_string += "x "
+ this_vals_string = this_vals_string[:-1]
+
+ Redis.set(trait_id, this_vals_string)
+ self.pca_trait_ids.append(trait_id)
- print("pca_traits:", pca_traits)
return pca
def process_loadings(self):
diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py
index 41d5c9d0..39e0d712 100644
--- a/wqflask/wqflask/marker_regression/display_mapping_results.py
+++ b/wqflask/wqflask/marker_regression/display_mapping_results.py
@@ -932,10 +932,13 @@ class DisplayMappingResults(object):
if self.this_trait.symbol:
identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.symbol)
- elif self.this_trait.post_publication_abbreviation:
- identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.post_publication_abbreviation)
- elif self.this_trait.pre_publication_abbreviation:
- identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.pre_publication_abbreviation)
+ elif self.dataset.type == "Publish":
+ if self.this_trait.post_publication_abbreviation:
+ identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.post_publication_abbreviation)
+ elif self.this_trait.pre_publication_abbreviation:
+ identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.pre_publication_abbreviation)
+ else:
+ identification += "Trait: %s" % (self.this_trait.name)
else:
identification += "Trait: %s" % (self.this_trait.name)
identification += " with %s samples" % (self.n_samples)
diff --git a/wqflask/wqflask/templates/correlation_matrix.html b/wqflask/wqflask/templates/correlation_matrix.html
index 8698b710..9c790780 100644
--- a/wqflask/wqflask/templates/correlation_matrix.html
+++ b/wqflask/wqflask/templates/correlation_matrix.html
@@ -60,6 +60,13 @@
{% if pca_works == "True" %}
<br>
<br>
+<h2>PCA Traits</h2>
+<ul>
+ {% for this_trait_id in pca_trait_ids %}
+ <li><a href="{{ url_for('show_trait_page', trait_id = pca_trait_ids[loop.index - 1], dataset = "Temp") }}">{{ pca_trait_ids[loop.index - 1] }}</a></li>
+ {% endfor %}
+</ul>
+<br>
<h2>Factor Loadings Plot</h2>
<div id="loadings_plot" style="margin-top: 20px; margin-bottom: 20px; width: 980px; border-style: solid; border-width: 1px;"></div>
<h2>Factor Loadings Table</h2>
diff --git a/wqflask/wqflask/templates/network_graph.html b/wqflask/wqflask/templates/network_graph.html
index 6bee11e7..4492dd3f 100644
--- a/wqflask/wqflask/templates/network_graph.html
+++ b/wqflask/wqflask/templates/network_graph.html
@@ -34,7 +34,11 @@
<select name="focus_select">
<option disabled selected value>Select Trait</option>
{% for trait in traits %}
+ {% if trait.symbol == None %}
+ <option value="{{ trait.name }}:{{ trait.dataset.name }}">{{ trait.name }}</option>
+ {% else %}
<option value="{{ trait.name }}:{{ trait.dataset.name }}">{{ trait.symbol }} ({{ trait.name }})</option>
+ {% endif %}
{% endfor %}
</select>
</td>
@@ -46,11 +50,18 @@
</tr>
<tr>
<td colspan="1">
+ <div style="text-align: center;">
+ <div style="float: left;"><font size="2"><b>-1</b></font></div>
+ <font size="2"><b>0</b></font>
+ <div style="float: right;"><font size="2"><b>1</b></font></div>
+ </div>
+ <!--
<font size="2"><b>-1 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;0&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 1</b></font><br>
- <input type="range" id="neg_slide" min="-1" max="0" value="0" step="0.001" list="corr_range" style="display: inline; width: 45%">
- <input type="range" id="pos_slide" min="0" max="1" value="0" step="0.001" list="corr_range" style="display: inline; width: 45%">
+ -->
+ <input type="range" id="neg_slide" min="-1" max="0" value="0" step="0.001" list="corr_range" style="display: inline; width: 49%">
+ <input type="range" id="pos_slide" min="0" max="1" value="0" step="0.001" list="corr_range" style="display: inline; width: 49%">
</td>
</tr>
<tr>