diff options
author | zsloan | 2019-03-18 17:03:30 -0500 |
---|---|---|
committer | zsloan | 2019-03-18 17:03:30 -0500 |
commit | d515061c4878b448f5b866e32eee7e37301ecdb7 (patch) | |
tree | f88efd6fb87044065d68ff1602a8d57692de5f1f /wqflask | |
parent | f8f0a2aecd14b7a45172d67d7eac3c9c2ac3618f (diff) | |
download | genenetwork2-d515061c4878b448f5b866e32eee7e37301ecdb7.tar.gz |
- Added PCA traits to the correlation matrix. You can't yet change their names or add them to a collection from the corr matrix page,
but you can click them and access them from the trait page. I'll add the option to access them from the corr matrix page as well,
but adding the option to change their names might be trickier since the names are currently used as their Redis keys. I need some better
way of passing the Redis key around so it can be stored in collections, but this is tricky without changing the structure in ElasticSearch.
- Fixed the way temp traits work so you can use them with various functions, like mapping, correlations, network graph, etc.
- Fixed an appearance issue where the network graph options were too wide if a trait name was too long
Diffstat (limited to 'wqflask')
-rw-r--r-- | wqflask/base/data_set.py | 3 | ||||
-rw-r--r-- | wqflask/base/trait.py | 40 | ||||
-rw-r--r-- | wqflask/utility/helper_functions.py | 5 | ||||
-rw-r--r-- | wqflask/wqflask/collect.py | 5 | ||||
-rw-r--r-- | wqflask/wqflask/correlation/show_corr_results.py | 2 | ||||
-rw-r--r-- | wqflask/wqflask/correlation_matrix/show_corr_matrix.py | 37 | ||||
-rw-r--r-- | wqflask/wqflask/marker_regression/display_mapping_results.py | 11 | ||||
-rw-r--r-- | wqflask/wqflask/templates/correlation_matrix.html | 7 | ||||
-rw-r--r-- | wqflask/wqflask/templates/network_graph.html | 15 |
9 files changed, 95 insertions, 30 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index ca6621e9..beb2a8a2 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -277,7 +277,6 @@ class DatasetGroup(object): """ def __init__(self, dataset, name=None): """This sets self.group and self.group_id""" - #logger.debug("DATASET NAME2:", dataset.name) if name == None: self.name, self.id, self.genetic_type = fetchone(dataset.query_for_group) else: @@ -500,7 +499,7 @@ class DataSet(object): self.setup() if self.type == "Temp": #Need to supply group name as input if temp trait - self.group = DatasetGroup(self, group_name) # sets self.group and self.group_id and gets genotype + self.group = DatasetGroup(self, name=group_name) # sets self.group and self.group_id and gets genotype else: self.check_confidentiality() self.retrieve_other_names() diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 79aa196f..0689e950 100644 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -6,6 +6,9 @@ import codecs from htmlgen import HTMLgen2 as HT +import redis +Redis = redis.StrictRedis() + from base import webqtlConfig from base.webqtlCaseData import webqtlCaseData from base.data_set import create_dataset @@ -35,13 +38,15 @@ class GeneralTrait(object): def __init__(self, get_qtl_info=False, get_sample_info=True, **kw): # xor assertion assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name"; + self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc. 
if kw.get('dataset_name'): - self.dataset = create_dataset(kw.get('dataset_name')) - #print(" in GeneralTrait created dataset:", self.dataset) + if kw.get('dataset_name') == "Temp": + temp_group = self.name.split("_")[2] + self.dataset = create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = temp_group) + else: + self.dataset = create_dataset(kw.get('dataset_name')) else: self.dataset = kw.get('dataset') - self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc. - #print("THE NAME IS:", self.name) self.cellid = kw.get('cellid') self.identification = kw.get('identification', 'un-named trait') self.haveinfo = kw.get('haveinfo', False) @@ -73,8 +78,8 @@ class GeneralTrait(object): # So we could add a simple if statement to short-circuit this if necessary if self.dataset.type != "Temp": self = retrieve_trait_info(self, self.dataset, get_qtl_info=get_qtl_info) - if get_sample_info != False: - self = retrieve_sample_data(self, self.dataset) + if get_sample_info != False: + self = retrieve_sample_data(self, self.dataset) def export_informative(self, include_variance=0): """ @@ -154,18 +159,27 @@ def retrieve_sample_data(trait, dataset, samplelist=None): if samplelist == None: samplelist = [] - results = dataset.retrieve_sample_data(trait.name) + if dataset.type == "Temp": + results = Redis.get(trait.name).split() + else: + results = dataset.retrieve_sample_data(trait.name) # Todo: is this necessary? 
If not remove trait.data.clear() - all_samples_ordered = dataset.group.all_samples_ordered() - if results: - for item in results: - name, value, variance, num_cases, name2 = item - if not samplelist or (samplelist and name in samplelist): - trait.data[name] = webqtlCaseData(*item) #name, value, variance, num_cases) + if dataset.type == "Temp": + all_samples_ordered = dataset.group.all_samples_ordered() + for i, item in enumerate(results): + try: + trait.data[all_samples_ordered[i]] = webqtlCaseData(all_samples_ordered[i], float(item)) + except: + pass + else: + for item in results: + name, value, variance, num_cases, name2 = item + if not samplelist or (samplelist and name in samplelist): + trait.data[name] = webqtlCaseData(*item) #name, value, variance, num_cases) return trait @app.route("/trait/get_sample_data") diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py index 1c8dad10..6980af4e 100644 --- a/wqflask/utility/helper_functions.py +++ b/wqflask/utility/helper_functions.py @@ -45,7 +45,10 @@ def get_trait_db_obs(self, trait_db_list): data = data.strip() assert hmac==user_manager.actual_hmac_creation(data), "Data tampering?" 
trait_name, dataset_name = data.split(":") - dataset_ob = data_set.create_dataset(dataset_name) + if dataset_name == "Temp": + dataset_ob = data_set.create_dataset(dataset_name=dataset_name, dataset_type="Temp", group_name=trait_name.split("_")[2]) + else: + dataset_ob = data_set.create_dataset(dataset_name) trait_ob = GeneralTrait(dataset=dataset_ob, name=trait_name, cellid=None) diff --git a/wqflask/wqflask/collect.py b/wqflask/wqflask/collect.py index eb0e2726..6e1ac592 100644 --- a/wqflask/wqflask/collect.py +++ b/wqflask/wqflask/collect.py @@ -233,7 +233,10 @@ def collections_new(): collection_id = params['existing_collection'].split(":")[0] collection_name = params['existing_collection'].split(":")[1] if g.user_session.logged_in: - unprocessed_traits = Redis.get(params['hash']) + if "hash" in params: + unprocessed_traits = Redis.get(params['hash']) + else: + unprocessed_traits = params['traits'] traits = list(process_traits(unprocessed_traits)) g.user_session.add_traits_to_collection(collection_id, traits) return redirect(url_for('view_collection', uc_id=collection_id)) diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index c15c3579..0c6b8a2b 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -93,7 +93,7 @@ class CorrelationResults(object): with Bench("Doing correlations"): if start_vars['dataset'] == "Temp": self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group']) - self.trait_id = "Temp" + self.trait_id = start_vars['trait_id'] self.this_trait = GeneralTrait(dataset=self.dataset, name=self.trait_id, cellid=None) diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index 4bb4d65d..007e8e47 100644 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ 
b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -26,6 +26,7 @@ import sys import string import cPickle import os +import datetime import time import pp import math @@ -45,6 +46,9 @@ from pprint import pformat as pf from htmlgen import HTMLgen2 as HT import reaper +import redis +Redis = redis.StrictRedis() + from utility.THCell import THCell from utility.TDCell import TDCell from base.trait import GeneralTrait @@ -59,8 +63,10 @@ from MySQLdb import escape_string as escape from pprint import pformat as pf -from flask import Flask, g +from flask import Flask, g, url_for +import utility.logger +logger = utility.logger.getLogger(__name__ ) class CorrelationMatrix(object): @@ -111,6 +117,7 @@ class CorrelationMatrix(object): self.corr_results = [] self.pca_corr_results = [] self.trait_data_array = [] + self.shared_samples_list = self.all_sample_list for trait_db in self.trait_list: this_trait = trait_db[0] this_db = trait_db[1] @@ -138,6 +145,8 @@ class CorrelationMatrix(object): target_vals = [] for index, sample in enumerate(target_samples): if (sample in this_sample_data) and (sample in target_sample_data): + if sample not in self.shared_samples_list: + self.shared_samples_list.remove(sample) sample_value = this_sample_data[sample].value target_sample_value = target_sample_data[sample].value this_trait_vals.append(sample_value) @@ -174,6 +183,7 @@ class CorrelationMatrix(object): try: self.pca_works = "True" + self.pca_trait_ids = [] pca = self.calculate_pca(range(len(self.traits)), corr_eigen_value, corr_eigen_vectors) self.loadings_array = self.process_loadings() except: @@ -201,9 +211,6 @@ class CorrelationMatrix(object): cellid=None) self.trait_list.append((trait_ob, dataset_ob)) - #print("trait_list:", self.trait_list) - - def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors): base = importr('base') stats = importr('stats') @@ -224,9 +231,27 @@ class CorrelationMatrix(object): pca_traits = [] for i, vector in 
enumerate(trait_array_vectors): if corr_eigen_value[i-1] < 100.0/len(self.trait_list): - pca_traits.append(vector*-1.0) + pca_traits.append((vector*-1.0).tolist()) + + this_group_name = self.trait_list[0][1].group.name + temp_dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = this_group_name) + temp_dataset.group.get_samplelist() + for i, pca_trait in enumerate(pca_traits): + trait_id = "PCA" + str(i+1) + "_" + temp_dataset.group.species + "_" + this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S") + this_vals_string = "" + position = 0 + for sample in temp_dataset.group.all_samples_ordered(): + if sample in self.shared_samples_list: + this_vals_string += str(pca_trait[position]) + this_vals_string += " " + position += 1 + else: + this_vals_string += "x " + this_vals_string = this_vals_string[:-1] + + Redis.set(trait_id, this_vals_string) + self.pca_trait_ids.append(trait_id) - print("pca_traits:", pca_traits) return pca def process_loadings(self): diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py index 41d5c9d0..39e0d712 100644 --- a/wqflask/wqflask/marker_regression/display_mapping_results.py +++ b/wqflask/wqflask/marker_regression/display_mapping_results.py @@ -932,10 +932,13 @@ class DisplayMappingResults(object): if self.this_trait.symbol: identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.symbol) - elif self.this_trait.post_publication_abbreviation: - identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.post_publication_abbreviation) - elif self.this_trait.pre_publication_abbreviation: - identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.pre_publication_abbreviation) + elif self.dataset.type == "Publish": + if self.this_trait.post_publication_abbreviation: + identification += "Trait: %s - %s" % (self.this_trait.name, 
self.this_trait.post_publication_abbreviation) + elif self.this_trait.pre_publication_abbreviation: + identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.pre_publication_abbreviation) + else: + identification += "Trait: %s" % (self.this_trait.name) else: identification += "Trait: %s" % (self.this_trait.name) identification += " with %s samples" % (self.n_samples) diff --git a/wqflask/wqflask/templates/correlation_matrix.html b/wqflask/wqflask/templates/correlation_matrix.html index 8698b710..9c790780 100644 --- a/wqflask/wqflask/templates/correlation_matrix.html +++ b/wqflask/wqflask/templates/correlation_matrix.html @@ -60,6 +60,13 @@ {% if pca_works == "True" %} <br> <br> +<h2>PCA Traits</h2> +<ul> + {% for this_trait_id in pca_trait_ids %} + <li><a href="{{ url_for('show_trait_page', trait_id = pca_trait_ids[loop.index - 1], dataset = "Temp") }}">{{ pca_trait_ids[loop.index - 1] }}</a></li> + {% endfor %} +</ul> +<br> <h2>Factor Loadings Plot</h2> <div id="loadings_plot" style="margin-top: 20px; margin-bottom: 20px; width: 980px; border-style: solid; border-width: 1px;"></div> <h2>Factor Loadings Table</h2> diff --git a/wqflask/wqflask/templates/network_graph.html b/wqflask/wqflask/templates/network_graph.html index 6bee11e7..4492dd3f 100644 --- a/wqflask/wqflask/templates/network_graph.html +++ b/wqflask/wqflask/templates/network_graph.html @@ -34,7 +34,11 @@ <select name="focus_select"> <option disabled selected value>Select Trait</option> {% for trait in traits %} + {% if trait.symbol == None %} + <option value="{{ trait.name }}:{{ trait.dataset.name }}">{{ trait.name }}</option> + {% else %} <option value="{{ trait.name }}:{{ trait.dataset.name }}">{{ trait.symbol }} ({{ trait.name }})</option> + {% endif %} {% endfor %} </select> </td> @@ -46,11 +50,18 @@ </tr> <tr> <td colspan="1"> + <div style="text-align: center;"> + <div style="float: left;"><font size="2"><b>-1</b></font></div> + <font size="2"><b>0</b></font> + <div 
style="float: right;"><font size="2"><b>1</b></font></div> + </div> + <!-- <font size="2"><b>-1 0 1</b></font><br> - <input type="range" id="neg_slide" min="-1" max="0" value="0" step="0.001" list="corr_range" style="display: inline; width: 45%"> - <input type="range" id="pos_slide" min="0" max="1" value="0" step="0.001" list="corr_range" style="display: inline; width: 45%"> + --> + <input type="range" id="neg_slide" min="-1" max="0" value="0" step="0.001" list="corr_range" style="display: inline; width: 49%"> + <input type="range" id="pos_slide" min="0" max="1" value="0" step="0.001" list="corr_range" style="display: inline; width: 49%"> </td> </tr> <tr> |