From a3365dae23f204e489939d3defc55edc1b4872d8 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Mon, 1 Oct 2018 16:09:47 +0000
Subject: - Can now remove cofactors from correlation scatterplot and select
them by just clicking their row in collection
- Cofactor color picker now works in Safari/Macs
- Displays N for relevant samples in trait page sample table
- Don't show bar chart when N>256
- Mapping loading page contents better centered
- Anonymous collections timeout correctly listed as 30 days now
- Minor allele frequency can actually be changed for GEMMA now (previously didn't work)
- Fixed transcript position marker location for mapping results
- Notifies user if their e-mail isn't associated with an account when they attempt to request forgotten password
- Users can now map with submitted traits
- Histogram width changes depending upon number of bins (need to improve this still)
- Improved Q-Q plot (previously called "probability plot")
---
wqflask/utility/helper_functions.py | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
(limited to 'wqflask/utility/helper_functions.py')
diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py
index cf16879f..1c8dad10 100644
--- a/wqflask/utility/helper_functions.py
+++ b/wqflask/utility/helper_functions.py
@@ -14,7 +14,13 @@ logger = logging.getLogger(__name__ )
def get_species_dataset_trait(self, start_vars):
#assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype"
- self.dataset = data_set.create_dataset(start_vars['dataset'])
+ if "temp_trait" in start_vars.keys():
+ if start_vars['temp_trait'] == "True":
+ self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group'])
+ else:
+ self.dataset = data_set.create_dataset(start_vars['dataset'])
+ else:
+ self.dataset = data_set.create_dataset(start_vars['dataset'])
logger.debug("After creating dataset")
self.species = TheSpecies(dataset=self.dataset)
logger.debug("After creating species")
--
cgit v1.2.3
From d515061c4878b448f5b866e32eee7e37301ecdb7 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Mon, 18 Mar 2019 17:03:30 -0500
Subject: - Added PCA traits to correlation matrix. You can't change their
names yet or add them to a collection from the corr matrix page, but you can
click them and access them from the trait page. I'll add the option to access
them from the corr matrix page as well, but adding the option to change their
names might be trickier since they're currently used as their Redis keys. I
need some better way of passing the Redis key around so it can be stored in
collections, but this is tricky without changing the structure in
ElasticSearch
- Fixed the way temp traits work so you can use them with various functions, like mapping, correlations, network graph, etc
- Fixed an appearance issue where the network graph options were too wide if a trait name was too long
---
wqflask/base/data_set.py | 3 +-
wqflask/base/trait.py | 40 +++++++++++++++-------
wqflask/utility/helper_functions.py | 5 ++-
wqflask/wqflask/collect.py | 5 ++-
wqflask/wqflask/correlation/show_corr_results.py | 2 +-
.../wqflask/correlation_matrix/show_corr_matrix.py | 37 ++++++++++++++++----
.../marker_regression/display_mapping_results.py | 11 +++---
wqflask/wqflask/templates/correlation_matrix.html | 7 ++++
wqflask/wqflask/templates/network_graph.html | 15 ++++++--
9 files changed, 95 insertions(+), 30 deletions(-)
(limited to 'wqflask/utility/helper_functions.py')
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index ca6621e9..beb2a8a2 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -277,7 +277,6 @@ class DatasetGroup(object):
"""
def __init__(self, dataset, name=None):
"""This sets self.group and self.group_id"""
- #logger.debug("DATASET NAME2:", dataset.name)
if name == None:
self.name, self.id, self.genetic_type = fetchone(dataset.query_for_group)
else:
@@ -500,7 +499,7 @@ class DataSet(object):
self.setup()
if self.type == "Temp": #Need to supply group name as input if temp trait
- self.group = DatasetGroup(self, group_name) # sets self.group and self.group_id and gets genotype
+ self.group = DatasetGroup(self, name=group_name) # sets self.group and self.group_id and gets genotype
else:
self.check_confidentiality()
self.retrieve_other_names()
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 79aa196f..0689e950 100644
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -6,6 +6,9 @@ import codecs
from htmlgen import HTMLgen2 as HT
+import redis
+Redis = redis.StrictRedis()
+
from base import webqtlConfig
from base.webqtlCaseData import webqtlCaseData
from base.data_set import create_dataset
@@ -35,13 +38,15 @@ class GeneralTrait(object):
def __init__(self, get_qtl_info=False, get_sample_info=True, **kw):
# xor assertion
assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name";
+ self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc.
if kw.get('dataset_name'):
- self.dataset = create_dataset(kw.get('dataset_name'))
- #print(" in GeneralTrait created dataset:", self.dataset)
+ if kw.get('dataset_name') == "Temp":
+ temp_group = self.name.split("_")[2]
+ self.dataset = create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = temp_group)
+ else:
+ self.dataset = create_dataset(kw.get('dataset_name'))
else:
self.dataset = kw.get('dataset')
- self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc.
- #print("THE NAME IS:", self.name)
self.cellid = kw.get('cellid')
self.identification = kw.get('identification', 'un-named trait')
self.haveinfo = kw.get('haveinfo', False)
@@ -73,8 +78,8 @@ class GeneralTrait(object):
# So we could add a simple if statement to short-circuit this if necessary
if self.dataset.type != "Temp":
self = retrieve_trait_info(self, self.dataset, get_qtl_info=get_qtl_info)
- if get_sample_info != False:
- self = retrieve_sample_data(self, self.dataset)
+ if get_sample_info != False:
+ self = retrieve_sample_data(self, self.dataset)
def export_informative(self, include_variance=0):
"""
@@ -154,18 +159,27 @@ def retrieve_sample_data(trait, dataset, samplelist=None):
if samplelist == None:
samplelist = []
- results = dataset.retrieve_sample_data(trait.name)
+ if dataset.type == "Temp":
+ results = Redis.get(trait.name).split()
+ else:
+ results = dataset.retrieve_sample_data(trait.name)
# Todo: is this necessary? If not remove
trait.data.clear()
- all_samples_ordered = dataset.group.all_samples_ordered()
-
if results:
- for item in results:
- name, value, variance, num_cases, name2 = item
- if not samplelist or (samplelist and name in samplelist):
- trait.data[name] = webqtlCaseData(*item) #name, value, variance, num_cases)
+ if dataset.type == "Temp":
+ all_samples_ordered = dataset.group.all_samples_ordered()
+ for i, item in enumerate(results):
+ try:
+ trait.data[all_samples_ordered[i]] = webqtlCaseData(all_samples_ordered[i], float(item))
+ except:
+ pass
+ else:
+ for item in results:
+ name, value, variance, num_cases, name2 = item
+ if not samplelist or (samplelist and name in samplelist):
+ trait.data[name] = webqtlCaseData(*item) #name, value, variance, num_cases)
return trait
@app.route("/trait/get_sample_data")
diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py
index 1c8dad10..6980af4e 100644
--- a/wqflask/utility/helper_functions.py
+++ b/wqflask/utility/helper_functions.py
@@ -45,7 +45,10 @@ def get_trait_db_obs(self, trait_db_list):
data = data.strip()
assert hmac==user_manager.actual_hmac_creation(data), "Data tampering?"
trait_name, dataset_name = data.split(":")
- dataset_ob = data_set.create_dataset(dataset_name)
+ if dataset_name == "Temp":
+ dataset_ob = data_set.create_dataset(dataset_name=dataset_name, dataset_type="Temp", group_name=trait_name.split("_")[2])
+ else:
+ dataset_ob = data_set.create_dataset(dataset_name)
trait_ob = GeneralTrait(dataset=dataset_ob,
name=trait_name,
cellid=None)
diff --git a/wqflask/wqflask/collect.py b/wqflask/wqflask/collect.py
index eb0e2726..6e1ac592 100644
--- a/wqflask/wqflask/collect.py
+++ b/wqflask/wqflask/collect.py
@@ -233,7 +233,10 @@ def collections_new():
collection_id = params['existing_collection'].split(":")[0]
collection_name = params['existing_collection'].split(":")[1]
if g.user_session.logged_in:
- unprocessed_traits = Redis.get(params['hash'])
+ if "hash" in params:
+ unprocessed_traits = Redis.get(params['hash'])
+ else:
+ unprocessed_traits = params['traits']
traits = list(process_traits(unprocessed_traits))
g.user_session.add_traits_to_collection(collection_id, traits)
return redirect(url_for('view_collection', uc_id=collection_id))
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index c15c3579..0c6b8a2b 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -93,7 +93,7 @@ class CorrelationResults(object):
with Bench("Doing correlations"):
if start_vars['dataset'] == "Temp":
self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group'])
- self.trait_id = "Temp"
+ self.trait_id = start_vars['trait_id']
self.this_trait = GeneralTrait(dataset=self.dataset,
name=self.trait_id,
cellid=None)
diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
index 4bb4d65d..007e8e47 100644
--- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
+++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
@@ -26,6 +26,7 @@ import sys
import string
import cPickle
import os
+import datetime
import time
import pp
import math
@@ -45,6 +46,9 @@ from pprint import pformat as pf
from htmlgen import HTMLgen2 as HT
import reaper
+import redis
+Redis = redis.StrictRedis()
+
from utility.THCell import THCell
from utility.TDCell import TDCell
from base.trait import GeneralTrait
@@ -59,8 +63,10 @@ from MySQLdb import escape_string as escape
from pprint import pformat as pf
-from flask import Flask, g
+from flask import Flask, g, url_for
+import utility.logger
+logger = utility.logger.getLogger(__name__ )
class CorrelationMatrix(object):
@@ -111,6 +117,7 @@ class CorrelationMatrix(object):
self.corr_results = []
self.pca_corr_results = []
self.trait_data_array = []
+ self.shared_samples_list = self.all_sample_list
for trait_db in self.trait_list:
this_trait = trait_db[0]
this_db = trait_db[1]
@@ -138,6 +145,8 @@ class CorrelationMatrix(object):
target_vals = []
for index, sample in enumerate(target_samples):
if (sample in this_sample_data) and (sample in target_sample_data):
+ if sample not in self.shared_samples_list:
+ self.shared_samples_list.remove(sample)
sample_value = this_sample_data[sample].value
target_sample_value = target_sample_data[sample].value
this_trait_vals.append(sample_value)
@@ -174,6 +183,7 @@ class CorrelationMatrix(object):
try:
self.pca_works = "True"
+ self.pca_trait_ids = []
pca = self.calculate_pca(range(len(self.traits)), corr_eigen_value, corr_eigen_vectors)
self.loadings_array = self.process_loadings()
except:
@@ -201,9 +211,6 @@ class CorrelationMatrix(object):
cellid=None)
self.trait_list.append((trait_ob, dataset_ob))
- #print("trait_list:", self.trait_list)
-
-
def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors):
base = importr('base')
stats = importr('stats')
@@ -224,9 +231,27 @@ class CorrelationMatrix(object):
pca_traits = []
for i, vector in enumerate(trait_array_vectors):
if corr_eigen_value[i-1] < 100.0/len(self.trait_list):
- pca_traits.append(vector*-1.0)
+ pca_traits.append((vector*-1.0).tolist())
+
+ this_group_name = self.trait_list[0][1].group.name
+ temp_dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = this_group_name)
+ temp_dataset.group.get_samplelist()
+ for i, pca_trait in enumerate(pca_traits):
+ trait_id = "PCA" + str(i+1) + "_" + temp_dataset.group.species + "_" + this_group_name + "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
+ this_vals_string = ""
+ position = 0
+ for sample in temp_dataset.group.all_samples_ordered():
+ if sample in self.shared_samples_list:
+ this_vals_string += str(pca_trait[position])
+ this_vals_string += " "
+ position += 1
+ else:
+ this_vals_string += "x "
+ this_vals_string = this_vals_string[:-1]
+
+ Redis.set(trait_id, this_vals_string)
+ self.pca_trait_ids.append(trait_id)
- print("pca_traits:", pca_traits)
return pca
def process_loadings(self):
diff --git a/wqflask/wqflask/marker_regression/display_mapping_results.py b/wqflask/wqflask/marker_regression/display_mapping_results.py
index 41d5c9d0..39e0d712 100644
--- a/wqflask/wqflask/marker_regression/display_mapping_results.py
+++ b/wqflask/wqflask/marker_regression/display_mapping_results.py
@@ -932,10 +932,13 @@ class DisplayMappingResults(object):
if self.this_trait.symbol:
identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.symbol)
- elif self.this_trait.post_publication_abbreviation:
- identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.post_publication_abbreviation)
- elif self.this_trait.pre_publication_abbreviation:
- identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.pre_publication_abbreviation)
+ elif self.dataset.type == "Publish":
+ if self.this_trait.post_publication_abbreviation:
+ identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.post_publication_abbreviation)
+ elif self.this_trait.pre_publication_abbreviation:
+ identification += "Trait: %s - %s" % (self.this_trait.name, self.this_trait.pre_publication_abbreviation)
+ else:
+ identification += "Trait: %s" % (self.this_trait.name)
else:
identification += "Trait: %s" % (self.this_trait.name)
identification += " with %s samples" % (self.n_samples)
diff --git a/wqflask/wqflask/templates/correlation_matrix.html b/wqflask/wqflask/templates/correlation_matrix.html
index 8698b710..9c790780 100644
--- a/wqflask/wqflask/templates/correlation_matrix.html
+++ b/wqflask/wqflask/templates/correlation_matrix.html
@@ -60,6 +60,13 @@
{% if pca_works == "True" %}
+