From be9c4a39500d7978b4cae7536a5f96c3818d211e Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Wed, 24 Mar 2021 09:41:47 +0300
Subject: initial commit for gn3-correlation api integration
---
.../wqflask/correlation/test_correlation_gn3.py | 14 ++++
wqflask/wqflask/correlation/correlation_gn3_api.py | 77 ++++++++++++++++++++++
2 files changed, 91 insertions(+)
create mode 100644 wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py
create mode 100644 wqflask/wqflask/correlation/correlation_gn3_api.py
diff --git a/wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py b/wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py
new file mode 100644
index 00000000..e1bd6d86
--- /dev/null
+++ b/wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py
@@ -0,0 +1,14 @@
+"""this module contains tests for code used in integrating to gn3 api"""
+from unittest import TestCase
+from base.data_set import create_dataset
+
+class TestCorrelation(TestCase):
+
+ def test_create_dataset(self):
+ """test for creating datasets"""
+
+ pass
+ def test_fetch_dataset_info(self):
+ """test for fetching dataset info data"""
+
+ pass
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
new file mode 100644
index 00000000..4cf6533c
--- /dev/null
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -0,0 +1,77 @@
+"""module that calls the gn3 api's to do the correlation """
+from base import data_set
+from base.trait import create_trait
+from base.trait import retrieve_sample_data
+
+
+
+
+
+
+
+def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, method="pearson"):
+ import requests
+ from wqflask.correlation.correlation_gn3_api import compute_correlation
+
+ cor_results = compute_correlation(start_vars)
+
+ data = {
+ "target_dataset": target_dataset,
+ "target_samplelist": target_samplelist,
+ "trait_data": {
+ "trait_sample_data": trait_data,
+ "trait_id": "HC_Q"
+ }
+ }
+ requests_url = f"http://127.0.0.1:8080/api/correlation/sample_x/{method}"
+
+ results = requests.post(requests_url, json=data)
+
+ data = results.json()
+
+ print(data)
+
+ return data
+
+
+def process_samples(start_vars,sample_names,excluded_samples=None):
+ sample_data = {}
+ if not excluded_samples:
+ excluded_samples = ()
+
+ sample_vals_dict = json.loads(start_vars["sample_vals"])
+
+ for sample in sample_names:
+ if sample not in excluded_samples:
+ val = sample_val_dict[sample]
+ if not val.strip().lower() == "x":
+ sample_data[str(sample)]=float(value)
+
+ return sample_data
+
+
+def create_fetch_dataset_data(dataset_name):
+ this_dataset = data_set.create_dataset(dataset_name=dataset_name)
+
+ this_dataset.get_trait_data()
+
+
+def create_target_this_trait(start_vars):
+ """this function prefetch required data for correlation"""
+
+ this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
+ target_dataset = data_set.create_dataset(
+ dataset_name=start_vars['corr_dataset'])
+
+ this_trait = create_trait(dataset=this_dataset,
+ name=start_vars['trait_id'])
+
+ this_trait = retrieve_sample_data(this_trait, this_dataset)
+
+ target_dataset.get_trait_data()
+
+ return (this_dataset,this_trait,target_dataset)
+def compute_correlation(start_vars):
+
+ this_dataset, this_trait, target_dataset = create_target_this_trait(
+ start_vars=start_vars)
--
cgit v1.2.3
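
For orientation, here is a minimal standalone sketch of the request the new
compute_sample_r makes. The endpoint URL and the payload keys are taken from
the diff above; the trait names, sample values and the exact shape of the
"target_dataset" entry are illustrative assumptions only (in GN2 it is
whatever data_set.get_trait_data() produces), and a GN3 service is assumed to
be listening on 127.0.0.1:8080 as hard-coded in the commit.

    import requests

    # Endpoint as hard-coded in compute_sample_r above; "pearson" is the
    # default method parameter.
    url = "http://127.0.0.1:8080/api/correlation/sample_x/pearson"

    payload = {
        # target trait data; the dict-of-lists shape here is only a guess
        "target_dataset": {"1427571_at": [8.2, 8.9]},
        "target_samplelist": ["BXD1", "BXD2"],
        "trait_data": {
            # sample -> value for the primary trait
            "trait_sample_data": {"BXD1": 11.3, "BXD2": 10.1},
            "trait_id": "HC_Q",  # placeholder id, as in the commit
        },
    }

    response = requests.post(url, json=payload)
    print(response.json())
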
From d913848572dd284ae7656e72dad199e99907871a Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Wed, 24 Mar 2021 12:59:49 +0300
Subject: initial commit for integrating to gn3 api
---
wqflask/wqflask/correlation/show_corr_results.py | 428 +++++++++++++----------
1 file changed, 242 insertions(+), 186 deletions(-)
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index fb4dc4f4..a817a4a4 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -1,4 +1,4 @@
-## Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
+# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License
@@ -58,6 +58,31 @@ TISSUE_METHODS = [METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK]
TISSUE_MOUSE_DB = 1
+def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, method="pearson"):
+ import requests
+ from wqflask.correlation.correlation_gn3_api import compute_correlation
+
+ # cor_results = compute_correlation(start_vars)
+
+ data = {
+ "target_dataset": target_dataset,
+ "target_samplelist": target_samplelist,
+ "trait_data": {
+ "trait_sample_data": trait_data,
+ "trait_id": "HC_Q"
+ }
+ }
+ requests_url = f"http://127.0.0.1:8080/api/correlation/sample_x/{method}"
+
+ results = requests.post(requests_url, json=data)
+
+ data = results.json()
+
+ print(data)
+
+ return data
+
+
class CorrelationResults(object):
def __init__(self, start_vars):
# get trait list from db (database name)
@@ -78,11 +103,12 @@ class CorrelationResults(object):
with Bench("Doing correlations"):
if start_vars['dataset'] == "Temp":
- self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group'])
+ self.dataset = data_set.create_dataset(
+ dataset_name="Temp", dataset_type="Temp", group_name=start_vars['group'])
self.trait_id = start_vars['trait_id']
self.this_trait = create_trait(dataset=self.dataset,
- name=self.trait_id,
- cellid=None)
+ name=self.trait_id,
+ cellid=None)
else:
helper_functions.get_species_dataset_trait(self, start_vars)
@@ -97,7 +123,7 @@ class CorrelationResults(object):
if ('loc_chr' in start_vars and
'min_loc_mb' in start_vars and
- 'max_loc_mb' in start_vars):
+ 'max_loc_mb' in start_vars):
self.location_type = get_string(start_vars, 'location_type')
self.location_chr = get_string(start_vars, 'loc_chr')
@@ -109,8 +135,8 @@ class CorrelationResults(object):
self.get_formatted_corr_type()
self.return_number = int(start_vars['corr_return_results'])
- #The two if statements below append samples to the sample list based upon whether the user
- #rselected Primary Samples Only, Other Samples Only, or All Samples
+ # The two if statements below append samples to the sample list based upon whether the user
+ # rselected Primary Samples Only, Other Samples Only, or All Samples
primary_samples = self.dataset.group.samplelist
if self.dataset.group.parlist != None:
@@ -118,23 +144,26 @@ class CorrelationResults(object):
if self.dataset.group.f1list != None:
primary_samples += self.dataset.group.f1list
- #If either BXD/whatever Only or All Samples, append all of that group's samplelist
+ # If either BXD/whatever Only or All Samples, append all of that group's samplelist
if corr_samples_group != 'samples_other':
self.process_samples(start_vars, primary_samples)
- #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and
- #exclude the primary samples (because they would have been added in the previous
- #if statement if the user selected All Samples)
+ # If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and
+ # exclude the primary samples (because they would have been added in the previous
+ # if statement if the user selected All Samples)
if corr_samples_group != 'samples_primary':
if corr_samples_group == 'samples_other':
primary_samples = [x for x in primary_samples if x not in (
- self.dataset.group.parlist + self.dataset.group.f1list)]
- self.process_samples(start_vars, list(self.this_trait.data.keys()), primary_samples)
+ self.dataset.group.parlist + self.dataset.group.f1list)]
+ self.process_samples(start_vars, list(
+ self.this_trait.data.keys()), primary_samples)
- self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
+ self.target_dataset = data_set.create_dataset(
+ start_vars['corr_dataset'])
self.target_dataset.get_trait_data(list(self.sample_data.keys()))
- self.header_fields = get_header_fields(self.target_dataset.type, self.corr_method)
+ self.header_fields = get_header_fields(
+ self.target_dataset.type, self.corr_method)
if self.target_dataset.type == "ProbeSet":
self.filter_cols = [7, 6]
@@ -153,7 +182,8 @@ class CorrelationResults(object):
tissue_corr_data = self.do_tissue_correlation_for_all_traits()
if tissue_corr_data != None:
for trait in list(tissue_corr_data.keys())[:self.return_number]:
- self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])
+ self.get_sample_r_and_p_values(
+ trait, self.target_dataset.trait_data[trait])
else:
for trait, values in list(self.target_dataset.trait_data.items()):
self.get_sample_r_and_p_values(trait, values)
@@ -163,80 +193,85 @@ class CorrelationResults(object):
lit_corr_data = self.do_lit_correlation_for_all_traits()
for trait in list(lit_corr_data.keys())[:self.return_number]:
- self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])
+ self.get_sample_r_and_p_values(
+ trait, self.target_dataset.trait_data[trait])
elif self.corr_type == "sample":
- for trait, values in list(self.target_dataset.trait_data.items()):
- self.get_sample_r_and_p_values(trait, values)
-
- self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()),
- key=lambda t: -abs(t[1][0])))
-
-
- #ZS: Convert min/max chromosome to an int for the location range option
- range_chr_as_int = None
- for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
- if 'loc_chr' in start_vars:
- if chr_info.name == self.location_chr:
- range_chr_as_int = order_id
-
- for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]):
- trait_object = create_trait(dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False)
- if not trait_object:
- continue
-
- chr_as_int = 0
- for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
- if self.location_type == "highest_lod":
- if chr_info.name == trait_object.locus_chr:
- chr_as_int = order_id
- else:
- if chr_info.name == trait_object.chr:
- chr_as_int = order_id
-
- if (float(self.correlation_data[trait][0]) >= self.p_range_lower and
- float(self.correlation_data[trait][0]) <= self.p_range_upper):
-
- if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean):
- if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr):
- continue
-
- if range_chr_as_int != None and (chr_as_int != range_chr_as_int):
- continue
- if self.location_type == "highest_lod":
- if (self.min_location_mb != None) and (float(trait_object.locus_mb) < float(self.min_location_mb)):
- continue
- if (self.max_location_mb != None) and (float(trait_object.locus_mb) > float(self.max_location_mb)):
- continue
- else:
- if (self.min_location_mb != None) and (float(trait_object.mb) < float(self.min_location_mb)):
- continue
- if (self.max_location_mb != None) and (float(trait_object.mb) > float(self.max_location_mb)):
- continue
-
- (trait_object.sample_r,
- trait_object.sample_p,
- trait_object.num_overlap) = self.correlation_data[trait]
-
- # Set some sane defaults
- trait_object.tissue_corr = 0
- trait_object.tissue_pvalue = 0
- trait_object.lit_corr = 0
- if self.corr_type == "tissue" and tissue_corr_data != None:
- trait_object.tissue_corr = tissue_corr_data[trait][1]
- trait_object.tissue_pvalue = tissue_corr_data[trait][2]
- elif self.corr_type == "lit":
- trait_object.lit_corr = lit_corr_data[trait][1]
-
- self.correlation_results.append(trait_object)
-
- if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
- self.do_lit_correlation_for_trait_list()
-
- if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
- self.do_tissue_correlation_for_trait_list()
-
- self.json_results = generate_corr_json(self.correlation_results, self.this_trait, self.dataset, self.target_dataset)
+
+ compute_sample_r(start_vars,
+ self.target_dataset.trait_data, self.sample_data, self.target_dataset.samplelist)
+ # for trait, values in list(self.target_dataset.trait_data.items()):
+ # self.get_sample_r_and_p_values(trait, values)
+
+ # self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()),
+ # key=lambda t: -abs(t[1][0])))
+
+ # # ZS: Convert min/max chromosome to an int for the location range option
+ # range_chr_as_int = None
+ # for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
+ # if 'loc_chr' in start_vars:
+ # if chr_info.name == self.location_chr:
+ # range_chr_as_int = order_id
+
+ # for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]):
+ # trait_object = create_trait(
+ # dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False)
+ # if not trait_object:
+ # continue
+
+ # chr_as_int = 0
+ # for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
+ # if self.location_type == "highest_lod":
+ # if chr_info.name == trait_object.locus_chr:
+ # chr_as_int = order_id
+ # else:
+ # if chr_info.name == trait_object.chr:
+ # chr_as_int = order_id
+
+ # if (float(self.correlation_data[trait][0]) >= self.p_range_lower and
+ # float(self.correlation_data[trait][0]) <= self.p_range_upper):
+
+ # if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean):
+ # if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr):
+ # continue
+
+ # if range_chr_as_int != None and (chr_as_int != range_chr_as_int):
+ # continue
+ # if self.location_type == "highest_lod":
+ # if (self.min_location_mb != None) and (float(trait_object.locus_mb) < float(self.min_location_mb)):
+ # continue
+ # if (self.max_location_mb != None) and (float(trait_object.locus_mb) > float(self.max_location_mb)):
+ # continue
+ # else:
+ # if (self.min_location_mb != None) and (float(trait_object.mb) < float(self.min_location_mb)):
+ # continue
+ # if (self.max_location_mb != None) and (float(trait_object.mb) > float(self.max_location_mb)):
+ # continue
+
+ # (trait_object.sample_r,
+ # trait_object.sample_p,
+ # trait_object.num_overlap) = self.correlation_data[trait]
+
+ # # Set some sane defaults
+ # trait_object.tissue_corr = 0
+ # trait_object.tissue_pvalue = 0
+ # trait_object.lit_corr = 0
+ # if self.corr_type == "tissue" and tissue_corr_data != None:
+ # trait_object.tissue_corr = tissue_corr_data[trait][1]
+ # trait_object.tissue_pvalue = tissue_corr_data[trait][2]
+ # elif self.corr_type == "lit":
+ # trait_object.lit_corr = lit_corr_data[trait][1]
+
+ # self.correlation_results.append(trait_object)
+
+ # if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
+ # self.do_lit_correlation_for_trait_list()
+
+ # if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
+ # self.do_tissue_correlation_for_trait_list()
+
+ # self.json_results = generate_corr_json(
+ # self.correlation_results, self.this_trait, self.dataset, self.target_dataset)
############################################################################################################################################
@@ -259,39 +294,43 @@ class CorrelationResults(object):
def do_tissue_correlation_for_trait_list(self, tissue_dataset_id=1):
"""Given a list of correlation results (self.correlation_results), gets the tissue correlation value for each"""
- #Gets tissue expression values for the primary trait
+ # Gets tissue expression values for the primary trait
primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list = [self.this_trait.symbol])
+ symbol_list=[self.this_trait.symbol])
if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
- primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()]
- gene_symbol_list = [trait.symbol for trait in self.correlation_results if trait.symbol]
+ primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower(
+ )]
+ gene_symbol_list = [
+ trait.symbol for trait in self.correlation_results if trait.symbol]
- corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list=gene_symbol_list)
+ corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list=gene_symbol_list)
for trait in self.correlation_results:
if trait.symbol and trait.symbol.lower() in corr_result_tissue_vals_dict:
- this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower()]
+ this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower(
+ )]
result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values,
- this_trait_tissue_values,
- self.corr_method)
+ this_trait_tissue_values,
+ self.corr_method)
trait.tissue_corr = result[0]
trait.tissue_pvalue = result[2]
def do_tissue_correlation_for_all_traits(self, tissue_dataset_id=1):
- #Gets tissue expression values for the primary trait
+ # Gets tissue expression values for the primary trait
primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list = [self.this_trait.symbol])
+ symbol_list=[self.this_trait.symbol])
if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
- primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()]
+ primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower(
+ )]
#print("trait_gene_symbols: ", pf(trait_gene_symbols.values()))
- corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list=list(self.trait_symbol_dict.values()))
+ corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list=list(self.trait_symbol_dict.values()))
#print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict))
@@ -300,27 +339,30 @@ class CorrelationResults(object):
tissue_corr_data = {}
for trait, symbol in list(self.trait_symbol_dict.items()):
if symbol and symbol.lower() in corr_result_tissue_vals_dict:
- this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()]
+ this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower(
+ )]
result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values,
- this_trait_tissue_values,
- self.corr_method)
+ this_trait_tissue_values,
+ self.corr_method)
tissue_corr_data[trait] = [symbol, result[0], result[2]]
tissue_corr_data = collections.OrderedDict(sorted(list(tissue_corr_data.items()),
- key=lambda t: -abs(t[1][1])))
+ key=lambda t: -abs(t[1][1])))
return tissue_corr_data
def do_lit_correlation_for_trait_list(self):
- input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid)
+ input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(
+ self.dataset.group.species.lower(), self.this_trait.geneid)
for trait in self.correlation_results:
if trait.geneid:
- trait.mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), trait.geneid)
+ trait.mouse_gene_id = self.convert_to_mouse_gene_id(
+ self.dataset.group.species.lower(), trait.geneid)
else:
trait.mouse_gene_id = None
@@ -348,13 +390,14 @@ class CorrelationResults(object):
else:
trait.lit_corr = 0
-
def do_lit_correlation_for_all_traits(self):
- input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid)
+ input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(
+ self.dataset.group.species.lower(), self.this_trait.geneid)
lit_corr_data = {}
for trait, gene_id in list(self.trait_geneid_dict.items()):
- mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), gene_id)
+ mouse_gene_id = self.convert_to_mouse_gene_id(
+ self.dataset.group.species.lower(), gene_id)
if mouse_gene_id and str(mouse_gene_id).find(";") == -1:
#print("gene_symbols:", input_trait_mouse_gene_id + " / " + mouse_gene_id)
@@ -382,7 +425,7 @@ class CorrelationResults(object):
lit_corr_data[trait] = [gene_id, 0]
lit_corr_data = collections.OrderedDict(sorted(list(lit_corr_data.items()),
- key=lambda t: -abs(t[1][1])))
+ key=lambda t: -abs(t[1][1])))
return lit_corr_data
@@ -422,6 +465,7 @@ class CorrelationResults(object):
return mouse_gene_id
+
def get_sample_r_and_p_values(self, trait, target_samples):
"""Calculates the sample r (or rho) and p-value
@@ -431,6 +475,9 @@ class CorrelationResults(object):
"""
+ print("below here>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
+ print(self.target_dataset.trait_data)
+
self.this_trait_vals = []
target_vals = []
for index, sample in enumerate(self.target_dataset.samplelist):
@@ -440,21 +487,26 @@ class CorrelationResults(object):
self.this_trait_vals.append(sample_value)
target_vals.append(target_sample_value)
- self.this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(self.this_trait_vals, target_vals)
+ self.this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
+ self.this_trait_vals, target_vals)
if num_overlap > 5:
- #ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/
+ # ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/
if self.corr_method == 'bicor':
- sample_r, sample_p = do_bicor(self.this_trait_vals, target_vals)
+ sample_r, sample_p = do_bicor(
+ self.this_trait_vals, target_vals)
elif self.corr_method == 'pearson':
- sample_r, sample_p = scipy.stats.pearsonr(self.this_trait_vals, target_vals)
+ sample_r, sample_p = scipy.stats.pearsonr(
+ self.this_trait_vals, target_vals)
else:
- sample_r, sample_p = scipy.stats.spearmanr(self.this_trait_vals, target_vals)
+ sample_r, sample_p = scipy.stats.spearmanr(
+ self.this_trait_vals, target_vals)
if numpy.isnan(sample_r):
pass
else:
- self.correlation_data[trait] = [sample_r, sample_p, num_overlap]
+ self.correlation_data[trait] = [
+ sample_r, sample_p, num_overlap]
def process_samples(self, start_vars, sample_names, excluded_samples=None):
if not excluded_samples:
@@ -475,16 +527,18 @@ def do_bicor(this_trait_vals, target_trait_vals):
r_library("WGCNA")
r_bicor = ro.r["bicorAndPvalue"] # Map the bicorAndPvalue function
- r_options(stringsAsFactors = False)
+ r_options(stringsAsFactors=False)
this_vals = ro.Vector(this_trait_vals)
target_vals = ro.Vector(target_trait_vals)
- the_r, the_p, _fisher_transform, _the_t, _n_obs = [numpy.asarray(x) for x in r_bicor(x = this_vals, y = target_vals)]
+ the_r, the_p, _fisher_transform, _the_t, _n_obs = [
+ numpy.asarray(x) for x in r_bicor(x=this_vals, y=target_vals)]
return the_r, the_p
-def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_api = False):
+
+def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_api=False):
results_list = []
for i, trait in enumerate(corr_results):
if trait.view == False:
@@ -493,7 +547,8 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap
results_dict['index'] = i + 1
results_dict['trait_id'] = trait.name
results_dict['dataset'] = trait.dataset.name
- results_dict['hmac'] = hmac.data_hmac('{}:{}'.format(trait.name, trait.dataset.name))
+ results_dict['hmac'] = hmac.data_hmac(
+ '{}:{}'.format(trait.name, trait.dataset.name))
if target_dataset.type == "ProbeSet":
results_dict['symbol'] = trait.symbol
results_dict['description'] = "N/A"
@@ -544,7 +599,8 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap
if bool(trait.authors):
authors_list = trait.authors.split(',')
if len(authors_list) > 6:
- results_dict['authors_display'] = ", ".join(authors_list[:6]) + ", et al."
+ results_dict['authors_display'] = ", ".join(
+ authors_list[:6]) + ", et al."
else:
results_dict['authors_display'] = trait.authors
if bool(trait.pubmed_id):
@@ -574,85 +630,85 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap
return json.dumps(results_list)
+
def get_header_fields(data_type, corr_method):
if data_type == "ProbeSet":
if corr_method == "spearman":
header_fields = ['Index',
- 'Record',
- 'Symbol',
- 'Description',
- 'Location',
- 'Mean',
- 'Sample rho',
- 'N',
- 'Sample p(rho)',
- 'Lit rho',
- 'Tissue rho',
- 'Tissue p(rho)',
- 'Max LRS',
- 'Max LRS Location',
- 'Additive Effect']
+ 'Record',
+ 'Symbol',
+ 'Description',
+ 'Location',
+ 'Mean',
+ 'Sample rho',
+ 'N',
+ 'Sample p(rho)',
+ 'Lit rho',
+ 'Tissue rho',
+ 'Tissue p(rho)',
+ 'Max LRS',
+ 'Max LRS Location',
+ 'Additive Effect']
else:
header_fields = ['Index',
- 'Record',
- 'Symbol',
- 'Description',
- 'Location',
- 'Mean',
- 'Sample r',
- 'N',
- 'Sample p(r)',
- 'Lit r',
- 'Tissue r',
- 'Tissue p(r)',
- 'Max LRS',
- 'Max LRS Location',
- 'Additive Effect']
+ 'Record',
+ 'Symbol',
+ 'Description',
+ 'Location',
+ 'Mean',
+ 'Sample r',
+ 'N',
+ 'Sample p(r)',
+ 'Lit r',
+ 'Tissue r',
+ 'Tissue p(r)',
+ 'Max LRS',
+ 'Max LRS Location',
+ 'Additive Effect']
elif data_type == "Publish":
if corr_method == "spearman":
header_fields = ['Index',
- 'Record',
- 'Abbreviation',
- 'Description',
- 'Mean',
- 'Authors',
- 'Year',
- 'Sample rho',
- 'N',
- 'Sample p(rho)',
- 'Max LRS',
- 'Max LRS Location',
- 'Additive Effect']
+ 'Record',
+ 'Abbreviation',
+ 'Description',
+ 'Mean',
+ 'Authors',
+ 'Year',
+ 'Sample rho',
+ 'N',
+ 'Sample p(rho)',
+ 'Max LRS',
+ 'Max LRS Location',
+ 'Additive Effect']
else:
header_fields = ['Index',
- 'Record',
- 'Abbreviation',
- 'Description',
- 'Mean',
- 'Authors',
- 'Year',
- 'Sample r',
- 'N',
- 'Sample p(r)',
- 'Max LRS',
- 'Max LRS Location',
- 'Additive Effect']
+ 'Record',
+ 'Abbreviation',
+ 'Description',
+ 'Mean',
+ 'Authors',
+ 'Year',
+ 'Sample r',
+ 'N',
+ 'Sample p(r)',
+ 'Max LRS',
+ 'Max LRS Location',
+ 'Additive Effect']
else:
if corr_method == "spearman":
header_fields = ['Index',
- 'ID',
- 'Location',
- 'Sample rho',
- 'N',
- 'Sample p(rho)']
+ 'ID',
+ 'Location',
+ 'Sample rho',
+ 'N',
+ 'Sample p(rho)']
else:
header_fields = ['Index',
- 'ID',
- 'Location',
- 'Sample r',
- 'N',
- 'Sample p(r)']
+ 'ID',
+ 'Location',
+ 'Sample r',
+ 'N',
+ 'Sample p(r)']
return header_fields
-
--
cgit v1.2.3
From e5d2ce8f29e43900977b967ec8cac715f544a2f0 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 25 Mar 2021 02:25:45 +0300
Subject: add code for calling gn3 correlation endpoint
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 131 ++++++++++++----
wqflask/wqflask/correlation/show_corr_results.py | 174 +++++++++------------
wqflask/wqflask/views.py | 5 +-
3 files changed, 177 insertions(+), 133 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 4cf6533c..7e269e41 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -1,20 +1,17 @@
"""module that calls the gn3 api's to do the correlation """
-from base import data_set
-from base.trait import create_trait
-from base.trait import retrieve_sample_data
+import json
+import requests
+from wqflask.wqflask.correlation import correlation_functions
+from wqflask.base import data_set
+from wqflask.base.trait import create_trait
+from wqflask.base.trait import retrieve_sample_data
+GN3_CORRELATION_API = "http://127.0.0.1:8080/api/correlation"
-
-
-
-def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, method="pearson"):
- import requests
- from wqflask.correlation.correlation_gn3_api import compute_correlation
-
- cor_results = compute_correlation(start_vars)
-
+def compute_sample(target_dataset, trait_data, target_samplelist, method="pearson"):
+ """integration for integrating sample_r api correlation"""
data = {
"target_dataset": target_dataset,
"target_samplelist": target_samplelist,
@@ -29,33 +26,60 @@ def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, m
data = results.json()
- print(data)
-
return data
-def process_samples(start_vars,sample_names,excluded_samples=None):
+def get_tissue_correlation_input(this_trait, trait_symbol_dict):
+ """Gets tissue expression values for the primary trait and target tissues values"""
+ primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list=[this_trait.symbol])
+
+ if this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
+ primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower(
+ )]
+
+ corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list=list(trait_symbol_dict.values()))
+
+ target_tissue_data = []
+ for trait, symbol in list(trait_symbol_dict.items()):
+ if symbol and symbol.lower() in corr_result_tissue_vals_dict:
+ this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower(
+ )]
+
+ this_trait_data = {"trait_id": trait,
+ "tissue_values": this_trait_tissue_values}
+
+ target_tissue_data.append(this_trait_data)
+
+ primary_tissue_data = {
+ "this_id": "TT",
+ "tissue_values": primary_trait_tissue_values
+
+ }
+
+ return (primary_tissue_data, target_tissue_data)
+
+ return None
+
+
+def process_samples(start_vars, sample_names, excluded_samples=None):
+ """process samples method"""
sample_data = {}
if not excluded_samples:
excluded_samples = ()
- sample_vals_dict = json.loads(start_vars["sample_vals"])
+ sample_vals_dict = json.loads(start_vars["sample_vals"])
for sample in sample_names:
if sample not in excluded_samples:
- val = sample_val_dict[sample]
+ val = sample_vals_dict[sample]
if not val.strip().lower() == "x":
- sample_data[str(sample)]=float(value)
+ sample_data[str(sample)] = float(val)
return sample_data
-def create_fetch_dataset_data(dataset_name):
- this_dataset = data_set.create_dataset(dataset_name=dataset_name)
-
- this_dataset.get_trait_data()
-
-
def create_target_this_trait(start_vars):
"""this function prefetch required data for correlation"""
@@ -66,12 +90,61 @@ def create_target_this_trait(start_vars):
this_trait = create_trait(dataset=this_dataset,
name=start_vars['trait_id'])
+ sample_data = process_samples(start_vars, this_dataset.group.samplelist)
+ # target_dataset.get_trait_data(list(self.sample_data.keys()))
+
this_trait = retrieve_sample_data(this_trait, this_dataset)
- target_dataset.get_trait_data()
+ target_dataset.get_trait_data(list(sample_data.keys()))
+
+ return (this_dataset, this_trait, target_dataset, sample_data)
+
+
+def compute_correlation(start_vars, method="pearson"):
+ """compute correlation for to call gn3 api"""
+
+ corr_type = start_vars['corr_type']
+
+ (this_dataset, this_trait, target_dataset,
+ sample_data) = create_target_this_trait(start_vars)
+
+ # cor_results = compute_correlation(start_vars)
+
+ method = start_vars['corr_sample_method']
+
+ corr_input_data = {}
- return (this_dataset,this_trait,target_dataset)
-def compute_correlation(start_vars):
+ if corr_type == "sample":
+ corr_input_data = {
+ "target_dataset": target_dataset.trait_data,
+ "target_samplelist": target_dataset.samplelist,
+ "trait_data": {
+ "trait_sample_data": sample_data,
+ "trait_id": start_vars["trait_id"]
+ }
+ }
+
+ requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
+
+ elif corr_type == "tissue":
+ trait_symbol_dict = this_dataset.retrieve_genes("Symbol")
+ primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
+ this_trait, trait_symbol_dict)
+
+ corr_input_data = {
+ "primary_tissue": primary_tissue_data,
+ "target_tissues": target_tissue_data
+ }
- this_dataset, this_trait, target_dataset = create_target_this_trait(
- start_vars=start_vars)
+ requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
+
+ else:
+ pass
+ # lit correlation/literature
+ # can fetch values in gn3 not set up in gn3
+
+ corr_results = requests.post(requests_url, json=corr_input_data)
+
+ data = corr_results.json()
+
+ return data
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index a817a4a4..50b3ba26 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -57,32 +57,6 @@ TISSUE_METHODS = [METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK]
TISSUE_MOUSE_DB = 1
-
-def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, method="pearson"):
- import requests
- from wqflask.correlation.correlation_gn3_api import compute_correlation
-
- # cor_results = compute_correlation(start_vars)
-
- data = {
- "target_dataset": target_dataset,
- "target_samplelist": target_samplelist,
- "trait_data": {
- "trait_sample_data": trait_data,
- "trait_id": "HC_Q"
- }
- }
- requests_url = f"http://127.0.0.1:8080/api/correlation/sample_x/{method}"
-
- results = requests.post(requests_url, json=data)
-
- data = results.json()
-
- print(data)
-
- return data
-
-
class CorrelationResults(object):
def __init__(self, start_vars):
# get trait list from db (database name)
@@ -197,81 +171,78 @@ class CorrelationResults(object):
trait, self.target_dataset.trait_data[trait])
elif self.corr_type == "sample":
-
- compute_sample_r(start_vars,
- self.target_dataset.trait_data, self.sample_data, self.target_dataset.samplelist)
- # for trait, values in list(self.target_dataset.trait_data.items()):
- # self.get_sample_r_and_p_values(trait, values)
-
- # self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()),
- # key=lambda t: -abs(t[1][0])))
-
- # # ZS: Convert min/max chromosome to an int for the location range option
- # range_chr_as_int = None
- # for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
- # if 'loc_chr' in start_vars:
- # if chr_info.name == self.location_chr:
- # range_chr_as_int = order_id
-
- # for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]):
- # trait_object = create_trait(
- # dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False)
- # if not trait_object:
- # continue
-
- # chr_as_int = 0
- # for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
- # if self.location_type == "highest_lod":
- # if chr_info.name == trait_object.locus_chr:
- # chr_as_int = order_id
- # else:
- # if chr_info.name == trait_object.chr:
- # chr_as_int = order_id
-
- # if (float(self.correlation_data[trait][0]) >= self.p_range_lower and
- # float(self.correlation_data[trait][0]) <= self.p_range_upper):
-
- # if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean):
- # if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr):
- # continue
-
- # if range_chr_as_int != None and (chr_as_int != range_chr_as_int):
- # continue
- # if self.location_type == "highest_lod":
- # if (self.min_location_mb != None) and (float(trait_object.locus_mb) < float(self.min_location_mb)):
- # continue
- # if (self.max_location_mb != None) and (float(trait_object.locus_mb) > float(self.max_location_mb)):
- # continue
- # else:
- # if (self.min_location_mb != None) and (float(trait_object.mb) < float(self.min_location_mb)):
- # continue
- # if (self.max_location_mb != None) and (float(trait_object.mb) > float(self.max_location_mb)):
- # continue
-
- # (trait_object.sample_r,
- # trait_object.sample_p,
- # trait_object.num_overlap) = self.correlation_data[trait]
-
- # # Set some sane defaults
- # trait_object.tissue_corr = 0
- # trait_object.tissue_pvalue = 0
- # trait_object.lit_corr = 0
- # if self.corr_type == "tissue" and tissue_corr_data != None:
- # trait_object.tissue_corr = tissue_corr_data[trait][1]
- # trait_object.tissue_pvalue = tissue_corr_data[trait][2]
- # elif self.corr_type == "lit":
- # trait_object.lit_corr = lit_corr_data[trait][1]
-
- # self.correlation_results.append(trait_object)
-
- # if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
- # self.do_lit_correlation_for_trait_list()
-
- # if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
- # self.do_tissue_correlation_for_trait_list()
-
- # self.json_results = generate_corr_json(
- # self.correlation_results, self.this_trait, self.dataset, self.target_dataset)
+ for trait, values in list(self.target_dataset.trait_data.items()):
+ self.get_sample_r_and_p_values(trait, values)
+
+ self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()),
+ key=lambda t: -abs(t[1][0])))
+
+ # ZS: Convert min/max chromosome to an int for the location range option
+ range_chr_as_int = None
+ for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
+ if 'loc_chr' in start_vars:
+ if chr_info.name == self.location_chr:
+ range_chr_as_int = order_id
+
+ for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]):
+ trait_object = create_trait(
+ dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False)
+ if not trait_object:
+ continue
+
+ chr_as_int = 0
+ for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
+ if self.location_type == "highest_lod":
+ if chr_info.name == trait_object.locus_chr:
+ chr_as_int = order_id
+ else:
+ if chr_info.name == trait_object.chr:
+ chr_as_int = order_id
+
+ if (float(self.correlation_data[trait][0]) >= self.p_range_lower and
+ float(self.correlation_data[trait][0]) <= self.p_range_upper):
+
+ if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean):
+ if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr):
+ continue
+
+ if range_chr_as_int != None and (chr_as_int != range_chr_as_int):
+ continue
+ if self.location_type == "highest_lod":
+ if (self.min_location_mb != None) and (float(trait_object.locus_mb) < float(self.min_location_mb)):
+ continue
+ if (self.max_location_mb != None) and (float(trait_object.locus_mb) > float(self.max_location_mb)):
+ continue
+ else:
+ if (self.min_location_mb != None) and (float(trait_object.mb) < float(self.min_location_mb)):
+ continue
+ if (self.max_location_mb != None) and (float(trait_object.mb) > float(self.max_location_mb)):
+ continue
+
+ (trait_object.sample_r,
+ trait_object.sample_p,
+ trait_object.num_overlap) = self.correlation_data[trait]
+
+ # Set some sane defaults
+ trait_object.tissue_corr = 0
+ trait_object.tissue_pvalue = 0
+ trait_object.lit_corr = 0
+ if self.corr_type == "tissue" and tissue_corr_data != None:
+ trait_object.tissue_corr = tissue_corr_data[trait][1]
+ trait_object.tissue_pvalue = tissue_corr_data[trait][2]
+ elif self.corr_type == "lit":
+ trait_object.lit_corr = lit_corr_data[trait][1]
+
+ self.correlation_results.append(trait_object)
+
+ if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
+ self.do_lit_correlation_for_trait_list()
+
+ if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
+ self.do_tissue_correlation_for_trait_list()
+
+ self.json_results = generate_corr_json(
+ self.correlation_results, self.this_trait, self.dataset, self.target_dataset)
############################################################################################################################################
@@ -465,7 +436,6 @@ class CorrelationResults(object):
return mouse_gene_id
-
def get_sample_r_and_p_values(self, trait, target_samples):
"""Calculates the sample r (or rho) and p-value
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 2c0ba586..6ca9b23f 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -46,6 +46,7 @@ from wqflask.marker_regression import run_mapping
from wqflask.marker_regression import display_mapping_results
from wqflask.network_graph import network_graph
from wqflask.correlation import show_corr_results
+from wqflask.correlation.correlation_gn3_api import compute_correlation
from wqflask.correlation_matrix import show_corr_matrix
from wqflask.correlation import corr_scatter_plot
from wqflask.wgcna import wgcna_analysis
@@ -880,8 +881,8 @@ def network_graph_page():
def corr_compute_page():
logger.info("In corr_compute, request.form is:", pf(request.form))
logger.info(request.url)
- template_vars = show_corr_results.CorrelationResults(request.form)
- return render_template("correlation_page.html", **template_vars.__dict__)
+ correlation_results = compute_correlation(request.form)
+ return render_template("demo_correlation_page.html",correlation_results=correlation_results)
@app.route("/corr_matrix", methods=('POST',))
def corr_matrix_page():
--
cgit v1.2.3
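
With this commit the correlation page no longer builds CorrelationResults
directly: the /corr_compute route hands request.form to compute_correlation,
which dispatches to the GN3 sample or tissue endpoint. A rough sketch of the
fields it reads from start_vars, with made-up values (the keys mirror what
the diff above accesses; the dataset name is borrowed from elsewhere in this
patch series and the trait id is invented):

    from wqflask.correlation.correlation_gn3_api import compute_correlation

    start_vars = {
        "corr_type": "sample",            # "sample" or "tissue"; "lit" is not wired up yet
        "corr_sample_method": "pearson",  # appended to the GN3 endpoint URL
        "dataset": "HC_M2_0606_P",        # primary dataset (example name)
        "corr_dataset": "HC_M2_0606_P",   # target dataset (example name)
        "trait_id": "1427571_at",         # primary trait (example id)
        # JSON-encoded sample -> value mapping, as posted by the trait page
        "sample_vals": '{"BXD1": "11.3", "BXD2": "10.1", "BXD3": "x"}',
    }

    correlation_results = compute_correlation(start_vars)
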
From 7a1e84cafdf02a1bcef4ddeb653d072b80a8deba Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 25 Mar 2021 02:27:38 +0300
Subject: add initial demo template page
---
.../wqflask/templates/demo_correlation_page.html | 36 ++++++++++++++++++++++
1 file changed, 36 insertions(+)
create mode 100644 wqflask/wqflask/templates/demo_correlation_page.html
diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html
new file mode 100644
index 00000000..4d310051
--- /dev/null
+++ b/wqflask/wqflask/templates/demo_correlation_page.html
@@ -0,0 +1,36 @@
+{% extends "base.html" %}
+{% block title %}Demo Correlation Results{% endblock %}
+{% block css %}
+
+
+
+
+
+
+{% endblock %}
+{% block content %}
+
+ {{correlation_results}}
+
+
+
+{% endblock %}
+
+{% block js %}
+
+
+
+
+
+
+
+
+
+
+
+
+
+{% endblock %}
--
cgit v1.2.3
From cf42f769ec4db2efaebca64c63454935cc28b2a3 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 25 Mar 2021 03:55:16 +0300
Subject: modify gn3 integration code
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 89 +++++++++-------------
.../wqflask/templates/demo_correlation_page.html | 22 +-----
2 files changed, 38 insertions(+), 73 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 7e269e41..7e865bf3 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -10,59 +10,6 @@ from wqflask.base.trait import retrieve_sample_data
GN3_CORRELATION_API = "http://127.0.0.1:8080/api/correlation"
-def compute_sample(target_dataset, trait_data, target_samplelist, method="pearson"):
- """integration for integrating sample_r api correlation"""
- data = {
- "target_dataset": target_dataset,
- "target_samplelist": target_samplelist,
- "trait_data": {
- "trait_sample_data": trait_data,
- "trait_id": "HC_Q"
- }
- }
- requests_url = f"http://127.0.0.1:8080/api/correlation/sample_x/{method}"
-
- results = requests.post(requests_url, json=data)
-
- data = results.json()
-
- return data
-
-
-def get_tissue_correlation_input(this_trait, trait_symbol_dict):
- """Gets tissue expression values for the primary trait and target tissues values"""
- primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list=[this_trait.symbol])
-
- if this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
- primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower(
- )]
-
- corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list=list(trait_symbol_dict.values()))
-
- target_tissue_data = []
- for trait, symbol in list(trait_symbol_dict.items()):
- if symbol and symbol.lower() in corr_result_tissue_vals_dict:
- this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower(
- )]
-
- this_trait_data = {"trait_id": trait,
- "tissue_values": this_trait_tissue_values}
-
- target_tissue_data.append(this_trait_data)
-
- primary_tissue_data = {
- "this_id": "TT",
- "tissue_values": primary_trait_tissue_values
-
- }
-
- return (primary_tissue_data, target_tissue_data)
-
- return None
-
-
def process_samples(start_vars, sample_names, excluded_samples=None):
"""process samples method"""
sample_data = {}
@@ -81,7 +28,7 @@ def process_samples(start_vars, sample_names, excluded_samples=None):
def create_target_this_trait(start_vars):
- """this function prefetch required data for correlation"""
+ """this function creates the required trait and target dataset for correlation"""
this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
target_dataset = data_set.create_dataset(
@@ -148,3 +95,37 @@ def compute_correlation(start_vars, method="pearson"):
data = corr_results.json()
return data
+
+
+def get_tissue_correlation_input(this_trait, trait_symbol_dict):
+ """Gets tissue expression values for the primary trait and target tissues values"""
+ primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list=[this_trait.symbol])
+
+ if this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
+ primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower(
+ )]
+
+ corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list=list(trait_symbol_dict.values()))
+
+ target_tissue_data = []
+ for trait, symbol in list(trait_symbol_dict.items()):
+ if symbol and symbol.lower() in corr_result_tissue_vals_dict:
+ this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower(
+ )]
+
+ this_trait_data = {"trait_id": trait,
+ "tissue_values": this_trait_tissue_values}
+
+ target_tissue_data.append(this_trait_data)
+
+ primary_tissue_data = {
+ "this_id": "TT",
+ "tissue_values": primary_trait_tissue_values
+
+ }
+
+ return (primary_tissue_data, target_tissue_data)
+
+ return None
diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html
index 4d310051..ddcdf38d 100644
--- a/wqflask/wqflask/templates/demo_correlation_page.html
+++ b/wqflask/wqflask/templates/demo_correlation_page.html
@@ -10,27 +10,11 @@
{% endblock %}
{% block content %}
- {{correlation_results}}
+
-
-
-{% endblock %}
-{% block js %}
-
-
-
-
-
-
-
-
-
-
-
-
-
+
{% endblock %}
--
cgit v1.2.3
From 7de35627a6dc3fa48a039c932be005ffe6c175c4 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 25 Mar 2021 04:02:49 +0300
Subject: fix import error
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 7e865bf3..479bb0d8 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -1,11 +1,11 @@
"""module that calls the gn3 api's to do the correlation """
import json
import requests
-from wqflask.wqflask.correlation import correlation_functions
+from wqflask.correlation import correlation_functions
-from wqflask.base import data_set
-from wqflask.base.trait import create_trait
-from wqflask.base.trait import retrieve_sample_data
+from base import data_set
+from base.trait import create_trait
+from base.trait import retrieve_sample_data
GN3_CORRELATION_API = "http://127.0.0.1:8080/api/correlation"
--
cgit v1.2.3
From da72efa86846179d8d2aa64cd7b06a894469dc85 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 25 Mar 2021 10:14:31 +0300
Subject: minor fix
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 479bb0d8..f1137c0e 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -88,7 +88,7 @@ def compute_correlation(start_vars, method="pearson"):
else:
pass
# lit correlation/literature
- # can fetch values in gn3 not set up in gn3
+ # to fetch values from the database
corr_results = requests.post(requests_url, json=corr_input_data)
--
cgit v1.2.3
From 08ddec9dcbaa1730d0b65b643aa5c99d1077d4d5 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Sun, 28 Mar 2021 21:13:52 +0300
Subject: refactor correlation integration code
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 42 ++++++++++++----------
.../wqflask/templates/demo_correlation_page.html | 6 ++--
2 files changed, 27 insertions(+), 21 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index f1137c0e..1cd1b332 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -63,6 +63,7 @@ def compute_correlation(start_vars, method="pearson"):
if corr_type == "sample":
corr_input_data = {
+ "target": target_dataset,
"target_dataset": target_dataset.trait_data,
"target_samplelist": target_dataset.samplelist,
"trait_data": {
@@ -80,16 +81,17 @@ def compute_correlation(start_vars, method="pearson"):
corr_input_data = {
"primary_tissue": primary_tissue_data,
- "target_tissues": target_tissue_data
+ "target_tissues_dict": target_tissue_data
}
requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
- else:
- pass
- # lit correlation/literature
- # to fetch values from the database
+ elif corr_type == "lit":
+ (this_trait_geneid, geneid_dict, species) = do_lit_correlation(
+ this_trait, this_dataset, target_dataset)
+ requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}"
+ corr_input_data = geneid_dict
corr_results = requests.post(requests_url, json=corr_input_data)
data = corr_results.json()
@@ -97,6 +99,18 @@ def compute_correlation(start_vars, method="pearson"):
return data
+def do_lit_correlation(this_trait, this_dataset, target_dataset):
+ geneid_dict = this_dataset.retrieve_genes("GeneId")
+ species = this_dataset.group.species.lower()
+
+ this_trait_geneid = this_trait.geneid
+ this_trait_gene_data = {
+ this_trait.name: this_trait_geneid
+ }
+
+ return (this_trait_geneid, geneid_dict, species)
+
+
def get_tissue_correlation_input(this_trait, trait_symbol_dict):
"""Gets tissue expression values for the primary trait and target tissues values"""
primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
@@ -108,23 +122,15 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict):
corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
symbol_list=list(trait_symbol_dict.values()))
-
- target_tissue_data = []
- for trait, symbol in list(trait_symbol_dict.items()):
- if symbol and symbol.lower() in corr_result_tissue_vals_dict:
- this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower(
- )]
-
- this_trait_data = {"trait_id": trait,
- "tissue_values": this_trait_tissue_values}
-
- target_tissue_data.append(this_trait_data)
-
primary_tissue_data = {
- "this_id": "TT",
+ "this_id": this_trait.name,
"tissue_values": primary_trait_tissue_values
}
+ target_tissue_data = {
+ "trait_symbol_dict": trait_symbol_dict,
+ "symbol_tissue_vals_dict": corr_result_tissue_vals_dict
+ }
return (primary_tissue_data, target_tissue_data)
diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html
index ddcdf38d..a8651067 100644
--- a/wqflask/wqflask/templates/demo_correlation_page.html
+++ b/wqflask/wqflask/templates/demo_correlation_page.html
@@ -10,11 +10,11 @@
{% endblock %}
{% block content %}
-
-
+ {{correlation_results}}
+
{% endblock %}
--
cgit v1.2.3
From fcb93bef5ab230b948f83e0e77a1ef54b017aca1 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 8 Apr 2021 23:59:13 +0300
Subject: minor fix
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 1cd1b332..c8d5347c 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -63,7 +63,6 @@ def compute_correlation(start_vars, method="pearson"):
if corr_type == "sample":
corr_input_data = {
- "target": target_dataset,
"target_dataset": target_dataset.trait_data,
"target_samplelist": target_dataset.samplelist,
"trait_data": {
--
cgit v1.2.3
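
After the refactor and the small follow-up fix above, the module speaks to
three GN3 endpoints, one per correlation type. The URLs and top-level keys
below are taken from the diffs in this patch series; every concrete value is
an illustrative placeholder (trait ids, symbols, tissue values and the GeneId
are invented):

    GN3_CORRELATION_API = "http://127.0.0.1:8080/api/correlation"

    # corr_type == "sample"  ->  POST {GN3_CORRELATION_API}/sample_x/{method}
    sample_payload = {
        "target_dataset": {"1427571_at": [8.2, 8.9]},   # target_dataset.trait_data
        "target_samplelist": ["BXD1", "BXD2"],          # target_dataset.samplelist
        "trait_data": {
            "trait_sample_data": {"BXD1": 11.3, "BXD2": 10.1},
            "trait_id": "1427571_at",                   # start_vars["trait_id"]
        },
    }

    # corr_type == "tissue"  ->  POST {GN3_CORRELATION_API}/tissue_corr/{method}
    tissue_payload = {
        "primary_tissue": {
            "this_id": "1427571_at",                    # this_trait.name
            "tissue_values": [9.1, 7.4, 8.8],
        },
        "target_tissues_dict": {
            "trait_symbol_dict": {"1427571_at": "Shh"},           # trait -> symbol
            "symbol_tissue_vals_dict": {"shh": [9.0, 7.5, 8.6]},  # symbol -> tissue values
        },
    }

    # corr_type == "lit"  ->  POST {GN3_CORRELATION_API}/lit_corr/{species}/{geneid}
    lit_payload = {"1427571_at": 20423}                 # geneid_dict: trait -> GeneId
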
From e7b589f05e1c13612ea2f7245d66cc3f054fa14b Mon Sep 17 00:00:00 2001
From: zsloan
Date: Fri, 9 Apr 2021 17:40:53 +0000
Subject: Added variable 'categorical_attr_exists' tracking whether there are
any case attributes with fewer than 10 distinct values, since it currently
throws a JS error if case attributes exist but none have fewer than 10
distinct values (specifically when we have RRID as a case attribute)
---
wqflask/wqflask/show_trait/show_trait.py | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 878c41c0..d3267190 100644
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -203,6 +203,13 @@ class ShowTrait(object):
if sample.value < 0:
self.negative_vals_exist = "true"
+ #ZS: Check whether any attributes have few enough distinct values to show the "Block samples by group" option
+ self.categorical_attr_exists = False
+ for attribute in self.sample_groups[0].attributes:
+ if len(self.sample_groups[0].attributes[attribute].distinct_values) <= 10:
+ self.categorical_attr_exists = True
+ break
+
sample_column_width = max_samplename_width * 8
self.stats_table_width, self.trait_table_width = get_table_widths(self.sample_groups, sample_column_width, self.has_num_cases)
@@ -277,6 +284,7 @@ class ShowTrait(object):
se_exists = self.sample_groups[0].se_exists,
has_num_cases = self.has_num_cases,
attributes = self.sample_groups[0].attributes,
+ categorical_attr_exists = self.categorical_attr_exists,
categorical_vars = ",".join(categorical_var_list),
num_values = self.num_values,
qnorm_values = self.qnorm_vals,
--
cgit v1.2.3
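
In isolation, the new check amounts to the following standalone restatement
of the loop in the diff, with a made-up attributes mapping (the Attr class
and the values are invented for illustration): an attribute such as RRID,
with hundreds of distinct values, no longer enables the "Block samples by
group" UI on its own, while a low-cardinality attribute still does.

    # Illustrative stand-in for self.sample_groups[0].attributes:
    # attribute name -> object exposing a distinct_values list.
    class Attr:
        def __init__(self, distinct_values):
            self.distinct_values = distinct_values

    attributes = {
        "Sex": Attr(["M", "F"]),                          # few values -> categorical
        "RRID": Attr([f"RRID:{i}" for i in range(500)]),  # many values -> not categorical
    }

    categorical_attr_exists = False
    for attribute in attributes:
        if len(attributes[attribute].distinct_values) <= 10:
            categorical_attr_exists = True
            break

    print(categorical_attr_exists)  # True, because "Sex" qualifies
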
From 9d7da4653c8b0241af712043bb375e3f2bc52a3f Mon Sep 17 00:00:00 2001
From: zsloan
Date: Fri, 9 Apr 2021 17:43:30 +0000
Subject: Store categorical_attr_exists as a string instead of a boolean since
apparently the boolean doesn't get passed to the template properly
---
wqflask/wqflask/show_trait/show_trait.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index d3267190..6892f02b 100644
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -204,10 +204,10 @@ class ShowTrait(object):
self.negative_vals_exist = "true"
#ZS: Check whether any attributes have few enough distinct values to show the "Block samples by group" option
- self.categorical_attr_exists = False
+ self.categorical_attr_exists = "false"
for attribute in self.sample_groups[0].attributes:
if len(self.sample_groups[0].attributes[attribute].distinct_values) <= 10:
- self.categorical_attr_exists = True
+ self.categorical_attr_exists = "true"
break
sample_column_width = max_samplename_width * 8
--
cgit v1.2.3
From 9fa88673447ab13dcd1b899c0e6c2c5915dd0114 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Fri, 9 Apr 2021 17:44:03 +0000
Subject: Replaced the conditional for whether to show 'Block samples by group'
with one that checks categorical_attr_exists instead
---
wqflask/wqflask/templates/show_trait_transform_and_filter.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wqflask/wqflask/templates/show_trait_transform_and_filter.html b/wqflask/wqflask/templates/show_trait_transform_and_filter.html
index b70ca590..e3f5ef81 100644
--- a/wqflask/wqflask/templates/show_trait_transform_and_filter.html
+++ b/wqflask/wqflask/templates/show_trait_transform_and_filter.html
@@ -20,7 +20,7 @@
Please check that your input is formatted correctly, e.g. 3, 5-10, 12
- {% if sample_groups[0].attributes %}
+ {% if categorical_attr_exists == "true" %}
Block samples by group:
--
cgit v1.2.3
From 6919d7cf68555d9ea3a260d328815cd66e3be1d7 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Fri, 9 Apr 2021 17:44:48 +0000
Subject: Changed conditional in show_trait.js to check
categorical_attr_exists instead of just checking if there are case attributes
---
wqflask/wqflask/static/new/javascript/show_trait.js | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wqflask/wqflask/static/new/javascript/show_trait.js b/wqflask/wqflask/static/new/javascript/show_trait.js
index 9d356570..6e9d68c4 100644
--- a/wqflask/wqflask/static/new/javascript/show_trait.js
+++ b/wqflask/wqflask/static/new/javascript/show_trait.js
@@ -634,7 +634,7 @@ populate_sample_attributes_values_dropdown = function() {
return _results;
};
-if (Object.keys(js_data.attributes).length){
+if (js_data.categorical_attr_exists == "true"){
populate_sample_attributes_values_dropdown();
}
--
cgit v1.2.3
From 5787a6ec5a54e2c98aed38f4c68291cb65b7f609 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 13 Apr 2021 02:37:13 -0500
Subject: SQL notes
---
doc/database.org | 156 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 156 insertions(+)
diff --git a/doc/database.org b/doc/database.org
index 5107b660..f3d4d92e 100644
--- a/doc/database.org
+++ b/doc/database.org
@@ -1368,3 +1368,159 @@ mysql> select * from SnpSource limit 5;
Empty set (0.00 sec)
Hmmm. This is the test database. Then there are the plink files and VCF files.
+
+* Optimize SQL?
+
+We were facing some issues with slow queries. A query
+was really slow on Penguin2:
+
+: time mysql -u webqtlout -pwebqtlout db_webqtl < ~/chunk.sql > /dev/null
+: real 0m13.082s
+: user 0m0.292s
+: sys 0m0.032s
+
+It runs in 1s on Tux01 and 13s on P2. Why is that? The fix turned out
+to be increasing an InnoDB cache size(!)
+
+Interestingly, Penguin2 is running InnoDB on much slower storage.
+It also has more indices than Tux01(?!), probably left over from things we
+tried in order to make the datatables faster.
+
+Meanwhile the query is one with many joins:
+
+#+begin_src sql
+SELECT ProbeSet.Name,ProbeSetXRef.DataId, T4.value, T5.value, T6.value, T7.value, T8.value, T9.value, T10.value, T11.value, T12.value, T14.value, T15.value, T17.value, T18.value, T19.value, T20.value, T21.value, T22.value, T24.value, T25.value, T26.value, T28.value, T29.value, T30.value, T31.value, T35.value, T36.value, T37.value, T39.value, T98.value, T99.value, T100.value, T103.value, T487.value, T105.value, T106.value, T110.value FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
+ left join ProbeSetData as T4 on T4.Id = ProbeSetXRef.DataId
+ and T4.StrainId=4
+ (...)
+ left join ProbeSetData as T110 on T110.Id = ProbeSetXRef.DataId
+ and T110.StrainId=110
+ WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
+ and ProbeSetFreeze.Name = 'HC_M2_0606_P'
+ and ProbeSet.Id = ProbeSetXRef.ProbeSetId
+ order by ProbeSet.Id
+#+end_src
+
+It is blazingly fast on Tux01 and (now) fast enough on Penguin2.
+
+First I checked the tables for indices and storage type. Next I
+checked the difference in configuration.
+
+** Check tables
+
+Tables (ProbeSetData, ProbeSet, ProbeSetXRef, ProbeSetFreeze)
+
+*** ProbeSetData
+
+Same on Tux01 and P2:
+
+: show indexes from ProbeSetData ;
+
++--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
++--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| ProbeSetData | 0 | DataId | 1 | Id | A | 47769944 | NULL | NULL | | BTREE | | |
+| ProbeSetData | 0 | DataId | 2 | StrainId | A | 5111384047 | NULL | NULL | | BTREE | | |
++--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+
+*** ProbeSetFreeze
+
+Tux01 has less indexes than P2(!):
+
++----------------+------------+-----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
++----------------+------------+-----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| ProbeSetFreeze | 0 | PRIMARY | 1 | Id | A | 911 | NULL | NULL | | BTREE | | |
+| ProbeSetFreeze | 0 | FullName | 1 | FullName | A | 911 | NULL | NULL | | BTREE | | |
+| ProbeSetFreeze | 0 | Name | 1 | Name | A | 911 | NULL | NULL | YES | BTREE | | |
+| ProbeSetFreeze | 1 | NameIndex | 1 | Name2 | A | 911 | NULL | NULL | | BTREE | | |
++----------------+------------+-----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+4 rows in set (0.000 sec)
+
++----------------+------------+-----------------+--------------+-----------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
++----------------+------------+-----------------+--------------+-----------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| ProbeSetFreeze | 0 | PRIMARY | 1 | Id | A | 883 | NULL | NULL | | BTREE | | |
+| ProbeSetFreeze | 0 | FullName | 1 | FullName | A | 883 | NULL | NULL | | BTREE | | |
+| ProbeSetFreeze | 0 | Name | 1 | Name | A | 883 | NULL | NULL | YES | BTREE | | |
+| ProbeSetFreeze | 1 | NameIndex | 1 | Name2 | A | 883 | NULL | NULL | | BTREE | | |
+| ProbeSetFreeze | 1 | ShortName | 1 | ShortName | A | 883 | NULL | NULL | | BTREE | | |
+| ProbeSetFreeze | 1 | ProbeFreezeId | 1 | ProbeFreezeId | A | 441 | NULL | NULL | | BTREE | | |
+| ProbeSetFreeze | 1 | conf_and_public | 1 | confidentiality | A | 3 | NULL | NULL | | BTREE | | |
+| ProbeSetFreeze | 1 | conf_and_public | 2 | public | A | 4 | NULL | NULL | | BTREE | | |
++----------------+------------+-----------------+--------------+-----------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+8 rows in set (0.00 sec)
+
+*** ProbeSet
+
+Identical indexes
+
+*** ProbeSetXRef
+
+Tux01 has less indexes than P2(!):
+
+MariaDB [db_webqtl]> show indexes from ProbeSetXRef ;
++--------------+------------+------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
++--------------+------------+------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| ProbeSetXRef | 0 | ProbeSetId | 1 | ProbeSetFreezeId | A | 885 | NULL | NULL | | BTREE | | |
+| ProbeSetXRef | 0 | ProbeSetId | 2 | ProbeSetId | A | 47713039 | NULL | NULL | | BTREE | | |
+| ProbeSetXRef | 0 | DataId_IDX | 1 | DataId | A | 47713039 | NULL | NULL | | BTREE | | |
+| ProbeSetXRef | 1 | Locus_IDX | 1 | Locus | A | 15904346 | NULL | NULL | YES | BTREE | | |
++--------------+------------+------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+4 rows in set (0.000 sec)
+
+
+MariaDB [db_webqtl]> show indexes from ProbeSetXRef ;
++--------------+------------+-------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
++--------------+------------+-------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| ProbeSetXRef | 0 | ProbeSetId | 1 | ProbeSetFreezeId | A | 856 | NULL | NULL | | BTREE | | |
+| ProbeSetXRef | 0 | ProbeSetId | 2 | ProbeSetId | A | 46412145 | NULL | NULL | | BTREE | | |
+| ProbeSetXRef | 0 | DataId_IDX | 1 | DataId | A | 46412145 | NULL | NULL | | BTREE | | |
+| ProbeSetXRef | 1 | ProbeSetId1 | 1 | ProbeSetId | A | 5156905 | NULL | NULL | | BTREE | | |
+| ProbeSetXRef | 1 | Locus | 1 | Locus | A | 23206072 | NULL | NULL | YES | BTREE | | |
++--------------+------------+-------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+5 rows in set (0.00 sec)
+
+** Check storage
+
+The database on Tux01 is mounted on NVMe storage. On Penguin2 it
+sits on slower SATA with RAID5.
+
+Also, on Penguin2 the following tables are using InnoDB instead of
+MyISAM:
+
+#+begin_src sh
+-rw-rw---- 1 mysql mysql 79691776 Oct 15 2019 AccessLog.ibd
+-rw-rw---- 1 mysql mysql 196608 Oct 24 2019 Docs.ibd
+-rw-rw---- 1 mysql mysql 63673729024 Jul 10 2020 GenoData.ibd
+-rw-rw---- 1 mysql mysql 34787557376 Jul 9 2020 ProbeData.ibd
+-rw-rw---- 1 mysql mysql 254690721792 Jul 10 2020 ProbeSetData.ibd
+-rw-rw---- 1 mysql mysql 32103202816 Jul 9 2020 SnpAll.ibd
+-rw-rw---- 1 mysql mysql 98304 May 6 2020 TraitMetadata.ibd
+#+end_src
+
+This [[https://www.liquidweb.com/kb/mysql-performance-myisam-vs-innodb/][article]] suggests that MyISAM will be faster for our use case.
+
+** Configuration
+
+There was one setting on Tux01 that was missing on P2:
+
+: +innodb_buffer_pool_size=1024M
+
+Running the same query twice (so you can see the warmup after
+a restart of MariaDB)
+
+#+begin_src sh
+penguin2:/etc$ time mysql -u webqtlout -pwebqtlout db_webqtl < ~/chunk.sql > ~/test.out
+real 0m4.253s
+user 0m0.276s
+sys 0m0.040s
+penguin2:/etc$ time mysql -u webqtlout -pwebqtlout db_webqtl < ~/chunk.sql > ~/test.out
+real 0m2.633s
+user 0m0.296s
+sys 0m0.028s
+#+end_src
+
+That is much better :)
--
cgit v1.2.3
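The notes above attribute the speedup to innodb_buffer_pool_size. A small sketch for checking the value the running server actually uses, assuming the MySQLdb (mysqlclient) driver and the webqtlout credentials from the timing commands above:

#+begin_src python
# Sketch: read the effective InnoDB buffer pool size from the running MariaDB.
# Assumes the MySQLdb driver and the webqtlout/db_webqtl credentials shown above.
import MySQLdb

conn = MySQLdb.connect(user="webqtlout", passwd="webqtlout", db="db_webqtl")
try:
    cursor = conn.cursor()
    cursor.execute("SHOW VARIABLES LIKE 'innodb_buffer_pool_size'")
    _name, value = cursor.fetchone()
    print("innodb_buffer_pool_size = %d MB" % (int(value) // (1024 * 1024)))
finally:
    conn.close()
#+end_src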
From f628f14bae508ec1d86606cfb9fdad6096f8b29f Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 13 Apr 2021 02:41:20 -0500
Subject: SQL notes
---
doc/database.org | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/doc/database.org b/doc/database.org
index f3d4d92e..cd833b83 100644
--- a/doc/database.org
+++ b/doc/database.org
@@ -1339,7 +1339,8 @@ The SNP count info for the BXD is calculated like this
startMb += stepMb
#+end_src
-select * from BXDSnpPosition limit 5;
+: select * from BXDSnpPosition limit 5;
+
+------+-----------+-----------+----------+
| Chr | StrainId1 | StrainId2 | Mb |
+------+-----------+-----------+----------+
@@ -1435,7 +1436,7 @@ Tux01 has less indexes than P2(!):
| ProbeSetFreeze | 0 | Name | 1 | Name | A | 911 | NULL | NULL | YES | BTREE | | |
| ProbeSetFreeze | 1 | NameIndex | 1 | Name2 | A | 911 | NULL | NULL | | BTREE | | |
+----------------+------------+-----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
-4 rows in set (0.000 sec)
+: 4 rows in set (0.000 sec)
+----------------+------------+-----------------+--------------+-----------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
@@ -1449,7 +1450,7 @@ Tux01 has less indexes than P2(!):
| ProbeSetFreeze | 1 | conf_and_public | 1 | confidentiality | A | 3 | NULL | NULL | | BTREE | | |
| ProbeSetFreeze | 1 | conf_and_public | 2 | public | A | 4 | NULL | NULL | | BTREE | | |
+----------------+------------+-----------------+--------------+-----------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
-8 rows in set (0.00 sec)
+: 8 rows in set (0.00 sec)
*** ProbeSet
@@ -1459,7 +1460,7 @@ Identical indexes
Tux01 has less indexes than P2(!):
-MariaDB [db_webqtl]> show indexes from ProbeSetXRef ;
+: MariaDB [db_webqtl]> show indexes from ProbeSetXRef ;
+--------------+------------+------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
+--------------+------------+------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
@@ -1468,10 +1469,10 @@ MariaDB [db_webqtl]> show indexes from ProbeSetXRef ;
| ProbeSetXRef | 0 | DataId_IDX | 1 | DataId | A | 47713039 | NULL | NULL | | BTREE | | |
| ProbeSetXRef | 1 | Locus_IDX | 1 | Locus | A | 15904346 | NULL | NULL | YES | BTREE | | |
+--------------+------------+------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
-4 rows in set (0.000 sec)
+: 4 rows in set (0.000 sec)
-MariaDB [db_webqtl]> show indexes from ProbeSetXRef ;
+: MariaDB [db_webqtl]> show indexes from ProbeSetXRef ;
+--------------+------------+-------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
+--------------+------------+-------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
@@ -1481,7 +1482,7 @@ MariaDB [db_webqtl]> show indexes from ProbeSetXRef ;
| ProbeSetXRef | 1 | ProbeSetId1 | 1 | ProbeSetId | A | 5156905 | NULL | NULL | | BTREE | | |
| ProbeSetXRef | 1 | Locus | 1 | Locus | A | 23206072 | NULL | NULL | YES | BTREE | | |
+--------------+------------+-------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
-5 rows in set (0.00 sec)
+: 5 rows in set (0.00 sec)
** Check storage
--
cgit v1.2.3
From d6d339adbbdbf18e5acabaa180092184cabacec8 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 13 Apr 2021 02:43:03 -0500
Subject: SQL notes - table layout
---
doc/database.org | 11 +----------
1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/doc/database.org b/doc/database.org
index cd833b83..dd068d71 100644
--- a/doc/database.org
+++ b/doc/database.org
@@ -1417,28 +1417,23 @@ Same on Tux01 and P2:
: show indexes from ProbeSetData ;
-+--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
+--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| ProbeSetData | 0 | DataId | 1 | Id | A | 47769944 | NULL | NULL | | BTREE | | |
| ProbeSetData | 0 | DataId | 2 | StrainId | A | 5111384047 | NULL | NULL | | BTREE | | |
-+--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
*** ProbeSetFreeze
Tux01 has less indexes than P2(!):
-+----------------+------------+-----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
+----------------+------------+-----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| ProbeSetFreeze | 0 | PRIMARY | 1 | Id | A | 911 | NULL | NULL | | BTREE | | |
| ProbeSetFreeze | 0 | FullName | 1 | FullName | A | 911 | NULL | NULL | | BTREE | | |
| ProbeSetFreeze | 0 | Name | 1 | Name | A | 911 | NULL | NULL | YES | BTREE | | |
| ProbeSetFreeze | 1 | NameIndex | 1 | Name2 | A | 911 | NULL | NULL | | BTREE | | |
-+----------------+------------+-----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
: 4 rows in set (0.000 sec)
-+----------------+------------+-----------------+--------------+-----------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
+----------------+------------+-----------------+--------------+-----------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| ProbeSetFreeze | 0 | PRIMARY | 1 | Id | A | 883 | NULL | NULL | | BTREE | | |
@@ -1449,7 +1444,6 @@ Tux01 has less indexes than P2(!):
| ProbeSetFreeze | 1 | ProbeFreezeId | 1 | ProbeFreezeId | A | 441 | NULL | NULL | | BTREE | | |
| ProbeSetFreeze | 1 | conf_and_public | 1 | confidentiality | A | 3 | NULL | NULL | | BTREE | | |
| ProbeSetFreeze | 1 | conf_and_public | 2 | public | A | 4 | NULL | NULL | | BTREE | | |
-+----------------+------------+-----------------+--------------+-----------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
: 8 rows in set (0.00 sec)
*** ProbeSet
@@ -1461,19 +1455,17 @@ Identical indexes
Tux01 has less indexes than P2(!):
: MariaDB [db_webqtl]> show indexes from ProbeSetXRef ;
-+--------------+------------+------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
+--------------+------------+------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| ProbeSetXRef | 0 | ProbeSetId | 1 | ProbeSetFreezeId | A | 885 | NULL | NULL | | BTREE | | |
| ProbeSetXRef | 0 | ProbeSetId | 2 | ProbeSetId | A | 47713039 | NULL | NULL | | BTREE | | |
| ProbeSetXRef | 0 | DataId_IDX | 1 | DataId | A | 47713039 | NULL | NULL | | BTREE | | |
| ProbeSetXRef | 1 | Locus_IDX | 1 | Locus | A | 15904346 | NULL | NULL | YES | BTREE | | |
-+--------------+------------+------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
: 4 rows in set (0.000 sec)
+
: MariaDB [db_webqtl]> show indexes from ProbeSetXRef ;
-+--------------+------------+-------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
+--------------+------------+-------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| ProbeSetXRef | 0 | ProbeSetId | 1 | ProbeSetFreezeId | A | 856 | NULL | NULL | | BTREE | | |
@@ -1481,7 +1473,6 @@ Tux01 has less indexes than P2(!):
| ProbeSetXRef | 0 | DataId_IDX | 1 | DataId | A | 46412145 | NULL | NULL | | BTREE | | |
| ProbeSetXRef | 1 | ProbeSetId1 | 1 | ProbeSetId | A | 5156905 | NULL | NULL | | BTREE | | |
| ProbeSetXRef | 1 | Locus | 1 | Locus | A | 23206072 | NULL | NULL | YES | BTREE | | |
-+--------------+------------+-------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
: 5 rows in set (0.00 sec)
** Check storage
--
cgit v1.2.3
From 0815b6004a167c5c25f6994aeb31ec3161619c6d Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 13 Apr 2021 02:44:03 -0500
Subject: SQL notes - table layout
---
doc/database.org | 56 ++++++++++++++++++++++++++++----------------------------
1 file changed, 28 insertions(+), 28 deletions(-)
diff --git a/doc/database.org b/doc/database.org
index dd068d71..d5462d4e 100644
--- a/doc/database.org
+++ b/doc/database.org
@@ -1418,32 +1418,32 @@ Same on Tux01 and P2:
: show indexes from ProbeSetData ;
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
-+--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
-| ProbeSetData | 0 | DataId | 1 | Id | A | 47769944 | NULL | NULL | | BTREE | | |
-| ProbeSetData | 0 | DataId | 2 | StrainId | A | 5111384047 | NULL | NULL | | BTREE | | |
+|--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------|
+| ProbeSetData | 0 | DataId | 1 | Id | A | 47769944 | NULL | NULL | | BTREE | | |
+| ProbeSetData | 0 | DataId | 2 | StrainId | A | 5111384047 | NULL | NULL | | BTREE | | |
*** ProbeSetFreeze
Tux01 has less indexes than P2(!):
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
-+----------------+------------+-----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
-| ProbeSetFreeze | 0 | PRIMARY | 1 | Id | A | 911 | NULL | NULL | | BTREE | | |
-| ProbeSetFreeze | 0 | FullName | 1 | FullName | A | 911 | NULL | NULL | | BTREE | | |
-| ProbeSetFreeze | 0 | Name | 1 | Name | A | 911 | NULL | NULL | YES | BTREE | | |
-| ProbeSetFreeze | 1 | NameIndex | 1 | Name2 | A | 911 | NULL | NULL | | BTREE | | |
+|----------------+------------+-----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------|
+| ProbeSetFreeze | 0 | PRIMARY | 1 | Id | A | 911 | NULL | NULL | | BTREE | | |
+| ProbeSetFreeze | 0 | FullName | 1 | FullName | A | 911 | NULL | NULL | | BTREE | | |
+| ProbeSetFreeze | 0 | Name | 1 | Name | A | 911 | NULL | NULL | YES | BTREE | | |
+| ProbeSetFreeze | 1 | NameIndex | 1 | Name2 | A | 911 | NULL | NULL | | BTREE | | |
: 4 rows in set (0.000 sec)
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
-+----------------+------------+-----------------+--------------+-----------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
-| ProbeSetFreeze | 0 | PRIMARY | 1 | Id | A | 883 | NULL | NULL | | BTREE | | |
-| ProbeSetFreeze | 0 | FullName | 1 | FullName | A | 883 | NULL | NULL | | BTREE | | |
-| ProbeSetFreeze | 0 | Name | 1 | Name | A | 883 | NULL | NULL | YES | BTREE | | |
-| ProbeSetFreeze | 1 | NameIndex | 1 | Name2 | A | 883 | NULL | NULL | | BTREE | | |
-| ProbeSetFreeze | 1 | ShortName | 1 | ShortName | A | 883 | NULL | NULL | | BTREE | | |
-| ProbeSetFreeze | 1 | ProbeFreezeId | 1 | ProbeFreezeId | A | 441 | NULL | NULL | | BTREE | | |
-| ProbeSetFreeze | 1 | conf_and_public | 1 | confidentiality | A | 3 | NULL | NULL | | BTREE | | |
-| ProbeSetFreeze | 1 | conf_and_public | 2 | public | A | 4 | NULL | NULL | | BTREE | | |
+|----------------+------------+-----------------+--------------+-----------------+-----------+-------------+----------+--------+------+------------+---------+---------------|
+| ProbeSetFreeze | 0 | PRIMARY | 1 | Id | A | 883 | NULL | NULL | | BTREE | | |
+| ProbeSetFreeze | 0 | FullName | 1 | FullName | A | 883 | NULL | NULL | | BTREE | | |
+| ProbeSetFreeze | 0 | Name | 1 | Name | A | 883 | NULL | NULL | YES | BTREE | | |
+| ProbeSetFreeze | 1 | NameIndex | 1 | Name2 | A | 883 | NULL | NULL | | BTREE | | |
+| ProbeSetFreeze | 1 | ShortName | 1 | ShortName | A | 883 | NULL | NULL | | BTREE | | |
+| ProbeSetFreeze | 1 | ProbeFreezeId | 1 | ProbeFreezeId | A | 441 | NULL | NULL | | BTREE | | |
+| ProbeSetFreeze | 1 | conf_and_public | 1 | confidentiality | A | 3 | NULL | NULL | | BTREE | | |
+| ProbeSetFreeze | 1 | conf_and_public | 2 | public | A | 4 | NULL | NULL | | BTREE | | |
: 8 rows in set (0.00 sec)
*** ProbeSet
@@ -1456,23 +1456,23 @@ Tux01 has less indexes than P2(!):
: MariaDB [db_webqtl]> show indexes from ProbeSetXRef ;
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
-+--------------+------------+------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
-| ProbeSetXRef | 0 | ProbeSetId | 1 | ProbeSetFreezeId | A | 885 | NULL | NULL | | BTREE | | |
-| ProbeSetXRef | 0 | ProbeSetId | 2 | ProbeSetId | A | 47713039 | NULL | NULL | | BTREE | | |
-| ProbeSetXRef | 0 | DataId_IDX | 1 | DataId | A | 47713039 | NULL | NULL | | BTREE | | |
-| ProbeSetXRef | 1 | Locus_IDX | 1 | Locus | A | 15904346 | NULL | NULL | YES | BTREE | | |
+|--------------+------------+------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------|
+| ProbeSetXRef | 0 | ProbeSetId | 1 | ProbeSetFreezeId | A | 885 | NULL | NULL | | BTREE | | |
+| ProbeSetXRef | 0 | ProbeSetId | 2 | ProbeSetId | A | 47713039 | NULL | NULL | | BTREE | | |
+| ProbeSetXRef | 0 | DataId_IDX | 1 | DataId | A | 47713039 | NULL | NULL | | BTREE | | |
+| ProbeSetXRef | 1 | Locus_IDX | 1 | Locus | A | 15904346 | NULL | NULL | YES | BTREE | | |
: 4 rows in set (0.000 sec)
: MariaDB [db_webqtl]> show indexes from ProbeSetXRef ;
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
-+--------------+------------+-------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
-| ProbeSetXRef | 0 | ProbeSetId | 1 | ProbeSetFreezeId | A | 856 | NULL | NULL | | BTREE | | |
-| ProbeSetXRef | 0 | ProbeSetId | 2 | ProbeSetId | A | 46412145 | NULL | NULL | | BTREE | | |
-| ProbeSetXRef | 0 | DataId_IDX | 1 | DataId | A | 46412145 | NULL | NULL | | BTREE | | |
-| ProbeSetXRef | 1 | ProbeSetId1 | 1 | ProbeSetId | A | 5156905 | NULL | NULL | | BTREE | | |
-| ProbeSetXRef | 1 | Locus | 1 | Locus | A | 23206072 | NULL | NULL | YES | BTREE | | |
+|--------------+------------+-------------+--------------+------------------+-----------+-------------+----------+--------+------+------------+---------+---------------|
+| ProbeSetXRef | 0 | ProbeSetId | 1 | ProbeSetFreezeId | A | 856 | NULL | NULL | | BTREE | | |
+| ProbeSetXRef | 0 | ProbeSetId | 2 | ProbeSetId | A | 46412145 | NULL | NULL | | BTREE | | |
+| ProbeSetXRef | 0 | DataId_IDX | 1 | DataId | A | 46412145 | NULL | NULL | | BTREE | | |
+| ProbeSetXRef | 1 | ProbeSetId1 | 1 | ProbeSetId | A | 5156905 | NULL | NULL | | BTREE | | |
+| ProbeSetXRef | 1 | Locus | 1 | Locus | A | 23206072 | NULL | NULL | YES | BTREE | | |
: 5 rows in set (0.00 sec)
** Check storage
--
cgit v1.2.3
From af652da915ae263eac56c3b7ea255be31a7fccaa Mon Sep 17 00:00:00 2001
From: zsloan
Date: Wed, 14 Apr 2021 20:25:15 +0000
Subject: Added option to set default collection to collections/view.html
---
wqflask/wqflask/templates/collections/view.html | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/wqflask/wqflask/templates/collections/view.html b/wqflask/wqflask/templates/collections/view.html
index 8d5e3616..9ec98ab1 100644
--- a/wqflask/wqflask/templates/collections/view.html
+++ b/wqflask/wqflask/templates/collections/view.html
@@ -14,6 +14,7 @@
{{ uc.name }}
Change Collection Name
+ Make Default
This collection has {{ '{}'.format(numify(trait_obs|count, "record", "records")) }}
@@ -231,6 +232,21 @@
$('#collection_name').css('display', 'inline');
}
});
+
+ make_default = function() {
+ alert("The current collection is now your default collection.")
+ let uc_id = $('#uc_id').val();
+ $.cookie('default_collection', uc_id, {
+ expires: 365,
+ path: '/'
+ });
+
+ let default_collection_id = $.cookie('default_collection');
+ };
+
+ $("#make_default").on("click", function(){
+ make_default();
+ });
});
--
cgit v1.2.3
From 939a27cb0ede3102ce929e445690e1ba86d5870a Mon Sep 17 00:00:00 2001
From: zsloan
Date: Wed, 14 Apr 2021 20:26:34 +0000
Subject: Added JS that automatically selects the default collection if it's
set
---
wqflask/wqflask/templates/collections/add.html | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/wqflask/wqflask/templates/collections/add.html b/wqflask/wqflask/templates/collections/add.html
index b4e5385b..0398c6e4 100644
--- a/wqflask/wqflask/templates/collections/add.html
+++ b/wqflask/wqflask/templates/collections/add.html
@@ -49,8 +49,20 @@
--
cgit v1.2.3
From 328b176628ed9db6c1c60590cb10f4cca212738a Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 15 Apr 2021 06:10:28 +0300
Subject: change api port
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index c8d5347c..8ee4a9b7 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -7,7 +7,7 @@ from base import data_set
from base.trait import create_trait
from base.trait import retrieve_sample_data
-GN3_CORRELATION_API = "http://127.0.0.1:8080/api/correlation"
+GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation"
def process_samples(start_vars, sample_names, excluded_samples=None):
@@ -30,6 +30,12 @@ def process_samples(start_vars, sample_names, excluded_samples=None):
def create_target_this_trait(start_vars):
"""this function creates the required trait and target dataset for correlation"""
+
+ print("creating the dataset and trait")
+ import time
+
+ initial_time = time.time()
+
this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
target_dataset = data_set.create_dataset(
dataset_name=start_vars['corr_dataset'])
@@ -44,6 +50,11 @@ def create_target_this_trait(start_vars):
target_dataset.get_trait_data(list(sample_data.keys()))
+
+ time_taken = time.time() - initial_time
+
+ print(f"the time taken to create dataset abnd trait is",time_taken)
+
return (this_dataset, this_trait, target_dataset, sample_data)
@@ -91,6 +102,8 @@ def compute_correlation(start_vars, method="pearson"):
requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}"
corr_input_data = geneid_dict
+
+ print("Sending this request")
corr_results = requests.post(requests_url, json=corr_input_data)
data = corr_results.json()
--
cgit v1.2.3
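The lit branch above posts the trait-to-gene-id mapping to the GN3 REST endpoint. A minimal sketch of that request, assuming a GN3 instance listening on the port configured above and a geneid_dict of trait names to gene ids (the example values are hypothetical):

#+begin_src python
# Sketch: call the GN3 literature-correlation endpoint directly.
# Assumes GN3 is reachable on the port set above; the example ids are made up.
import requests

GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation"

def fetch_lit_correlation(geneid_dict, species, this_trait_geneid):
    requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}"
    response = requests.post(requests_url, json=geneid_dict)
    response.raise_for_status()
    return response.json()

# results = fetch_lit_correlation({"1427571_at": "20423"}, "mouse", "14432")
#+end_src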
From 5a9a7a645510d1385def017adf2f956d61fa2329 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Fri, 16 Apr 2021 02:09:28 +0300
Subject: add demo template
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 3 +
.../wqflask/templates/demo_correlation_page.html | 78 ++++++++++++++++++++--
wqflask/wqflask/views.py | 2 +-
3 files changed, 77 insertions(+), 6 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 8ee4a9b7..b4480076 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -47,11 +47,14 @@ def create_target_this_trait(start_vars):
# target_dataset.get_trait_data(list(self.sample_data.keys()))
this_trait = retrieve_sample_data(this_trait, this_dataset)
+ print(f"Starting to creat the target dataset ")
+ dataset_start_time = time.time()
target_dataset.get_trait_data(list(sample_data.keys()))
time_taken = time.time() - initial_time
+ print(f"the time taken to create dataset is",time.time()-dataset_start_time)
print(f"the time taken to create dataset abnd trait is",time_taken)
diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html
index a8651067..1900a0bd 100644
--- a/wqflask/wqflask/templates/demo_correlation_page.html
+++ b/wqflask/wqflask/templates/demo_correlation_page.html
@@ -10,11 +10,79 @@
{% endblock %}
{% block content %}
- {{correlation_results}}
-
+
CORRELATION RESULTS
+
+
+ {% for corr_result in correlation_results %}
+ {% for key,value in corr_result.items()%}
+
+
+ {%for o_key,o_value in value.items()%}
+
+ {%endfor%}
+ {% endfor %}
+
+ {% endfor %}
+
+
+
{% endblock %}
+
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 6ca9b23f..072db466 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -882,7 +882,7 @@ def corr_compute_page():
logger.info("In corr_compute, request.form is:", pf(request.form))
logger.info(request.url)
correlation_results = compute_correlation(request.form)
- return render_template("demo_correlation_page.html",correlation_results=correlation_results)
+ return render_template("demo_correlation_page.html",correlation_results=correlation_results[1:20])
@app.route("/corr_matrix", methods=('POST',))
def corr_matrix_page():
--
cgit v1.2.3
From 33e03898ee733f18b29e54e202c217ba14921f48 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Sat, 17 Apr 2021 04:14:33 +0300
Subject: use gn3 lib
---
bin/genenetwork2 | 3 +-
wqflask/wqflask/correlation/correlation_gn3_api.py | 57 +++++++++++++++++-----
.../wqflask/templates/demo_correlation_page.html | 2 +-
3 files changed, 47 insertions(+), 15 deletions(-)
diff --git a/bin/genenetwork2 b/bin/genenetwork2
index 5f4e0f9a..917d6549 100755
--- a/bin/genenetwork2
+++ b/bin/genenetwork2
@@ -154,7 +154,8 @@ if [ ! -d $R_LIBS_SITE ] ; then
fi
# We may change this one:
-export PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$PYTHONPATH
+# export PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$PYTHONPATH
+PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$HOME/genenetwork3:$PYTHONPATH
# Our UNIX TMPDIR defaults to /tmp - change this on a shared server
if [ -z $TMPDIR ]; then
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index b4480076..c1d6132b 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -1,11 +1,17 @@
"""module that calls the gn3 api's to do the correlation """
import json
import requests
+import time
from wqflask.correlation import correlation_functions
from base import data_set
from base.trait import create_trait
from base.trait import retrieve_sample_data
+# gn3 lib
+from gn3.computations.correlations import compute_all_sample_correlation
+from gn3.computations.correlations import benchmark_compute_all_sample
+from gn3.computations.correlations import map_shared_keys_to_values
+from gn3.computations.correlations import compute_all_tissue_correlation
GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation"
@@ -30,7 +36,6 @@ def process_samples(start_vars, sample_names, excluded_samples=None):
def create_target_this_trait(start_vars):
"""this function creates the required trait and target dataset for correlation"""
-
print("creating the dataset and trait")
import time
@@ -52,11 +57,10 @@ def create_target_this_trait(start_vars):
target_dataset.get_trait_data(list(sample_data.keys()))
-
time_taken = time.time() - initial_time
- print(f"the time taken to create dataset is",time.time()-dataset_start_time)
+ print(f"the time taken to create dataset is", time.time()-dataset_start_time)
- print(f"the time taken to create dataset abnd trait is",time_taken)
+ print(f"the time taken to create dataset abnd trait is", time_taken)
return (this_dataset, this_trait, target_dataset, sample_data)
@@ -76,16 +80,34 @@ def compute_correlation(start_vars, method="pearson"):
corr_input_data = {}
if corr_type == "sample":
- corr_input_data = {
- "target_dataset": target_dataset.trait_data,
- "target_samplelist": target_dataset.samplelist,
- "trait_data": {
- "trait_sample_data": sample_data,
- "trait_id": start_vars["trait_id"]
- }
+ # corr_input_data = {
+ # "target_dataset": target_dataset.trait_data,
+ # "target_samplelist": target_dataset.samplelist,
+ # "trait_data": {
+ # "trait_sample_data": sample_data,
+ # "trait_id": start_vars["trait_id"]
+ # }
+ # }
+
+
+
+ this_trait_data = {
+ "trait_sample_data": sample_data,
+ "trait_id": start_vars["trait_id"]
}
- requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
+ initial_time = time.time()
+ print("Calling sample correlation")
+ results = map_shared_keys_to_values(
+ target_dataset.samplelist, target_dataset.trait_data)
+ correlation_results = compute_all_sample_correlation(corr_method=method,
+ this_trait=this_trait_data,
+ target_dataset=results)
+
+ print("Time taken is>>>>",time.time()-initial_time)
+
+ # requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
+ return correlation_results
elif corr_type == "tissue":
trait_symbol_dict = this_dataset.retrieve_genes("Symbol")
@@ -96,8 +118,17 @@ def compute_correlation(start_vars, method="pearson"):
"primary_tissue": primary_tissue_data,
"target_tissues_dict": target_tissue_data
}
+ print("Calling tissue correlation")
+ initial_time = time.time()
+ correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
+ target_tissues_data=corr_input_data["target_tissues_dict"],
+ corr_method=method)
+
+ time_taken = time.time()
+ print("Time taken is ??????",time_taken-initial_time)
- requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
+ # requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
+ return correlation_results
elif corr_type == "lit":
(this_trait_geneid, geneid_dict, species) = do_lit_correlation(
diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html
index 1900a0bd..d2979f9d 100644
--- a/wqflask/wqflask/templates/demo_correlation_page.html
+++ b/wqflask/wqflask/templates/demo_correlation_page.html
@@ -21,7 +21,7 @@
{% for corr_result in correlation_results %}
{% for key,value in corr_result.items()%}
-
+
{%for o_key,o_value in value.items()%}
{%endfor%}
--
cgit v1.2.3
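The commit above swaps the sample-correlation REST call for the in-process gn3 library. A minimal sketch of that call flow with toy data, assuming gn3 is importable (see the PYTHONPATH change above) and that each trait's value list is aligned with the target dataset's samplelist; the trait names and values are made up:

#+begin_src python
# Sketch: in-process sample correlation via gn3, using toy, made-up data.
from gn3.computations.correlations import map_shared_keys_to_values
from gn3.computations.correlations import compute_all_sample_correlation

samplelist = ["BXD1", "BXD2", "BXD3"]
trait_data = {"trait_a": [1.2, 3.4, 2.2], "trait_b": [0.5, 0.9, 1.1]}

this_trait_data = {
    "trait_id": "1427571_at",  # illustrative id
    "trait_sample_data": {"BXD1": 1.0, "BXD2": 2.0, "BXD3": 3.0},
}

# Reshape the target dataset the way the commit above does, then correlate.
target_data = map_shared_keys_to_values(samplelist, trait_data)
correlation_results = compute_all_sample_correlation(
    corr_method="pearson",
    this_trait=this_trait_data,
    target_dataset=target_data)
#+end_src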
From ba2fa2025bdc381346afc8ec3203f229ed3551d6 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Sat, 17 Apr 2021 13:43:44 +0300
Subject: refactoring fetching of data
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index c1d6132b..75bd5561 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -48,20 +48,17 @@ def create_target_this_trait(start_vars):
this_trait = create_trait(dataset=this_dataset,
name=start_vars['trait_id'])
- sample_data = process_samples(start_vars, this_dataset.group.samplelist)
+
# target_dataset.get_trait_data(list(self.sample_data.keys()))
- this_trait = retrieve_sample_data(this_trait, this_dataset)
+ # this_trait = retrieve_sample_data(this_trait, this_dataset)
print(f"Starting to creat the target dataset ")
dataset_start_time = time.time()
+ sample_data = ()
- target_dataset.get_trait_data(list(sample_data.keys()))
+
time_taken = time.time() - initial_time
- print(f"the time taken to create dataset is", time.time()-dataset_start_time)
-
- print(f"the time taken to create dataset abnd trait is", time_taken)
-
return (this_dataset, this_trait, target_dataset, sample_data)
@@ -89,6 +86,10 @@ def compute_correlation(start_vars, method="pearson"):
# }
# }
+ sample_data = process_samples(start_vars, this_dataset.group.samplelist)
+ target_dataset.get_trait_data(list(sample_data.keys()))
+ this_trait = retrieve_sample_data(this_trait, this_dataset)
+
this_trait_data = {
@@ -111,8 +112,10 @@ def compute_correlation(start_vars, method="pearson"):
elif corr_type == "tissue":
trait_symbol_dict = this_dataset.retrieve_genes("Symbol")
+ time_to_retrieve = time.time()
primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
this_trait, trait_symbol_dict)
+ print("Time taken to retrieve this is",time.time()-time_to_retrieve)
corr_input_data = {
"primary_tissue": primary_tissue_data,
--
cgit v1.2.3
From 50c0ee93a59eecd40a6fbd19139671c94003c21b Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 19 Apr 2021 00:24:36 +0300
Subject: fix for correlation_demo template
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 53 ++++++++++------------
.../wqflask/templates/demo_correlation_page.html | 23 +++++++++-
2 files changed, 44 insertions(+), 32 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 75bd5561..ba606b92 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -9,9 +9,10 @@ from base.trait import create_trait
from base.trait import retrieve_sample_data
# gn3 lib
from gn3.computations.correlations import compute_all_sample_correlation
-from gn3.computations.correlations import benchmark_compute_all_sample
from gn3.computations.correlations import map_shared_keys_to_values
from gn3.computations.correlations import compute_all_tissue_correlation
+from gn3.computations.correlations import compute_all_lit_correlation
+from gn3.db_utils import database_connector
GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation"
@@ -36,10 +37,6 @@ def process_samples(start_vars, sample_names, excluded_samples=None):
def create_target_this_trait(start_vars):
"""this function creates the required trait and target dataset for correlation"""
- print("creating the dataset and trait")
- import time
-
- initial_time = time.time()
this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
target_dataset = data_set.create_dataset(
@@ -48,17 +45,10 @@ def create_target_this_trait(start_vars):
this_trait = create_trait(dataset=this_dataset,
name=start_vars['trait_id'])
-
# target_dataset.get_trait_data(list(self.sample_data.keys()))
# this_trait = retrieve_sample_data(this_trait, this_dataset)
- print(f"Starting to creat the target dataset ")
- dataset_start_time = time.time()
sample_data = ()
-
-
-
- time_taken = time.time() - initial_time
return (this_dataset, this_trait, target_dataset, sample_data)
@@ -77,6 +67,7 @@ def compute_correlation(start_vars, method="pearson"):
corr_input_data = {}
if corr_type == "sample":
+ initial_time = time.time()
# corr_input_data = {
# "target_dataset": target_dataset.trait_data,
# "target_samplelist": target_dataset.samplelist,
@@ -85,50 +76,44 @@ def compute_correlation(start_vars, method="pearson"):
# "trait_id": start_vars["trait_id"]
# }
# }
-
- sample_data = process_samples(start_vars, this_dataset.group.samplelist)
+ sample_data = process_samples(
+ start_vars, this_dataset.group.samplelist)
target_dataset.get_trait_data(list(sample_data.keys()))
this_trait = retrieve_sample_data(this_trait, this_dataset)
-
+ print("Creating dataset and trait took",time.time()-initial_time)
this_trait_data = {
"trait_sample_data": sample_data,
"trait_id": start_vars["trait_id"]
}
-
initial_time = time.time()
- print("Calling sample correlation")
results = map_shared_keys_to_values(
target_dataset.samplelist, target_dataset.trait_data)
correlation_results = compute_all_sample_correlation(corr_method=method,
this_trait=this_trait_data,
target_dataset=results)
- print("Time taken is>>>>",time.time()-initial_time)
+ print("doing sample correlation took",time.time()-initial_time)
# requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
return correlation_results
elif corr_type == "tissue":
trait_symbol_dict = this_dataset.retrieve_genes("Symbol")
- time_to_retrieve = time.time()
primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
this_trait, trait_symbol_dict)
- print("Time taken to retrieve this is",time.time()-time_to_retrieve)
corr_input_data = {
"primary_tissue": primary_tissue_data,
"target_tissues_dict": target_tissue_data
}
- print("Calling tissue correlation")
initial_time = time.time()
correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
target_tissues_data=corr_input_data["target_tissues_dict"],
corr_method=method)
+ print("time taken for compute tissue is",time.time()-initial_time)
- time_taken = time.time()
- print("Time taken is ??????",time_taken-initial_time)
# requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
return correlation_results
@@ -137,15 +122,23 @@ def compute_correlation(start_vars, method="pearson"):
(this_trait_geneid, geneid_dict, species) = do_lit_correlation(
this_trait, this_dataset, target_dataset)
- requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}"
- corr_input_data = geneid_dict
+ conn, _cursor_object = database_connector()
+ initial_time = time.time()
+ with conn:
- print("Sending this request")
- corr_results = requests.post(requests_url, json=corr_input_data)
+ lit_corr_results = compute_all_lit_correlation(
+ conn=conn, trait_lists=list(geneid_dict.items()),
+ species=species, gene_id=this_trait_geneid)
- data = corr_results.json()
+ return lit_corr_results
+ print("the time taken is",time.time()-initial_time)
+ # requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}"
+ # corr_input_data = geneid_dict
+ # corr_results = requests.post(requests_url, json=corr_input_data)
- return data
+ # data = corr_results.json()
+
+ # return data
def do_lit_correlation(this_trait, this_dataset, target_dataset):
@@ -164,11 +157,11 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict):
"""Gets tissue expression values for the primary trait and target tissues values"""
primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
symbol_list=[this_trait.symbol])
-
if this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower(
)]
+ time_to_to_fetch_all = time.time()
corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
symbol_list=list(trait_symbol_dict.values()))
primary_tissue_data = {
diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html
index d2979f9d..67e3c57c 100644
--- a/wqflask/wqflask/templates/demo_correlation_page.html
+++ b/wqflask/wqflask/templates/demo_correlation_page.html
@@ -22,9 +22,28 @@
{% for key,value in corr_result.items()%}
- {%for o_key,o_value in value.items()%}
+
+ {% if "corr_coeffient" in value %}
+
{{value["corr_coeffient"]}}
+ {%elif "tissue_corr" in value %}
+
+ {%elif "lit_corr" in value %}
+ {{value["lit_corr"]}}
+ {% endif %}
+ {%if "tissue_number" in value %}
+
{{value["tissue_number"]}}
+ {%elif "num_overlap" in value %}
+
+ {% endif %}
+
+
+
+
+
+
{% endfor %}
{% endfor %}
--
cgit v1.2.3
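The lit branch now opens a database connection and calls gn3 in process instead of going over HTTP. A condensed sketch of that path, using the same helpers and keyword arguments the commit above introduces:

#+begin_src python
# Sketch: in-process literature correlation, mirroring the branch added above.
# Assumes database_connector() returns a (connection, cursor) pair, as used above,
# and that geneid_dict maps trait names to gene ids.
from gn3.computations.correlations import compute_all_lit_correlation
from gn3.db_utils import database_connector

def lit_correlation_for_trait(geneid_dict, species, this_trait_geneid):
    conn, _cursor = database_connector()
    with conn:
        return compute_all_lit_correlation(
            conn=conn,
            trait_lists=list(geneid_dict.items()),
            species=species,
            gene_id=this_trait_geneid)
#+end_src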
From e6c21d96f45dc74de43db451383236f28e723847 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Mon, 19 Apr 2021 16:46:39 +0000
Subject: Removed unused code
---
wqflask/base/data_set.py | 5 -----
1 file changed, 5 deletions(-)
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 178234fe..44325d5b 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -573,11 +573,6 @@ class DataSet(object):
"""Gets overridden later, at least for Temp...used by trait's get_given_name"""
return None
- # Delete this eventually
- @property
- def riset():
- Weve_Renamed_This_As_Group
-
def get_accession_id(self):
if self.type == "Publish":
results = g.db.execute("""select InfoFiles.GN_AccesionId from InfoFiles, PublishFreeze, InbredSet where
--
cgit v1.2.3
From 34b6fbf0044fd950ecb95590b4b772abd2cbcb8e Mon Sep 17 00:00:00 2001
From: zsloan
Date: Mon, 19 Apr 2021 16:57:21 +0000
Subject: Fixed NIAAA link because apparently it was changed
---
wqflask/wqflask/templates/base.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wqflask/wqflask/templates/base.html b/wqflask/wqflask/templates/base.html
index ccb2ac5a..1fbaad51 100644
--- a/wqflask/wqflask/templates/base.html
+++ b/wqflask/wqflask/templates/base.html
@@ -191,7 +191,7 @@
Translational Systems Genetics of Mitochondria, Metabolism, and Aging (R01AG043930, 2013-2018)
- NIAAA
+ NIAAA
Integrative Neuroscience Initiative on Alcoholism (U01 AA016662, U01 AA013499, U24 AA013513, U01 AA014425, 2006-2017)
--
cgit v1.2.3
From c8f9367bc12340bb8b8a7ce1a5f42789e311555a Mon Sep 17 00:00:00 2001
From: zsloan
Date: Mon, 19 Apr 2021 16:57:52 +0000
Subject: Removed commented out js file
---
wqflask/wqflask/templates/base.html | 1 -
1 file changed, 1 deletion(-)
diff --git a/wqflask/wqflask/templates/base.html b/wqflask/wqflask/templates/base.html
index 1fbaad51..e6f22deb 100644
--- a/wqflask/wqflask/templates/base.html
+++ b/wqflask/wqflask/templates/base.html
@@ -257,7 +257,6 @@
-
--
cgit v1.2.3
From 789dd88a0b9a6a8cbe0413c47212c89c1681765c Mon Sep 17 00:00:00 2001
From: zsloan
Date: Mon, 19 Apr 2021 17:08:30 +0000
Subject: Removed unused function from network_graph.py
---
wqflask/wqflask/network_graph/network_graph.py | 12 ------------
1 file changed, 12 deletions(-)
diff --git a/wqflask/wqflask/network_graph/network_graph.py b/wqflask/wqflask/network_graph/network_graph.py
index 1d5316a2..132e1884 100644
--- a/wqflask/wqflask/network_graph/network_graph.py
+++ b/wqflask/wqflask/network_graph/network_graph.py
@@ -182,15 +182,3 @@ class NetworkGraph(object):
samples=self.all_sample_list,
sample_data=self.sample_data,
elements=self.elements,)
-
- def get_trait_db_obs(self, trait_db_list):
- self.trait_list = []
- for i, trait_db in enumerate(trait_db_list):
- if i == (len(trait_db_list) - 1):
- break
- trait_name, dataset_name = trait_db.split(":")
- dataset_ob = data_set.create_dataset(dataset_name)
- trait_ob = create_trait(dataset=dataset_ob,
- name=trait_name,
- cellid=None)
- self.trait_list.append((trait_ob, dataset_ob))
--
cgit v1.2.3
From 68ee0a995fceaf6aefdd3c8f780e46a83b51a0e8 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Mon, 19 Apr 2021 17:11:41 +0000
Subject: Specify only getting the first two items after splitting the
trait/dataset input string
---
wqflask/utility/helper_functions.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py
index 7eb7f013..46eeb35d 100644
--- a/wqflask/utility/helper_functions.py
+++ b/wqflask/utility/helper_functions.py
@@ -40,7 +40,7 @@ def get_trait_db_obs(self, trait_db_list):
data, _separator, hmac_string = trait.rpartition(':')
data = data.strip()
assert hmac_string==hmac.hmac_creation(data), "Data tampering?"
- trait_name, dataset_name = data.split(":")
+ trait_name, dataset_name = data.split(":")[:2]
if dataset_name == "Temp":
dataset_ob = data_set.create_dataset(dataset_name=dataset_name, dataset_type="Temp", group_name=trait_name.split("_")[2])
else:
--
cgit v1.2.3
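The [:2] slice above guards against input that contains more than one colon: only the first two fields are unpacked, so the two-name assignment can no longer raise ValueError. A small illustration with a hypothetical input:

#+begin_src python
# Illustration: unpack only the first two colon-separated fields.
data = "1427571_at:HC_M2_0606_P:extra"  # hypothetical value with a stray third field

# A bare two-name unpack of data.split(":") would raise
# "ValueError: too many values to unpack"; slicing first avoids that.
trait_name, dataset_name = data.split(":")[:2]
assert (trait_name, dataset_name) == ("1427571_at", "HC_M2_0606_P")
#+end_src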
From 34e4933de5a1cd444abe618fcfd93b424bf3442e Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Tue, 20 Apr 2021 01:38:26 +0300
Subject: refactor code for iterating mrna tissue data
---
wqflask/base/mrna_assay_tissue_data.py | 39 +++++++++++++++++++---
.../wqflask/correlation/correlation_functions.py | 6 ++--
wqflask/wqflask/correlation/correlation_gn3_api.py | 24 +++++++------
3 files changed, 51 insertions(+), 18 deletions(-)
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
index f1929518..0220d73b 100644
--- a/wqflask/base/mrna_assay_tissue_data.py
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -6,6 +6,7 @@ from utility import db_tools
from utility import Bunch
from utility.db_tools import escape
+from gn3.db_utils import database_connector
from utility.logger import getLogger
@@ -44,16 +45,42 @@ class MrnaAssayTissueData(object):
and t.Mean = x.maxmean;
'''.format(in_clause)
- results = g.db.execute(query).fetchall()
- lower_symbols = []
+ # lower_symbols = []
+ lower_symbols = {}
for gene_symbol in gene_symbols:
+ # lower_symbols[gene_symbol.lower()] = True
if gene_symbol != None:
- lower_symbols.append(gene_symbol.lower())
-
+ lower_symbols[gene_symbol.lower()] = True
+
+ import time
+ # initial_time = time.time()
+ # conn,cursor = database_connector()
+ # cursor.execute(query)
+ # for result in cursor.fetchall():
+ # symbol = result[0]
+ # self.data[symbol].gene_id = result[1]
+ # self.data[symbol].data_id = result[2]
+ # self.data[symbol].chr = result[3]
+ # self.data[symbol].mb = result[4]
+ # self.data[symbol].description = result[5]
+ # self.data[symbol].probe_target_description = result[6]
+
+
+ # print("my loop takes>>>>",time.time()-initial_time)
+ # conn.close()
+ # r
+
+ # takes 5 seconds
+ initial_time = time.time()
+ results = list(g.db.execute(query).fetchall())
for result in results:
symbol = result[0]
- if symbol.lower() in lower_symbols:
+ # if symbol is not None
+ # exists = lower_symbols.get(symbol.lower())
+ # if symbol.lower() in lower_symbols:
+ if symbol is not None and lower_symbols.get(symbol.lower()):
+
symbol = symbol.lower()
self.data[symbol].gene_id = result.GeneId
@@ -62,6 +89,7 @@ class MrnaAssayTissueData(object):
self.data[symbol].mb = result.Mb
self.data[symbol].description = result.description
self.data[symbol].probe_target_description = result.Probe_Target_Description
+ print("time taken in the loop is",time.time()-initial_time)
###########################################################################
#Input: cursor, symbolList (list), dataIdDict(Dict)
@@ -82,6 +110,7 @@ class MrnaAssayTissueData(object):
WHERE TissueProbeSetData.Id IN {} and
TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list))
+
results = g.db.execute(query).fetchall()
for result in results:
if result.Symbol.lower() not in symbol_values_dict:
diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py
index fd7691d4..af1d6060 100644
--- a/wqflask/wqflask/correlation/correlation_functions.py
+++ b/wqflask/wqflask/correlation/correlation_functions.py
@@ -82,6 +82,6 @@ def cal_zero_order_corr_for_tiss (primaryValue=[], targetValue=[], method='pears
def get_trait_symbol_and_tissue_values(symbol_list=None):
tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
-
- if len(tissue_data.gene_symbols):
- return tissue_data.get_symbol_values_pairs()
+ if len(tissue_data.gene_symbols) >0:
+ results = tissue_data.get_symbol_values_pairs()
+ return results
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index ba606b92..e7394647 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -12,6 +12,7 @@ from gn3.computations.correlations import compute_all_sample_correlation
from gn3.computations.correlations import map_shared_keys_to_values
from gn3.computations.correlations import compute_all_tissue_correlation
from gn3.computations.correlations import compute_all_lit_correlation
+from gn3.computations.correlations import experimental_compute_all_tissue_correlation
from gn3.db_utils import database_connector
GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation"
@@ -37,7 +38,6 @@ def process_samples(start_vars, sample_names, excluded_samples=None):
def create_target_this_trait(start_vars):
"""this function creates the required trait and target dataset for correlation"""
-
this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
target_dataset = data_set.create_dataset(
dataset_name=start_vars['corr_dataset'])
@@ -81,7 +81,7 @@ def compute_correlation(start_vars, method="pearson"):
target_dataset.get_trait_data(list(sample_data.keys()))
this_trait = retrieve_sample_data(this_trait, this_dataset)
- print("Creating dataset and trait took",time.time()-initial_time)
+ print("Creating dataset and trait took", time.time()-initial_time)
this_trait_data = {
"trait_sample_data": sample_data,
@@ -94,7 +94,7 @@ def compute_correlation(start_vars, method="pearson"):
this_trait=this_trait_data,
target_dataset=results)
- print("doing sample correlation took",time.time()-initial_time)
+ print("doing sample correlation took", time.time()-initial_time)
# requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
return correlation_results
@@ -109,11 +109,16 @@ def compute_correlation(start_vars, method="pearson"):
"target_tissues_dict": target_tissue_data
}
initial_time = time.time()
- correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
- target_tissues_data=corr_input_data["target_tissues_dict"],
- corr_method=method)
- print("time taken for compute tissue is",time.time()-initial_time)
-
+ correlation_results = experimental_compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
+ target_tissues_data=corr_input_data[
+ "target_tissues_dict"],
+ corr_method=method)
+ print("correlation y took", time.time()-initial_time)
+ # initial_time = time.time()
+ # correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
+ # target_tissues_data=corr_input_data["target_tissues_dict"],
+ # corr_method=method)
+ # print("time taken for compute tissue is", time.time()-initial_time)
# requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
return correlation_results
@@ -131,7 +136,7 @@ def compute_correlation(start_vars, method="pearson"):
species=species, gene_id=this_trait_geneid)
return lit_corr_results
- print("the time taken is",time.time()-initial_time)
+ print("the time taken is", time.time()-initial_time)
# requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}"
# corr_input_data = geneid_dict
# corr_results = requests.post(requests_url, json=corr_input_data)
@@ -161,7 +166,6 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict):
primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower(
)]
- time_to_to_fetch_all = time.time()
corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
symbol_list=list(trait_symbol_dict.values()))
primary_tissue_data = {
--
cgit v1.2.3
From 315d7f13b254aa62e277805d7d9816b0c7042479 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 22 Apr 2021 18:23:50 +0000
Subject: Added JS that sets the 'add to collection' dropdown to default to the
collection the user set as their default collection
---
wqflask/wqflask/static/new/javascript/search_results.js | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/wqflask/wqflask/static/new/javascript/search_results.js b/wqflask/wqflask/static/new/javascript/search_results.js
index ecb1220d..48b9b7be 100644
--- a/wqflask/wqflask/static/new/javascript/search_results.js
+++ b/wqflask/wqflask/static/new/javascript/search_results.js
@@ -326,4 +326,15 @@ $(function() {
} );
}
+ apply_default = function() {
+ let default_collection_id = $.cookie('default_collection');
+ if (default_collection_id) {
+ let the_option = $('[name=existing_collection] option').filter(function() {
+ return ($(this).text().split(":")[0] == default_collection_id);
+ })
+ the_option.prop('selected', true);
+ }
+ }
+ apply_default();
+
});
\ No newline at end of file
--
cgit v1.2.3
From 1a7bb988ee360b3ef48e22e25b419c375dccb9fa Mon Sep 17 00:00:00 2001
From: zsloan
Date: Thu, 22 Apr 2021 18:26:01 +0000
Subject: Account for situations where the trait symbol is null for ProbeSet
traits; previously it could throw an error
---
wqflask/wqflask/search_results.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index f23c0582..cb01a2af 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -119,7 +119,7 @@ class SearchResultPage(object):
trait_dict['dataset'] = this_trait.dataset.name
trait_dict['hmac'] = hmac.data_hmac('{}:{}'.format(this_trait.name, this_trait.dataset.name))
if this_trait.dataset.type == "ProbeSet":
- trait_dict['symbol'] = this_trait.symbol
+ trait_dict['symbol'] = this_trait.symbol if this_trait.symbol else "N/A"
trait_dict['description'] = "N/A"
if this_trait.description_display:
trait_dict['description'] = this_trait.description_display
--
cgit v1.2.3
From b0ccb12682fed83bf72d22ff42f1f442a8e6176e Mon Sep 17 00:00:00 2001
From: BonfaceKilz
Date: Mon, 19 Apr 2021 14:43:16 +0300
Subject: Remove stale comments
---
wqflask/base/data_set.py | 11 ----
wqflask/utility/helper_functions.py | 4 --
wqflask/wqflask/show_trait/show_trait.py | 72 +++++++++++++++-----------
wqflask/wqflask/templates/index_page_orig.html | 10 ----
wqflask/wqflask/templates/submit_trait.html | 12 -----
wqflask/wqflask/views.py | 23 ++------
6 files changed, 46 insertions(+), 86 deletions(-)
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 178234fe..cc5a428c 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -262,8 +262,6 @@ class Markers(object):
elif isinstance(p_values, dict):
filtered_markers = []
for marker in self.markers:
- #logger.debug("marker[name]", marker['name'])
- #logger.debug("p_values:", p_values)
if marker['name'] in p_values:
#logger.debug("marker {} IS in p_values".format(i))
marker['p_value'] = p_values[marker['name']]
@@ -276,10 +274,6 @@ class Markers(object):
marker['lrs_value'] = - \
math.log10(marker['p_value']) * 4.61
filtered_markers.append(marker)
- # else:
- #logger.debug("marker {} NOT in p_values".format(i))
- # self.markers.remove(marker)
- #del self.markers[i]
self.markers = filtered_markers
@@ -306,7 +300,6 @@ class HumanMarkers(Markers):
marker['Mb'] = float(splat[3]) / 1000000
self.markers.append(marker)
- #logger.debug("markers is: ", pf(self.markers))
def add_pvalues(self, p_values):
super(HumanMarkers, self).add_pvalues(p_values)
@@ -520,7 +513,6 @@ def datasets(group_name, this_group=None):
break
if tissue_already_exists:
- #logger.debug("dataset_menu:", dataset_menu[i]['datasets'])
dataset_menu[i]['datasets'].append((dataset, dataset_short))
else:
dataset_menu.append(dict(tissue=tissue_name,
@@ -735,9 +727,6 @@ class PhenotypeDataSet(DataSet):
DS_NAME_MAP['Publish'] = 'PhenotypeDataSet'
def setup(self):
-
- #logger.debug("IS A PHENOTYPEDATASET")
-
# Fields in the database table
self.search_fields = ['Phenotype.Post_publication_description',
'Phenotype.Pre_publication_description',
diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py
index 7eb7f013..15d5b3ab 100644
--- a/wqflask/utility/helper_functions.py
+++ b/wqflask/utility/helper_functions.py
@@ -10,7 +10,6 @@ import logging
logger = logging.getLogger(__name__ )
def get_species_dataset_trait(self, start_vars):
- #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype"
if "temp_trait" in list(start_vars.keys()):
if start_vars['temp_trait'] == "True":
self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group'])
@@ -27,9 +26,6 @@ def get_species_dataset_trait(self, start_vars):
get_qtl_info=True)
logger.debug("After creating trait")
- #if read_genotype:
- #self.dataset.group.read_genotype_file()
- #self.genotype = self.dataset.group.genotype
def get_trait_db_obs(self, trait_db_list):
if isinstance(trait_db_list, str):
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 6892f02b..ed55d473 100644
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -138,17 +138,12 @@ class ShowTrait(object):
self.ncbi_summary = get_ncbi_summary(self.this_trait)
- #Get nearest marker for composite mapping
+ # Get nearest marker for composite mapping
if not self.temp_trait:
if check_if_attr_exists(self.this_trait, 'locus_chr') and self.dataset.type != "Geno" and self.dataset.type != "Publish":
self.nearest_marker = get_nearest_marker(self.this_trait, self.dataset)
- #self.nearest_marker1 = get_nearest_marker(self.this_trait, self.dataset)[0]
- #self.nearest_marker2 = get_nearest_marker(self.this_trait, self.dataset)[1]
else:
self.nearest_marker = ""
- #self.nearest_marker1 = ""
- #self.nearest_marker2 = ""
-
self.make_sample_lists()
@@ -168,16 +163,19 @@ class ShowTrait(object):
categorical_var_list = []
if not self.temp_trait:
- categorical_var_list = get_categorical_variables(self.this_trait, self.sample_groups[0]) #ZS: Only using first samplelist, since I think mapping only uses those samples
+ # ZS: Only using first samplelist, since I think mapping only uses those samples
+ categorical_var_list = get_categorical_variables(self.this_trait, self.sample_groups[0])
- #ZS: Get list of chromosomes to select for mapping
+ # ZS: Get list of chromosomes to select for mapping
self.chr_list = [["All", -1]]
for i, this_chr in enumerate(self.dataset.species.chromosomes.chromosomes):
self.chr_list.append([self.dataset.species.chromosomes.chromosomes[this_chr].name, i])
self.genofiles = self.dataset.group.get_genofiles()
- if "QTLReaper" or "R/qtl" in dataset.group.mapping_names: #ZS: No need to grab scales from .geno file unless it's using a mapping method that reads .geno files
+ # ZS: No need to grab scales from .geno file unless it's using
+ # a mapping method that reads .geno files
+ if "QTLReaper" or "R/qtl" in dataset.group.mapping_names:
if self.genofiles:
self.scales_in_geno = get_genotype_scales(self.genofiles)
else:
@@ -187,10 +185,15 @@ class ShowTrait(object):
self.has_num_cases = has_num_cases(self.this_trait)
- #ZS: Needed to know whether to display bar chart + get max sample name length in order to set table column width
+ # ZS: Needed to know whether to display bar chart + get max
+ # sample name length in order to set table column width
self.num_values = 0
- self.binary = "true" #ZS: So it knows whether to display the Binary R/qtl mapping method, which doesn't work unless all values are 0 or 1
- self.negative_vals_exist = "false" #ZS: Since we don't want to show log2 transform option for situations where it doesn't make sense
+ # ZS: So it knows whether to display the Binary R/qtl mapping
+ # method, which doesn't work unless all values are 0 or 1
+ self.binary = "true"
+ # ZS: Since we don't want to show log2 transform option for
+ # situations where it doesn't make sense
+ self.negative_vals_exist = "false"
max_samplename_width = 1
for group in self.sample_groups:
for sample in group.sample_list:
@@ -203,7 +206,8 @@ class ShowTrait(object):
if sample.value < 0:
self.negative_vals_exist = "true"
- #ZS: Check whether any attributes have few enough distinct values to show the "Block samples by group" option
+ # ZS: Check whether any attributes have few enough distinct
+ # values to show the "Block samples by group" option
self.categorical_attr_exists = "false"
for attribute in self.sample_groups[0].attributes:
if len(self.sample_groups[0].attributes[attribute].distinct_values) <= 10:
@@ -258,7 +262,6 @@ class ShowTrait(object):
if not self.temp_trait:
if hasattr(self.this_trait, 'locus_chr') and self.this_trait.locus_chr != "" and self.dataset.type != "Geno" and self.dataset.type != "Publish":
hddn['control_marker'] = self.nearest_marker
- #hddn['control_marker'] = self.nearest_marker1+","+self.nearest_marker2
hddn['do_control'] = False
hddn['maf'] = 0.05
hddn['mapping_scale'] = "physic"
@@ -268,7 +271,8 @@ class ShowTrait(object):
if len(self.scales_in_geno) < 2:
hddn['mapping_scale'] = self.scales_in_geno[list(self.scales_in_geno.keys())[0]][0][0]
- # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self
+ # We'll need access to this_trait and hddn in the Jinja2
+ # Template, so we put it inside self
self.hddn = hddn
js_data = dict(trait_id = self.trait_id,
@@ -294,7 +298,8 @@ class ShowTrait(object):
self.js_data = js_data
def get_external_links(self):
- #ZS: There's some weirdness here because some fields don't exist while others are empty strings
+ # ZS: There's some weirdness here because some fields don't
+ # exist while others are empty strings
self.pubmed_link = webqtlConfig.PUBMEDLINK_URL % self.this_trait.pubmed_id if check_if_attr_exists(self.this_trait, 'pubmed_id') else None
self.ncbi_gene_link = webqtlConfig.NCBI_LOCUSID % self.this_trait.geneid if check_if_attr_exists(self.this_trait, 'geneid') else None
self.omim_link = webqtlConfig.OMIM_ID % self.this_trait.omim if check_if_attr_exists(self.this_trait, 'omim') else None
@@ -320,7 +325,6 @@ class ShowTrait(object):
self.panther_link = webqtlConfig.PANTHER_URL % self.this_trait.symbol
self.ebi_gwas_link = webqtlConfig.EBIGWAS_URL % self.this_trait.symbol
self.protein_atlas_link = webqtlConfig.PROTEIN_ATLAS_URL % self.this_trait.symbol
- #self.open_targets_link = webqtlConfig.OPEN_TARGETS_URL % self.this_trait.symbol
if self.dataset.group.species == "mouse" or self.dataset.group.species == "human":
self.rgd_link = webqtlConfig.RGD_URL % (self.this_trait.symbol, self.dataset.group.species.capitalize())
@@ -429,7 +433,9 @@ class ShowTrait(object):
all_samples_ordered.append(sample)
other_sample_names.append(sample)
- #ZS: CFW is here because the .geno file doesn't properly contain its full list of samples. This should probably be fixed.
+ # ZS: CFW is here because the .geno file doesn't properly
+ # contain its full list of samples. This should probably
+ # be fixed.
if self.dataset.group.species == "human" or (set(primary_sample_names) == set(parent_f1_samples)) or self.dataset.group.name == "CFW":
primary_sample_names += other_sample_names
other_sample_names = []
@@ -445,7 +451,8 @@ class ShowTrait(object):
sample_group_type='primary',
header=primary_header)
- #if other_sample_names and self.dataset.group.species != "human" and self.dataset.group.name != "CFW":
+ # if other_sample_names and self.dataset.group.species !=
+ # "human" and self.dataset.group.name != "CFW":
if len(other_sample_names) > 0:
other_sample_names.sort() #Sort other samples
if parent_f1_samples:
@@ -539,7 +546,8 @@ def get_z_scores(sample_groups):
def get_nearest_marker(this_trait, this_db):
this_chr = this_trait.locus_chr
this_mb = this_trait.locus_mb
- #One option is to take flanking markers, another is to take the two (or one) closest
+ # One option is to take flanking markers, another is to take the
+ # two (or one) closest
query = """SELECT Geno.Name
FROM Geno, GenoXRef, GenoFreeze
WHERE Geno.Chr = '{}' AND
@@ -552,7 +560,6 @@ def get_nearest_marker(this_trait, this_db):
if result == []:
return ""
- #return "", ""
else:
return result[0][0]
@@ -617,7 +624,8 @@ def check_if_attr_exists(the_trait, id_type):
def get_ncbi_summary(this_trait):
if check_if_attr_exists(this_trait, 'geneid'):
- #ZS: Need to switch this try/except to something that checks the output later
+ # ZS: Need to switch this try/except to something that checks
+ # the output later
try:
response = requests.get("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id=%s&retmode=json" % this_trait.geneid)
summary = json.loads(response.content)['result'][this_trait.geneid]['summary']
@@ -661,8 +669,8 @@ def get_genotype_scales(genofiles):
def get_scales_from_genofile(file_location):
geno_path = locate_ignore_error(file_location, 'genotype')
-
- if not geno_path: #ZS: This is just to allow the code to run when
+ # ZS: This is just to allow the code to run when
+ if not geno_path:
return [["physic", "Mb"]]
cm_and_mb_cols_exist = True
cm_column = None
@@ -670,7 +678,9 @@ def get_scales_from_genofile(file_location):
with open(geno_path, "r") as geno_fh:
for i, line in enumerate(geno_fh):
if line[0] == "#" or line[0] == "@":
- if "@scale" in line: #ZS: If the scale is made explicit in the metadata, use that
+ # ZS: If the scale is made explicit in the metadata,
+ # use that
+ if "@scale" in line:
scale = line.split(":")[1].strip()
if scale == "morgan":
return [["morgan", "cM"]]
@@ -690,12 +700,16 @@ def get_scales_from_genofile(file_location):
mb_column = 3
break
- #ZS: This attempts to check whether the cM and Mb columns are 'real', since some .geno files have one column be a copy of the other column, or have one column that is all 0s
+ # ZS: This attempts to check whether the cM and Mb columns are
+ # 'real', since some .geno files have one column be a copy of
+ # the other column, or have one column that is all 0s
cm_all_zero = True
mb_all_zero = True
cm_mb_all_equal = True
for i, line in enumerate(geno_fh):
- if first_marker_line <= i < first_marker_line + 10: #ZS: I'm assuming there won't be more than 10 markers where the position is listed as 0
+ # ZS: I'm assuming there won't be more than 10 markers
+ # where the position is listed as 0
+ if first_marker_line <= i < first_marker_line + 10:
if cm_column:
cm_val = line.split("\t")[cm_column].strip()
if cm_val != "0":
@@ -711,8 +725,8 @@ def get_scales_from_genofile(file_location):
if i > first_marker_line + 10:
break
-
- #ZS: This assumes that both won't be all zero, since if that's the case mapping shouldn't be an option to begin with
+ # ZS: This assumes that both won't be all zero, since if that's
+ # the case mapping shouldn't be an option to begin with
if mb_all_zero:
return [["morgan", "cM"]]
elif cm_mb_all_equal:
diff --git a/wqflask/wqflask/templates/index_page_orig.html b/wqflask/wqflask/templates/index_page_orig.html
index 7f82b35c..87cf1b45 100755
--- a/wqflask/wqflask/templates/index_page_orig.html
+++ b/wqflask/wqflask/templates/index_page_orig.html
@@ -7,16 +7,6 @@
{% endblock %}
{% block content %}
-
-
-
{{ flash_me() }}
diff --git a/wqflask/wqflask/templates/submit_trait.html b/wqflask/wqflask/templates/submit_trait.html
index 68b06f55..334a608d 100644
--- a/wqflask/wqflask/templates/submit_trait.html
+++ b/wqflask/wqflask/templates/submit_trait.html
@@ -61,18 +61,6 @@
-
Paste or Type Multiple Values: You can enter data by pasting a series of numbers representing trait values into this area.
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 2c0ba586..c4b510d4 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -156,12 +156,6 @@ def index_page():
import_collections = params['import_collections']
if import_collections == "true":
g.user_session.import_traits_to_user(params['anon_id'])
- #if USE_GN_SERVER:
- # # The menu is generated using GN_SERVER
- # return render_template("index_page.html", gn_server_url = GN_SERVER_URL, version=GN_VERSION)
- #else:
-
- # Old style static menu (OBSOLETE)
return render_template("index_page_orig.html", version=GN_VERSION)
@@ -343,14 +337,10 @@ def intro():
@app.route("/tutorials")
def tutorials():
- #doc = Docs("links", request.args)
- #return render_template("docs.html", **doc.__dict__)
return render_template("tutorials.html")
@app.route("/credits")
def credits():
- #doc = Docs("links", request.args)
- #return render_template("docs.html", **doc.__dict__)
return render_template("credits.html")
@app.route("/update_text", methods=('POST',))
@@ -368,12 +358,9 @@ def submit_trait_form():
@app.route("/create_temp_trait", methods=('POST',))
def create_temp_trait():
logger.info(request.url)
-
- #template_vars = submit_trait.SubmitTrait(request.form)
-
doc = Docs("links")
return render_template("links.html", **doc.__dict__)
- #return render_template("show_trait.html", **template_vars.__dict__)
+
@app.route('/export_trait_excel', methods=('POST',))
def export_trait_excel():
@@ -487,21 +474,17 @@ def export_perm_data():
mimetype='text/csv',
headers={"Content-Disposition":"attachment;filename=" + file_name + ".csv"})
+
@app.route("/show_temp_trait", methods=('POST',))
def show_temp_trait_page():
logger.info(request.url)
template_vars = show_trait.ShowTrait(request.form)
- #logger.info("js_data before dump:", template_vars.js_data)
template_vars.js_data = json.dumps(template_vars.js_data,
default=json_default_handler,
indent=" ")
- # Sorting the keys messes up the ordered dictionary, so don't do that
- #sort_keys=True)
-
- #logger.info("js_data after dump:", template_vars.js_data)
- #logger.info("show_trait template_vars:", pf(template_vars.__dict__))
return render_template("show_trait.html", **template_vars.__dict__)
+
@app.route("/show_trait")
def show_trait_page():
logger.info(request.url)
--
cgit v1.2.3
From 4534daa6fb07c23b90e024560ca64091fc330eed Mon Sep 17 00:00:00 2001
From: BonfaceKilz
Date: Mon, 19 Apr 2021 17:46:38 +0300
Subject: Move looped SQL query into one statement in "get_species_groups"
It's inefficient to have an SQL query executed in a loop. As data
grows, the query becomes slower. It's better to let SQL handle such
queries.
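For reference, a minimal sketch (not part of the patch) contrasting the two approaches. It assumes a DB-API style cursor such as the g.db.execute used elsewhere in this codebase, and the table and column names follow the query in the diff below; the function names are hypothetical.

# Hypothetical sketch, not the committed code. Assumes db.execute(sql)
# returns a cursor with .fetchall(), as g.db does in this repository.

def species_groups_looped(db):
    # Old pattern: one query for species, then one query per species (N+1 round trips).
    species_and_groups = []
    for species_id, species_name in db.execute(
            "SELECT SpeciesId, MenuName FROM Species").fetchall():
        groups = [row[0] for row in db.execute(
            "SELECT InbredSetName FROM InbredSet "
            "WHERE SpeciesId = %s" % species_id).fetchall()]
        species_and_groups.append({"species": species_name, "groups": groups})
    return species_and_groups

def species_groups_joined(db):
    # New pattern: a single JOIN, grouped in Python afterwards.
    menu = {}
    for species, group_name in db.execute(
            "SELECT s.MenuName, i.InbredSetName FROM InbredSet i "
            "INNER JOIN Species s ON s.SpeciesId = i.SpeciesId "
            "ORDER BY i.SpeciesId ASC, i.Name ASC").fetchall():
        menu.setdefault(species, []).append(group_name)
    return [{"species": species, "groups": groups}
            for species, groups in menu.items()]

The JOIN variant makes one round trip to the database regardless of how many species exist, which is the point the commit message makes about growth in data size.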
---
wqflask/utility/helper_functions.py | 29 ++++++++++++++---------------
1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py
index 15d5b3ab..4ba92ed5 100644
--- a/wqflask/utility/helper_functions.py
+++ b/wqflask/utility/helper_functions.py
@@ -47,19 +47,18 @@ def get_trait_db_obs(self, trait_db_list):
if trait_ob:
self.trait_list.append((trait_ob, dataset_ob))
-def get_species_groups():
-
- species_query = "SELECT SpeciesId, MenuName FROM Species"
- species_ids_and_names = g.db.execute(species_query).fetchall()
-
- species_and_groups = []
- for species_id, species_name in species_ids_and_names:
- this_species_groups = {}
- this_species_groups['species'] = species_name
- groups_query = "SELECT InbredSetName FROM InbredSet WHERE SpeciesId = %s" % (species_id)
- groups = [group[0] for group in g.db.execute(groups_query).fetchall()]
- this_species_groups['groups'] = groups
- species_and_groups.append(this_species_groups)
-
- return species_and_groups
+def get_species_groups():
+ """Group each species into a group"""
+ _menu = {}
+ for species, group_name in g.db.execute(
+ "SELECT s.MenuName, i.InbredSetName FROM InbredSet i "
+ "INNER JOIN Species s ON s.SpeciesId = i.SpeciesId "
+ "ORDER BY i.SpeciesId ASC, i.Name ASC").fetchall():
+ if _menu.get(species):
+ _menu[species].append(group_name)
+ else:
+ _menu[species] = [group_name]
+ return [{"species": key,
+ "groups": value} for key, value in
+ list(_menu.items())]
--
cgit v1.2.3
From d2e2046a3ce1af0ca97ea1b6d9ccb3a4c9aecf7c Mon Sep 17 00:00:00 2001
From: BonfaceKilz
Date: Fri, 23 Apr 2021 17:21:12 +0300
Subject: Add full link to genetic data collected as part of WebQTL project
---
wqflask/wqflask/templates/submit_trait.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wqflask/wqflask/templates/submit_trait.html b/wqflask/wqflask/templates/submit_trait.html
index 334a608d..2cc18240 100644
--- a/wqflask/wqflask/templates/submit_trait.html
+++ b/wqflask/wqflask/templates/submit_trait.html
@@ -14,7 +14,7 @@
Introduction
The trait values that you enter are statistically compared with verified genotypes collected at a set of microsatellite markers in each RI set. The markers are drawn from a set of over 750, but for each set redundant markers have been removed, preferentially retaining those that are most informative.
-
These error-checked RI mapping data match theoretical expectations for RI strain sets. The cumulative adjusted length of the RI maps are approximately 1400 cM, a value that matches those of both MIT maps and Chromosome Committee Report maps. See our full description of the genetic data collected as part of the WebQTL project.
+
These error-checked RI mapping data match theoretical expectations for RI strain sets. The cumulative adjusted length of the RI maps are approximately 1400 cM, a value that matches those of both MIT maps and Chromosome Committee Report maps. See our full description of the genetic data collected as part of the WebQTL project.
--
cgit v1.2.3
From 2114ad9e84ad7778e048b52cf865b5f031ceab88 Mon Sep 17 00:00:00 2001
From: BonfaceKilz
Date: Fri, 23 Apr 2021 17:27:31 +0300
Subject: Rename index_page_orig to index_page
---
wqflask/wqflask/templates/index_page.html | 222 +++++++++-------
wqflask/wqflask/templates/index_page_orig.html | 339 -------------------------
wqflask/wqflask/views.py | 2 +-
3 files changed, 136 insertions(+), 427 deletions(-)
mode change 100644 => 100755 wqflask/wqflask/templates/index_page.html
delete mode 100755 wqflask/wqflask/templates/index_page_orig.html
diff --git a/wqflask/wqflask/templates/index_page.html b/wqflask/wqflask/templates/index_page.html
old mode 100644
new mode 100755
index 31846f87..87cf1b45
--- a/wqflask/wqflask/templates/index_page.html
+++ b/wqflask/wqflask/templates/index_page.html
@@ -1,78 +1,73 @@
{% extends "base.html" %}
{% block title %}GeneNetwork{% endblock %}
+{% block css %}
+
+{% endblock %}
{% block content %}
-
-
-
-