From be9c4a39500d7978b4cae7536a5f96c3818d211e Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Wed, 24 Mar 2021 09:41:47 +0300
Subject: initial commit for gn3-correlation api integration
---
.../wqflask/correlation/test_correlation_gn3.py | 14 ++++
wqflask/wqflask/correlation/correlation_gn3_api.py | 77 ++++++++++++++++++++++
2 files changed, 91 insertions(+)
create mode 100644 wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py
create mode 100644 wqflask/wqflask/correlation/correlation_gn3_api.py
diff --git a/wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py b/wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py
new file mode 100644
index 00000000..e1bd6d86
--- /dev/null
+++ b/wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py
@@ -0,0 +1,14 @@
+"""this module contains tests for code used in integrating to gn3 api"""
+from unittest import TestCase
+from base.data_set import create_dataset
+
+class TestCorrelation(TestCase):
+
+ def test_create_dataset(self):
+ """test for creating datasets"""
+
+ pass
+ def test_fetch_dataset_info(self):
+ """test for fetching dataset info data"""
+
+ pass
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
new file mode 100644
index 00000000..4cf6533c
--- /dev/null
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -0,0 +1,77 @@
+"""module that calls the gn3 api's to do the correlation """
+from base import data_set
+from base.trait import create_trait
+from base.trait import retrieve_sample_data
+
+
+
+
+
+
+
+def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, method="pearson"):
+ import requests
+ from wqflask.correlation.correlation_gn3_api import compute_correlation
+
+ cor_results = compute_correlation(start_vars)
+
+ data = {
+ "target_dataset": target_dataset,
+ "target_samplelist": target_samplelist,
+ "trait_data": {
+ "trait_sample_data": trait_data,
+ "trait_id": "HC_Q"
+ }
+ }
+ requests_url = f"http://127.0.0.1:8080/api/correlation/sample_x/{method}"
+
+ results = requests.post(requests_url, json=data)
+
+ data = results.json()
+
+ print(data)
+
+ return data
+
+
+def process_samples(start_vars,sample_names,excluded_samples=None):
+ sample_data = {}
+ if not excluded_samples:
+ excluded_samples = ()
+
+ sample_vals_dict = json.loads(start_vars["sample_vals"])
+
+ for sample in sample_names:
+ if sample not in excluded_samples:
+ val = sample_val_dict[sample]
+ if not val.strip().lower() == "x":
+ sample_data[str(sample)]=float(value)
+
+ return sample_data
+
+
+def create_fetch_dataset_data(dataset_name):
+ this_dataset = data_set.create_dataset(dataset_name=dataset_name)
+
+ this_dataset.get_trait_data()
+
+
+def create_target_this_trait(start_vars):
+ """this function prefetch required data for correlation"""
+
+ this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
+ target_dataset = data_set.create_dataset(
+ dataset_name=start_vars['corr_dataset'])
+
+ this_trait = create_trait(dataset=this_dataset,
+ name=start_vars['trait_id'])
+
+ this_trait = retrieve_sample_data(this_trait, this_dataset)
+
+ target_dataset.get_trait_data()
+
+ return (this_dataset,this_trait,target_dataset)
+def compute_correlation(start_vars):
+
+ this_dataset, this_trait, target_dataset = create_target_this_trait(
+ start_vars=start_vars)
--
cgit v1.2.3
From d913848572dd284ae7656e72dad199e99907871a Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Wed, 24 Mar 2021 12:59:49 +0300
Subject: initial commit for integrating with the gn3 api
---
wqflask/wqflask/correlation/show_corr_results.py | 428 +++++++++++++----------
1 file changed, 242 insertions(+), 186 deletions(-)
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index fb4dc4f4..a817a4a4 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -1,4 +1,4 @@
-## Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
+# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License
@@ -58,6 +58,31 @@ TISSUE_METHODS = [METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK]
TISSUE_MOUSE_DB = 1
+def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, method="pearson"):
+ import requests
+ from wqflask.correlation.correlation_gn3_api import compute_correlation
+
+ # cor_results = compute_correlation(start_vars)
+
+ data = {
+ "target_dataset": target_dataset,
+ "target_samplelist": target_samplelist,
+ "trait_data": {
+ "trait_sample_data": trait_data,
+ "trait_id": "HC_Q"
+ }
+ }
+ requests_url = f"http://127.0.0.1:8080/api/correlation/sample_x/{method}"
+
+ results = requests.post(requests_url, json=data)
+
+ data = results.json()
+
+ print(data)
+
+ return data
+
+
class CorrelationResults(object):
def __init__(self, start_vars):
# get trait list from db (database name)
@@ -78,11 +103,12 @@ class CorrelationResults(object):
with Bench("Doing correlations"):
if start_vars['dataset'] == "Temp":
- self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group'])
+ self.dataset = data_set.create_dataset(
+ dataset_name="Temp", dataset_type="Temp", group_name=start_vars['group'])
self.trait_id = start_vars['trait_id']
self.this_trait = create_trait(dataset=self.dataset,
- name=self.trait_id,
- cellid=None)
+ name=self.trait_id,
+ cellid=None)
else:
helper_functions.get_species_dataset_trait(self, start_vars)
@@ -97,7 +123,7 @@ class CorrelationResults(object):
if ('loc_chr' in start_vars and
'min_loc_mb' in start_vars and
- 'max_loc_mb' in start_vars):
+ 'max_loc_mb' in start_vars):
self.location_type = get_string(start_vars, 'location_type')
self.location_chr = get_string(start_vars, 'loc_chr')
@@ -109,8 +135,8 @@ class CorrelationResults(object):
self.get_formatted_corr_type()
self.return_number = int(start_vars['corr_return_results'])
- #The two if statements below append samples to the sample list based upon whether the user
- #rselected Primary Samples Only, Other Samples Only, or All Samples
+ # The two if statements below append samples to the sample list based upon whether the user
+ # selected Primary Samples Only, Other Samples Only, or All Samples
primary_samples = self.dataset.group.samplelist
if self.dataset.group.parlist != None:
@@ -118,23 +144,26 @@ class CorrelationResults(object):
if self.dataset.group.f1list != None:
primary_samples += self.dataset.group.f1list
- #If either BXD/whatever Only or All Samples, append all of that group's samplelist
+ # If either BXD/whatever Only or All Samples, append all of that group's samplelist
if corr_samples_group != 'samples_other':
self.process_samples(start_vars, primary_samples)
- #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and
- #exclude the primary samples (because they would have been added in the previous
- #if statement if the user selected All Samples)
+ # If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and
+ # exclude the primary samples (because they would have been added in the previous
+ # if statement if the user selected All Samples)
if corr_samples_group != 'samples_primary':
if corr_samples_group == 'samples_other':
primary_samples = [x for x in primary_samples if x not in (
- self.dataset.group.parlist + self.dataset.group.f1list)]
- self.process_samples(start_vars, list(self.this_trait.data.keys()), primary_samples)
+ self.dataset.group.parlist + self.dataset.group.f1list)]
+ self.process_samples(start_vars, list(
+ self.this_trait.data.keys()), primary_samples)
- self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
+ self.target_dataset = data_set.create_dataset(
+ start_vars['corr_dataset'])
self.target_dataset.get_trait_data(list(self.sample_data.keys()))
- self.header_fields = get_header_fields(self.target_dataset.type, self.corr_method)
+ self.header_fields = get_header_fields(
+ self.target_dataset.type, self.corr_method)
if self.target_dataset.type == "ProbeSet":
self.filter_cols = [7, 6]
@@ -153,7 +182,8 @@ class CorrelationResults(object):
tissue_corr_data = self.do_tissue_correlation_for_all_traits()
if tissue_corr_data != None:
for trait in list(tissue_corr_data.keys())[:self.return_number]:
- self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])
+ self.get_sample_r_and_p_values(
+ trait, self.target_dataset.trait_data[trait])
else:
for trait, values in list(self.target_dataset.trait_data.items()):
self.get_sample_r_and_p_values(trait, values)
@@ -163,80 +193,85 @@ class CorrelationResults(object):
lit_corr_data = self.do_lit_correlation_for_all_traits()
for trait in list(lit_corr_data.keys())[:self.return_number]:
- self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])
+ self.get_sample_r_and_p_values(
+ trait, self.target_dataset.trait_data[trait])
elif self.corr_type == "sample":
- for trait, values in list(self.target_dataset.trait_data.items()):
- self.get_sample_r_and_p_values(trait, values)
-
- self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()),
- key=lambda t: -abs(t[1][0])))
-
-
- #ZS: Convert min/max chromosome to an int for the location range option
- range_chr_as_int = None
- for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
- if 'loc_chr' in start_vars:
- if chr_info.name == self.location_chr:
- range_chr_as_int = order_id
-
- for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]):
- trait_object = create_trait(dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False)
- if not trait_object:
- continue
-
- chr_as_int = 0
- for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
- if self.location_type == "highest_lod":
- if chr_info.name == trait_object.locus_chr:
- chr_as_int = order_id
- else:
- if chr_info.name == trait_object.chr:
- chr_as_int = order_id
-
- if (float(self.correlation_data[trait][0]) >= self.p_range_lower and
- float(self.correlation_data[trait][0]) <= self.p_range_upper):
-
- if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean):
- if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr):
- continue
-
- if range_chr_as_int != None and (chr_as_int != range_chr_as_int):
- continue
- if self.location_type == "highest_lod":
- if (self.min_location_mb != None) and (float(trait_object.locus_mb) < float(self.min_location_mb)):
- continue
- if (self.max_location_mb != None) and (float(trait_object.locus_mb) > float(self.max_location_mb)):
- continue
- else:
- if (self.min_location_mb != None) and (float(trait_object.mb) < float(self.min_location_mb)):
- continue
- if (self.max_location_mb != None) and (float(trait_object.mb) > float(self.max_location_mb)):
- continue
-
- (trait_object.sample_r,
- trait_object.sample_p,
- trait_object.num_overlap) = self.correlation_data[trait]
-
- # Set some sane defaults
- trait_object.tissue_corr = 0
- trait_object.tissue_pvalue = 0
- trait_object.lit_corr = 0
- if self.corr_type == "tissue" and tissue_corr_data != None:
- trait_object.tissue_corr = tissue_corr_data[trait][1]
- trait_object.tissue_pvalue = tissue_corr_data[trait][2]
- elif self.corr_type == "lit":
- trait_object.lit_corr = lit_corr_data[trait][1]
-
- self.correlation_results.append(trait_object)
-
- if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
- self.do_lit_correlation_for_trait_list()
-
- if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
- self.do_tissue_correlation_for_trait_list()
-
- self.json_results = generate_corr_json(self.correlation_results, self.this_trait, self.dataset, self.target_dataset)
+
+ compute_sample_r(start_vars,
+ self.target_dataset.trait_data, self.sample_data, self.target_dataset.samplelist)
+ # for trait, values in list(self.target_dataset.trait_data.items()):
+ # self.get_sample_r_and_p_values(trait, values)
+
+ # self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()),
+ # key=lambda t: -abs(t[1][0])))
+
+ # # ZS: Convert min/max chromosome to an int for the location range option
+ # range_chr_as_int = None
+ # for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
+ # if 'loc_chr' in start_vars:
+ # if chr_info.name == self.location_chr:
+ # range_chr_as_int = order_id
+
+ # for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]):
+ # trait_object = create_trait(
+ # dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False)
+ # if not trait_object:
+ # continue
+
+ # chr_as_int = 0
+ # for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
+ # if self.location_type == "highest_lod":
+ # if chr_info.name == trait_object.locus_chr:
+ # chr_as_int = order_id
+ # else:
+ # if chr_info.name == trait_object.chr:
+ # chr_as_int = order_id
+
+ # if (float(self.correlation_data[trait][0]) >= self.p_range_lower and
+ # float(self.correlation_data[trait][0]) <= self.p_range_upper):
+
+ # if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean):
+ # if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr):
+ # continue
+
+ # if range_chr_as_int != None and (chr_as_int != range_chr_as_int):
+ # continue
+ # if self.location_type == "highest_lod":
+ # if (self.min_location_mb != None) and (float(trait_object.locus_mb) < float(self.min_location_mb)):
+ # continue
+ # if (self.max_location_mb != None) and (float(trait_object.locus_mb) > float(self.max_location_mb)):
+ # continue
+ # else:
+ # if (self.min_location_mb != None) and (float(trait_object.mb) < float(self.min_location_mb)):
+ # continue
+ # if (self.max_location_mb != None) and (float(trait_object.mb) > float(self.max_location_mb)):
+ # continue
+
+ # (trait_object.sample_r,
+ # trait_object.sample_p,
+ # trait_object.num_overlap) = self.correlation_data[trait]
+
+ # # Set some sane defaults
+ # trait_object.tissue_corr = 0
+ # trait_object.tissue_pvalue = 0
+ # trait_object.lit_corr = 0
+ # if self.corr_type == "tissue" and tissue_corr_data != None:
+ # trait_object.tissue_corr = tissue_corr_data[trait][1]
+ # trait_object.tissue_pvalue = tissue_corr_data[trait][2]
+ # elif self.corr_type == "lit":
+ # trait_object.lit_corr = lit_corr_data[trait][1]
+
+ # self.correlation_results.append(trait_object)
+
+ # if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
+ # self.do_lit_correlation_for_trait_list()
+
+ # if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
+ # self.do_tissue_correlation_for_trait_list()
+
+ # self.json_results = generate_corr_json(
+ # self.correlation_results, self.this_trait, self.dataset, self.target_dataset)
############################################################################################################################################
@@ -259,39 +294,43 @@ class CorrelationResults(object):
def do_tissue_correlation_for_trait_list(self, tissue_dataset_id=1):
"""Given a list of correlation results (self.correlation_results), gets the tissue correlation value for each"""
- #Gets tissue expression values for the primary trait
+ # Gets tissue expression values for the primary trait
primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list = [self.this_trait.symbol])
+ symbol_list=[self.this_trait.symbol])
if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
- primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()]
- gene_symbol_list = [trait.symbol for trait in self.correlation_results if trait.symbol]
+ primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower(
+ )]
+ gene_symbol_list = [
+ trait.symbol for trait in self.correlation_results if trait.symbol]
- corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list=gene_symbol_list)
+ corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list=gene_symbol_list)
for trait in self.correlation_results:
if trait.symbol and trait.symbol.lower() in corr_result_tissue_vals_dict:
- this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower()]
+ this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower(
+ )]
result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values,
- this_trait_tissue_values,
- self.corr_method)
+ this_trait_tissue_values,
+ self.corr_method)
trait.tissue_corr = result[0]
trait.tissue_pvalue = result[2]
def do_tissue_correlation_for_all_traits(self, tissue_dataset_id=1):
- #Gets tissue expression values for the primary trait
+ # Gets tissue expression values for the primary trait
primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list = [self.this_trait.symbol])
+ symbol_list=[self.this_trait.symbol])
if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
- primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()]
+ primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower(
+ )]
#print("trait_gene_symbols: ", pf(trait_gene_symbols.values()))
- corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list=list(self.trait_symbol_dict.values()))
+ corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list=list(self.trait_symbol_dict.values()))
#print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict))
@@ -300,27 +339,30 @@ class CorrelationResults(object):
tissue_corr_data = {}
for trait, symbol in list(self.trait_symbol_dict.items()):
if symbol and symbol.lower() in corr_result_tissue_vals_dict:
- this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()]
+ this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower(
+ )]
result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values,
- this_trait_tissue_values,
- self.corr_method)
+ this_trait_tissue_values,
+ self.corr_method)
tissue_corr_data[trait] = [symbol, result[0], result[2]]
tissue_corr_data = collections.OrderedDict(sorted(list(tissue_corr_data.items()),
- key=lambda t: -abs(t[1][1])))
+ key=lambda t: -abs(t[1][1])))
return tissue_corr_data
def do_lit_correlation_for_trait_list(self):
- input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid)
+ input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(
+ self.dataset.group.species.lower(), self.this_trait.geneid)
for trait in self.correlation_results:
if trait.geneid:
- trait.mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), trait.geneid)
+ trait.mouse_gene_id = self.convert_to_mouse_gene_id(
+ self.dataset.group.species.lower(), trait.geneid)
else:
trait.mouse_gene_id = None
@@ -348,13 +390,14 @@ class CorrelationResults(object):
else:
trait.lit_corr = 0
-
def do_lit_correlation_for_all_traits(self):
- input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid)
+ input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(
+ self.dataset.group.species.lower(), self.this_trait.geneid)
lit_corr_data = {}
for trait, gene_id in list(self.trait_geneid_dict.items()):
- mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), gene_id)
+ mouse_gene_id = self.convert_to_mouse_gene_id(
+ self.dataset.group.species.lower(), gene_id)
if mouse_gene_id and str(mouse_gene_id).find(";") == -1:
#print("gene_symbols:", input_trait_mouse_gene_id + " / " + mouse_gene_id)
@@ -382,7 +425,7 @@ class CorrelationResults(object):
lit_corr_data[trait] = [gene_id, 0]
lit_corr_data = collections.OrderedDict(sorted(list(lit_corr_data.items()),
- key=lambda t: -abs(t[1][1])))
+ key=lambda t: -abs(t[1][1])))
return lit_corr_data
@@ -422,6 +465,7 @@ class CorrelationResults(object):
return mouse_gene_id
+
def get_sample_r_and_p_values(self, trait, target_samples):
"""Calculates the sample r (or rho) and p-value
@@ -431,6 +475,9 @@ class CorrelationResults(object):
"""
+ print("below here>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
+ print(self.target_dataset.trait_data)
+
self.this_trait_vals = []
target_vals = []
for index, sample in enumerate(self.target_dataset.samplelist):
@@ -440,21 +487,26 @@ class CorrelationResults(object):
self.this_trait_vals.append(sample_value)
target_vals.append(target_sample_value)
- self.this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(self.this_trait_vals, target_vals)
+ self.this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
+ self.this_trait_vals, target_vals)
if num_overlap > 5:
- #ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/
+ # ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/
if self.corr_method == 'bicor':
- sample_r, sample_p = do_bicor(self.this_trait_vals, target_vals)
+ sample_r, sample_p = do_bicor(
+ self.this_trait_vals, target_vals)
elif self.corr_method == 'pearson':
- sample_r, sample_p = scipy.stats.pearsonr(self.this_trait_vals, target_vals)
+ sample_r, sample_p = scipy.stats.pearsonr(
+ self.this_trait_vals, target_vals)
else:
- sample_r, sample_p = scipy.stats.spearmanr(self.this_trait_vals, target_vals)
+ sample_r, sample_p = scipy.stats.spearmanr(
+ self.this_trait_vals, target_vals)
if numpy.isnan(sample_r):
pass
else:
- self.correlation_data[trait] = [sample_r, sample_p, num_overlap]
+ self.correlation_data[trait] = [
+ sample_r, sample_p, num_overlap]
def process_samples(self, start_vars, sample_names, excluded_samples=None):
if not excluded_samples:
@@ -475,16 +527,18 @@ def do_bicor(this_trait_vals, target_trait_vals):
r_library("WGCNA")
r_bicor = ro.r["bicorAndPvalue"] # Map the bicorAndPvalue function
- r_options(stringsAsFactors = False)
+ r_options(stringsAsFactors=False)
this_vals = ro.Vector(this_trait_vals)
target_vals = ro.Vector(target_trait_vals)
- the_r, the_p, _fisher_transform, _the_t, _n_obs = [numpy.asarray(x) for x in r_bicor(x = this_vals, y = target_vals)]
+ the_r, the_p, _fisher_transform, _the_t, _n_obs = [
+ numpy.asarray(x) for x in r_bicor(x=this_vals, y=target_vals)]
return the_r, the_p
-def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_api = False):
+
+def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_api=False):
results_list = []
for i, trait in enumerate(corr_results):
if trait.view == False:
@@ -493,7 +547,8 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap
results_dict['index'] = i + 1
results_dict['trait_id'] = trait.name
results_dict['dataset'] = trait.dataset.name
- results_dict['hmac'] = hmac.data_hmac('{}:{}'.format(trait.name, trait.dataset.name))
+ results_dict['hmac'] = hmac.data_hmac(
+ '{}:{}'.format(trait.name, trait.dataset.name))
if target_dataset.type == "ProbeSet":
results_dict['symbol'] = trait.symbol
results_dict['description'] = "N/A"
@@ -544,7 +599,8 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap
if bool(trait.authors):
authors_list = trait.authors.split(',')
if len(authors_list) > 6:
- results_dict['authors_display'] = ", ".join(authors_list[:6]) + ", et al."
+ results_dict['authors_display'] = ", ".join(
+ authors_list[:6]) + ", et al."
else:
results_dict['authors_display'] = trait.authors
if bool(trait.pubmed_id):
@@ -574,85 +630,85 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap
return json.dumps(results_list)
+
def get_header_fields(data_type, corr_method):
if data_type == "ProbeSet":
if corr_method == "spearman":
header_fields = ['Index',
- 'Record',
- 'Symbol',
- 'Description',
- 'Location',
- 'Mean',
- 'Sample rho',
- 'N',
- 'Sample p(rho)',
- 'Lit rho',
- 'Tissue rho',
- 'Tissue p(rho)',
- 'Max LRS',
- 'Max LRS Location',
- 'Additive Effect']
+ 'Record',
+ 'Symbol',
+ 'Description',
+ 'Location',
+ 'Mean',
+ 'Sample rho',
+ 'N',
+ 'Sample p(rho)',
+ 'Lit rho',
+ 'Tissue rho',
+ 'Tissue p(rho)',
+ 'Max LRS',
+ 'Max LRS Location',
+ 'Additive Effect']
else:
header_fields = ['Index',
- 'Record',
- 'Symbol',
- 'Description',
- 'Location',
- 'Mean',
- 'Sample r',
- 'N',
- 'Sample p(r)',
- 'Lit r',
- 'Tissue r',
- 'Tissue p(r)',
- 'Max LRS',
- 'Max LRS Location',
- 'Additive Effect']
+ 'Record',
+ 'Symbol',
+ 'Description',
+ 'Location',
+ 'Mean',
+ 'Sample r',
+ 'N',
+ 'Sample p(r)',
+ 'Lit r',
+ 'Tissue r',
+ 'Tissue p(r)',
+ 'Max LRS',
+ 'Max LRS Location',
+ 'Additive Effect']
elif data_type == "Publish":
if corr_method == "spearman":
header_fields = ['Index',
- 'Record',
- 'Abbreviation',
- 'Description',
- 'Mean',
- 'Authors',
- 'Year',
- 'Sample rho',
- 'N',
- 'Sample p(rho)',
- 'Max LRS',
- 'Max LRS Location',
- 'Additive Effect']
+ 'Record',
+ 'Abbreviation',
+ 'Description',
+ 'Mean',
+ 'Authors',
+ 'Year',
+ 'Sample rho',
+ 'N',
+ 'Sample p(rho)',
+ 'Max LRS',
+ 'Max LRS Location',
+ 'Additive Effect']
else:
header_fields = ['Index',
- 'Record',
- 'Abbreviation',
- 'Description',
- 'Mean',
- 'Authors',
- 'Year',
- 'Sample r',
- 'N',
- 'Sample p(r)',
- 'Max LRS',
- 'Max LRS Location',
- 'Additive Effect']
+ 'Record',
+ 'Abbreviation',
+ 'Description',
+ 'Mean',
+ 'Authors',
+ 'Year',
+ 'Sample r',
+ 'N',
+ 'Sample p(r)',
+ 'Max LRS',
+ 'Max LRS Location',
+ 'Additive Effect']
else:
if corr_method == "spearman":
header_fields = ['Index',
- 'ID',
- 'Location',
- 'Sample rho',
- 'N',
- 'Sample p(rho)']
+ 'ID',
+ 'Location',
+ 'Sample rho',
+ 'N',
+ 'Sample p(rho)']
else:
header_fields = ['Index',
- 'ID',
- 'Location',
- 'Sample r',
- 'N',
- 'Sample p(r)']
+ 'ID',
+ 'Location',
+ 'Sample r',
+ 'N',
+ 'Sample p(r)']
return header_fields
-
--
cgit v1.2.3
From e5d2ce8f29e43900977b967ec8cac715f544a2f0 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 25 Mar 2021 02:25:45 +0300
Subject: add code for calling gn3 correlation endpoint
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 131 ++++++++++++----
wqflask/wqflask/correlation/show_corr_results.py | 174 +++++++++------------
wqflask/wqflask/views.py | 5 +-
3 files changed, 177 insertions(+), 133 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 4cf6533c..7e269e41 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -1,20 +1,17 @@
"""module that calls the gn3 api's to do the correlation """
-from base import data_set
-from base.trait import create_trait
-from base.trait import retrieve_sample_data
+import json
+import requests
+from wqflask.wqflask.correlation import correlation_functions
+from wqflask.base import data_set
+from wqflask.base.trait import create_trait
+from wqflask.base.trait import retrieve_sample_data
+GN3_CORRELATION_API = "http://127.0.0.1:8080/api/correlation"
-
-
-
-def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, method="pearson"):
- import requests
- from wqflask.correlation.correlation_gn3_api import compute_correlation
-
- cor_results = compute_correlation(start_vars)
-
+def compute_sample(target_dataset, trait_data, target_samplelist, method="pearson"):
+ """integration with the sample_r correlation api"""
data = {
"target_dataset": target_dataset,
"target_samplelist": target_samplelist,
@@ -29,33 +26,60 @@ def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, m
data = results.json()
- print(data)
-
return data
-def process_samples(start_vars,sample_names,excluded_samples=None):
+def get_tissue_correlation_input(this_trait, trait_symbol_dict):
+ """Gets tissue expression values for the primary trait and target tissues values"""
+ primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list=[this_trait.symbol])
+
+ if this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
+ primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower(
+ )]
+
+ corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list=list(trait_symbol_dict.values()))
+
+ target_tissue_data = []
+ for trait, symbol in list(trait_symbol_dict.items()):
+ if symbol and symbol.lower() in corr_result_tissue_vals_dict:
+ this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower(
+ )]
+
+ this_trait_data = {"trait_id": trait,
+ "tissue_values": this_trait_tissue_values}
+
+ target_tissue_data.append(this_trait_data)
+
+ primary_tissue_data = {
+ "this_id": "TT",
+ "tissue_values": primary_trait_tissue_values
+
+ }
+
+ return (primary_tissue_data, target_tissue_data)
+
+ return None
+
+
+def process_samples(start_vars, sample_names, excluded_samples=None):
+ """process samples method"""
sample_data = {}
if not excluded_samples:
excluded_samples = ()
- sample_vals_dict = json.loads(start_vars["sample_vals"])
+ sample_vals_dict = json.loads(start_vars["sample_vals"])
for sample in sample_names:
if sample not in excluded_samples:
- val = sample_val_dict[sample]
+ val = sample_vals_dict[sample]
if not val.strip().lower() == "x":
- sample_data[str(sample)]=float(value)
+ sample_data[str(sample)] = float(val)
return sample_data
-def create_fetch_dataset_data(dataset_name):
- this_dataset = data_set.create_dataset(dataset_name=dataset_name)
-
- this_dataset.get_trait_data()
-
-
def create_target_this_trait(start_vars):
"""this function prefetch required data for correlation"""
@@ -66,12 +90,61 @@ def create_target_this_trait(start_vars):
this_trait = create_trait(dataset=this_dataset,
name=start_vars['trait_id'])
+ sample_data = process_samples(start_vars, this_dataset.group.samplelist)
+ # target_dataset.get_trait_data(list(self.sample_data.keys()))
+
this_trait = retrieve_sample_data(this_trait, this_dataset)
- target_dataset.get_trait_data()
+ target_dataset.get_trait_data(list(sample_data.keys()))
+
+ return (this_dataset, this_trait, target_dataset, sample_data)
+
+
+def compute_correlation(start_vars, method="pearson"):
+ """compute correlation by calling the gn3 api"""
+
+ corr_type = start_vars['corr_type']
+
+ (this_dataset, this_trait, target_dataset,
+ sample_data) = create_target_this_trait(start_vars)
+
+ # cor_results = compute_correlation(start_vars)
+
+ method = start_vars['corr_sample_method']
+
+ corr_input_data = {}
- return (this_dataset,this_trait,target_dataset)
-def compute_correlation(start_vars):
+ if corr_type == "sample":
+ corr_input_data = {
+ "target_dataset": target_dataset.trait_data,
+ "target_samplelist": target_dataset.samplelist,
+ "trait_data": {
+ "trait_sample_data": sample_data,
+ "trait_id": start_vars["trait_id"]
+ }
+ }
+
+ requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
+
+ elif corr_type == "tissue":
+ trait_symbol_dict = this_dataset.retrieve_genes("Symbol")
+ primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
+ this_trait, trait_symbol_dict)
+
+ corr_input_data = {
+ "primary_tissue": primary_tissue_data,
+ "target_tissues": target_tissue_data
+ }
- this_dataset, this_trait, target_dataset = create_target_this_trait(
- start_vars=start_vars)
+ requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
+
+ else:
+ pass
+ # lit correlation/literature
+ # can fetch values in gn3 not set up in gn3
+
+ corr_results = requests.post(requests_url, json=corr_input_data)
+
+ data = corr_results.json()
+
+ return data
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index a817a4a4..50b3ba26 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -57,32 +57,6 @@ TISSUE_METHODS = [METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK]
TISSUE_MOUSE_DB = 1
-
-def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, method="pearson"):
- import requests
- from wqflask.correlation.correlation_gn3_api import compute_correlation
-
- # cor_results = compute_correlation(start_vars)
-
- data = {
- "target_dataset": target_dataset,
- "target_samplelist": target_samplelist,
- "trait_data": {
- "trait_sample_data": trait_data,
- "trait_id": "HC_Q"
- }
- }
- requests_url = f"http://127.0.0.1:8080/api/correlation/sample_x/{method}"
-
- results = requests.post(requests_url, json=data)
-
- data = results.json()
-
- print(data)
-
- return data
-
-
class CorrelationResults(object):
def __init__(self, start_vars):
# get trait list from db (database name)
@@ -197,81 +171,78 @@ class CorrelationResults(object):
trait, self.target_dataset.trait_data[trait])
elif self.corr_type == "sample":
-
- compute_sample_r(start_vars,
- self.target_dataset.trait_data, self.sample_data, self.target_dataset.samplelist)
- # for trait, values in list(self.target_dataset.trait_data.items()):
- # self.get_sample_r_and_p_values(trait, values)
-
- # self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()),
- # key=lambda t: -abs(t[1][0])))
-
- # # ZS: Convert min/max chromosome to an int for the location range option
- # range_chr_as_int = None
- # for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
- # if 'loc_chr' in start_vars:
- # if chr_info.name == self.location_chr:
- # range_chr_as_int = order_id
-
- # for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]):
- # trait_object = create_trait(
- # dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False)
- # if not trait_object:
- # continue
-
- # chr_as_int = 0
- # for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
- # if self.location_type == "highest_lod":
- # if chr_info.name == trait_object.locus_chr:
- # chr_as_int = order_id
- # else:
- # if chr_info.name == trait_object.chr:
- # chr_as_int = order_id
-
- # if (float(self.correlation_data[trait][0]) >= self.p_range_lower and
- # float(self.correlation_data[trait][0]) <= self.p_range_upper):
-
- # if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean):
- # if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr):
- # continue
-
- # if range_chr_as_int != None and (chr_as_int != range_chr_as_int):
- # continue
- # if self.location_type == "highest_lod":
- # if (self.min_location_mb != None) and (float(trait_object.locus_mb) < float(self.min_location_mb)):
- # continue
- # if (self.max_location_mb != None) and (float(trait_object.locus_mb) > float(self.max_location_mb)):
- # continue
- # else:
- # if (self.min_location_mb != None) and (float(trait_object.mb) < float(self.min_location_mb)):
- # continue
- # if (self.max_location_mb != None) and (float(trait_object.mb) > float(self.max_location_mb)):
- # continue
-
- # (trait_object.sample_r,
- # trait_object.sample_p,
- # trait_object.num_overlap) = self.correlation_data[trait]
-
- # # Set some sane defaults
- # trait_object.tissue_corr = 0
- # trait_object.tissue_pvalue = 0
- # trait_object.lit_corr = 0
- # if self.corr_type == "tissue" and tissue_corr_data != None:
- # trait_object.tissue_corr = tissue_corr_data[trait][1]
- # trait_object.tissue_pvalue = tissue_corr_data[trait][2]
- # elif self.corr_type == "lit":
- # trait_object.lit_corr = lit_corr_data[trait][1]
-
- # self.correlation_results.append(trait_object)
-
- # if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
- # self.do_lit_correlation_for_trait_list()
-
- # if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
- # self.do_tissue_correlation_for_trait_list()
-
- # self.json_results = generate_corr_json(
- # self.correlation_results, self.this_trait, self.dataset, self.target_dataset)
+ for trait, values in list(self.target_dataset.trait_data.items()):
+ self.get_sample_r_and_p_values(trait, values)
+
+ self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()),
+ key=lambda t: -abs(t[1][0])))
+
+ # ZS: Convert min/max chromosome to an int for the location range option
+ range_chr_as_int = None
+ for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
+ if 'loc_chr' in start_vars:
+ if chr_info.name == self.location_chr:
+ range_chr_as_int = order_id
+
+ for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]):
+ trait_object = create_trait(
+ dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False)
+ if not trait_object:
+ continue
+
+ chr_as_int = 0
+ for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
+ if self.location_type == "highest_lod":
+ if chr_info.name == trait_object.locus_chr:
+ chr_as_int = order_id
+ else:
+ if chr_info.name == trait_object.chr:
+ chr_as_int = order_id
+
+ if (float(self.correlation_data[trait][0]) >= self.p_range_lower and
+ float(self.correlation_data[trait][0]) <= self.p_range_upper):
+
+ if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean):
+ if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr):
+ continue
+
+ if range_chr_as_int != None and (chr_as_int != range_chr_as_int):
+ continue
+ if self.location_type == "highest_lod":
+ if (self.min_location_mb != None) and (float(trait_object.locus_mb) < float(self.min_location_mb)):
+ continue
+ if (self.max_location_mb != None) and (float(trait_object.locus_mb) > float(self.max_location_mb)):
+ continue
+ else:
+ if (self.min_location_mb != None) and (float(trait_object.mb) < float(self.min_location_mb)):
+ continue
+ if (self.max_location_mb != None) and (float(trait_object.mb) > float(self.max_location_mb)):
+ continue
+
+ (trait_object.sample_r,
+ trait_object.sample_p,
+ trait_object.num_overlap) = self.correlation_data[trait]
+
+ # Set some sane defaults
+ trait_object.tissue_corr = 0
+ trait_object.tissue_pvalue = 0
+ trait_object.lit_corr = 0
+ if self.corr_type == "tissue" and tissue_corr_data != None:
+ trait_object.tissue_corr = tissue_corr_data[trait][1]
+ trait_object.tissue_pvalue = tissue_corr_data[trait][2]
+ elif self.corr_type == "lit":
+ trait_object.lit_corr = lit_corr_data[trait][1]
+
+ self.correlation_results.append(trait_object)
+
+ if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
+ self.do_lit_correlation_for_trait_list()
+
+ if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
+ self.do_tissue_correlation_for_trait_list()
+
+ self.json_results = generate_corr_json(
+ self.correlation_results, self.this_trait, self.dataset, self.target_dataset)
############################################################################################################################################
@@ -465,7 +436,6 @@ class CorrelationResults(object):
return mouse_gene_id
-
def get_sample_r_and_p_values(self, trait, target_samples):
"""Calculates the sample r (or rho) and p-value
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 2c0ba586..6ca9b23f 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -46,6 +46,7 @@ from wqflask.marker_regression import run_mapping
from wqflask.marker_regression import display_mapping_results
from wqflask.network_graph import network_graph
from wqflask.correlation import show_corr_results
+from wqflask.correlation.correlation_gn3_api import compute_correlation
from wqflask.correlation_matrix import show_corr_matrix
from wqflask.correlation import corr_scatter_plot
from wqflask.wgcna import wgcna_analysis
@@ -880,8 +881,8 @@ def network_graph_page():
def corr_compute_page():
logger.info("In corr_compute, request.form is:", pf(request.form))
logger.info(request.url)
- template_vars = show_corr_results.CorrelationResults(request.form)
- return render_template("correlation_page.html", **template_vars.__dict__)
+ correlation_results = compute_correlation(request.form)
+ return render_template("demo_correlation_page.html",correlation_results=correlation_results)
@app.route("/corr_matrix", methods=('POST',))
def corr_matrix_page():
--
cgit v1.2.3
From 7a1e84cafdf02a1bcef4ddeb653d072b80a8deba Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 25 Mar 2021 02:27:38 +0300
Subject: add initial demo template page
---
.../wqflask/templates/demo_correlation_page.html | 36 ++++++++++++++++++++++
1 file changed, 36 insertions(+)
create mode 100644 wqflask/wqflask/templates/demo_correlation_page.html
diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html
new file mode 100644
index 00000000..4d310051
--- /dev/null
+++ b/wqflask/wqflask/templates/demo_correlation_page.html
@@ -0,0 +1,36 @@
+{% extends "base.html" %}
+{% block title %}Demo Correlation Results{% endblock %}
+{% block css %}
+
+
+
+
+
+
+{% endblock %}
+{% block content %}
+
+ {{correlation_results}}
+
+
+
+{% endblock %}
+
+{% block js %}
+
+
+
+
+
+
+
+
+
+
+
+
+
+{% endblock %}
--
cgit v1.2.3
From cf42f769ec4db2efaebca64c63454935cc28b2a3 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 25 Mar 2021 03:55:16 +0300
Subject: modify gn3 integration code
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 89 +++++++++-------------
.../wqflask/templates/demo_correlation_page.html | 22 +-----
2 files changed, 38 insertions(+), 73 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 7e269e41..7e865bf3 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -10,59 +10,6 @@ from wqflask.base.trait import retrieve_sample_data
GN3_CORRELATION_API = "http://127.0.0.1:8080/api/correlation"
-def compute_sample(target_dataset, trait_data, target_samplelist, method="pearson"):
- """integration for integrating sample_r api correlation"""
- data = {
- "target_dataset": target_dataset,
- "target_samplelist": target_samplelist,
- "trait_data": {
- "trait_sample_data": trait_data,
- "trait_id": "HC_Q"
- }
- }
- requests_url = f"http://127.0.0.1:8080/api/correlation/sample_x/{method}"
-
- results = requests.post(requests_url, json=data)
-
- data = results.json()
-
- return data
-
-
-def get_tissue_correlation_input(this_trait, trait_symbol_dict):
- """Gets tissue expression values for the primary trait and target tissues values"""
- primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list=[this_trait.symbol])
-
- if this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
- primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower(
- )]
-
- corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list=list(trait_symbol_dict.values()))
-
- target_tissue_data = []
- for trait, symbol in list(trait_symbol_dict.items()):
- if symbol and symbol.lower() in corr_result_tissue_vals_dict:
- this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower(
- )]
-
- this_trait_data = {"trait_id": trait,
- "tissue_values": this_trait_tissue_values}
-
- target_tissue_data.append(this_trait_data)
-
- primary_tissue_data = {
- "this_id": "TT",
- "tissue_values": primary_trait_tissue_values
-
- }
-
- return (primary_tissue_data, target_tissue_data)
-
- return None
-
-
def process_samples(start_vars, sample_names, excluded_samples=None):
"""process samples method"""
sample_data = {}
@@ -81,7 +28,7 @@ def process_samples(start_vars, sample_names, excluded_samples=None):
def create_target_this_trait(start_vars):
- """this function prefetch required data for correlation"""
+ """this function creates the required trait and target dataset for correlation"""
this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
target_dataset = data_set.create_dataset(
@@ -148,3 +95,37 @@ def compute_correlation(start_vars, method="pearson"):
data = corr_results.json()
return data
+
+
+def get_tissue_correlation_input(this_trait, trait_symbol_dict):
+ """Gets tissue expression values for the primary trait and target tissues values"""
+ primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list=[this_trait.symbol])
+
+ if this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
+ primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower(
+ )]
+
+ corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list=list(trait_symbol_dict.values()))
+
+ target_tissue_data = []
+ for trait, symbol in list(trait_symbol_dict.items()):
+ if symbol and symbol.lower() in corr_result_tissue_vals_dict:
+ this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower(
+ )]
+
+ this_trait_data = {"trait_id": trait,
+ "tissue_values": this_trait_tissue_values}
+
+ target_tissue_data.append(this_trait_data)
+
+ primary_tissue_data = {
+ "this_id": "TT",
+ "tissue_values": primary_trait_tissue_values
+
+ }
+
+ return (primary_tissue_data, target_tissue_data)
+
+ return None
diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html
index 4d310051..ddcdf38d 100644
--- a/wqflask/wqflask/templates/demo_correlation_page.html
+++ b/wqflask/wqflask/templates/demo_correlation_page.html
@@ -10,27 +10,11 @@
{% endblock %}
{% block content %}
- {{correlation_results}}
+
-
-
-{% endblock %}
-{% block js %}
-
-
-
-
-
-
-
-
-
-
-
-
-
+
{% endblock %}
--
cgit v1.2.3
From 7de35627a6dc3fa48a039c932be005ffe6c175c4 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 25 Mar 2021 04:02:49 +0300
Subject: fix import error
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 7e865bf3..479bb0d8 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -1,11 +1,11 @@
"""module that calls the gn3 api's to do the correlation """
import json
import requests
-from wqflask.wqflask.correlation import correlation_functions
+from wqflask.correlation import correlation_functions
-from wqflask.base import data_set
-from wqflask.base.trait import create_trait
-from wqflask.base.trait import retrieve_sample_data
+from base import data_set
+from base.trait import create_trait
+from base.trait import retrieve_sample_data
GN3_CORRELATION_API = "http://127.0.0.1:8080/api/correlation"
--
cgit v1.2.3
From da72efa86846179d8d2aa64cd7b06a894469dc85 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 25 Mar 2021 10:14:31 +0300
Subject: minor fix
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 479bb0d8..f1137c0e 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -88,7 +88,7 @@ def compute_correlation(start_vars, method="pearson"):
else:
pass
# lit correlation/literature
- # can fetch values in gn3 not set up in gn3
+ # to fetch values from the database
corr_results = requests.post(requests_url, json=corr_input_data)
--
cgit v1.2.3
From 08ddec9dcbaa1730d0b65b643aa5c99d1077d4d5 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Sun, 28 Mar 2021 21:13:52 +0300
Subject: refactor correlation integration code
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 42 ++++++++++++----------
.../wqflask/templates/demo_correlation_page.html | 6 ++--
2 files changed, 27 insertions(+), 21 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index f1137c0e..1cd1b332 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -63,6 +63,7 @@ def compute_correlation(start_vars, method="pearson"):
if corr_type == "sample":
corr_input_data = {
+ "target": target_dataset,
"target_dataset": target_dataset.trait_data,
"target_samplelist": target_dataset.samplelist,
"trait_data": {
@@ -80,16 +81,17 @@ def compute_correlation(start_vars, method="pearson"):
corr_input_data = {
"primary_tissue": primary_tissue_data,
- "target_tissues": target_tissue_data
+ "target_tissues_dict": target_tissue_data
}
requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
- else:
- pass
- # lit correlation/literature
- # to fetch values from the database
+ elif corr_type == "lit":
+ (this_trait_geneid, geneid_dict, species) = do_lit_correlation(
+ this_trait, this_dataset, target_dataset)
+ requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}"
+ corr_input_data = geneid_dict
corr_results = requests.post(requests_url, json=corr_input_data)
data = corr_results.json()
@@ -97,6 +99,18 @@ def compute_correlation(start_vars, method="pearson"):
return data
+def do_lit_correlation(this_trait, this_dataset, target_dataset):
+ geneid_dict = this_dataset.retrieve_genes("GeneId")
+ species = this_dataset.group.species.lower()
+
+ this_trait_geneid = this_trait.geneid
+ this_trait_gene_data = {
+ this_trait.name: this_trait_geneid
+ }
+
+ return (this_trait_geneid, geneid_dict, species)
+
+
def get_tissue_correlation_input(this_trait, trait_symbol_dict):
"""Gets tissue expression values for the primary trait and target tissues values"""
primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
@@ -108,23 +122,15 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict):
corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
symbol_list=list(trait_symbol_dict.values()))
-
- target_tissue_data = []
- for trait, symbol in list(trait_symbol_dict.items()):
- if symbol and symbol.lower() in corr_result_tissue_vals_dict:
- this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower(
- )]
-
- this_trait_data = {"trait_id": trait,
- "tissue_values": this_trait_tissue_values}
-
- target_tissue_data.append(this_trait_data)
-
primary_tissue_data = {
- "this_id": "TT",
+ "this_id": this_trait.name,
"tissue_values": primary_trait_tissue_values
}
+ target_tissue_data = {
+ "trait_symbol_dict": trait_symbol_dict,
+ "symbol_tissue_vals_dict": corr_result_tissue_vals_dict
+ }
return (primary_tissue_data, target_tissue_data)
diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html
index ddcdf38d..a8651067 100644
--- a/wqflask/wqflask/templates/demo_correlation_page.html
+++ b/wqflask/wqflask/templates/demo_correlation_page.html
@@ -10,11 +10,11 @@
{% endblock %}
{% block content %}
-
-
+ {{correlation_results}}
+
{% endblock %}
--
cgit v1.2.3
From fcb93bef5ab230b948f83e0e77a1ef54b017aca1 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 8 Apr 2021 23:59:13 +0300
Subject: minor fix
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 1cd1b332..c8d5347c 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -63,7 +63,6 @@ def compute_correlation(start_vars, method="pearson"):
if corr_type == "sample":
corr_input_data = {
- "target": target_dataset,
"target_dataset": target_dataset.trait_data,
"target_samplelist": target_dataset.samplelist,
"trait_data": {
--
cgit v1.2.3
From 328b176628ed9db6c1c60590cb10f4cca212738a Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 15 Apr 2021 06:10:28 +0300
Subject: change api port
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index c8d5347c..8ee4a9b7 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -7,7 +7,7 @@ from base import data_set
from base.trait import create_trait
from base.trait import retrieve_sample_data
-GN3_CORRELATION_API = "http://127.0.0.1:8080/api/correlation"
+GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation"
def process_samples(start_vars, sample_names, excluded_samples=None):
@@ -30,6 +30,12 @@ def process_samples(start_vars, sample_names, excluded_samples=None):
def create_target_this_trait(start_vars):
"""this function creates the required trait and target dataset for correlation"""
+
+ print("creating the dataset and trait")
+ import time
+
+ initial_time = time.time()
+
this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
target_dataset = data_set.create_dataset(
dataset_name=start_vars['corr_dataset'])
@@ -44,6 +50,11 @@ def create_target_this_trait(start_vars):
target_dataset.get_trait_data(list(sample_data.keys()))
+
+ time_taken = time.time() - initial_time
+
+ print(f"the time taken to create dataset abnd trait is",time_taken)
+
return (this_dataset, this_trait, target_dataset, sample_data)
@@ -91,6 +102,8 @@ def compute_correlation(start_vars, method="pearson"):
requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}"
corr_input_data = geneid_dict
+
+ print("Sending this request")
corr_results = requests.post(requests_url, json=corr_input_data)
data = corr_results.json()
--
cgit v1.2.3
From 5a9a7a645510d1385def017adf2f956d61fa2329 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Fri, 16 Apr 2021 02:09:28 +0300
Subject: add demo template
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 3 +
.../wqflask/templates/demo_correlation_page.html | 78 ++++++++++++++++++++--
wqflask/wqflask/views.py | 2 +-
3 files changed, 77 insertions(+), 6 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 8ee4a9b7..b4480076 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -47,11 +47,14 @@ def create_target_this_trait(start_vars):
# target_dataset.get_trait_data(list(self.sample_data.keys()))
this_trait = retrieve_sample_data(this_trait, this_dataset)
+ print(f"Starting to creat the target dataset ")
+ dataset_start_time = time.time()
target_dataset.get_trait_data(list(sample_data.keys()))
time_taken = time.time() - initial_time
+ print(f"the time taken to create dataset is",time.time()-dataset_start_time)
print(f"the time taken to create dataset abnd trait is",time_taken)
diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html
index a8651067..1900a0bd 100644
--- a/wqflask/wqflask/templates/demo_correlation_page.html
+++ b/wqflask/wqflask/templates/demo_correlation_page.html
@@ -10,11 +10,79 @@
{% endblock %}
{% block content %}
- {{correlation_results}}
-
+
CORRELATION RESULTS
+
+
+ {% for corr_result in correlation_results %}
+ {% for key,value in corr_result.items()%}
+
+
+ {%for o_key,o_value in value.items()%}
+
+ {%endfor%}
+ {% endfor %}
+
+ {% endfor %}
+
+
+
{% endblock %}
+
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 6ca9b23f..072db466 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -882,7 +882,7 @@ def corr_compute_page():
logger.info("In corr_compute, request.form is:", pf(request.form))
logger.info(request.url)
correlation_results = compute_correlation(request.form)
- return render_template("demo_correlation_page.html",correlation_results=correlation_results)
+ return render_template("demo_correlation_page.html",correlation_results=correlation_results[1:20])
@app.route("/corr_matrix", methods=('POST',))
def corr_matrix_page():
--
cgit v1.2.3
From 33e03898ee733f18b29e54e202c217ba14921f48 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Sat, 17 Apr 2021 04:14:33 +0300
Subject: use gn3 lib
---
bin/genenetwork2 | 3 +-
wqflask/wqflask/correlation/correlation_gn3_api.py | 57 +++++++++++++++++-----
.../wqflask/templates/demo_correlation_page.html | 2 +-
3 files changed, 47 insertions(+), 15 deletions(-)
diff --git a/bin/genenetwork2 b/bin/genenetwork2
index 5f4e0f9a..917d6549 100755
--- a/bin/genenetwork2
+++ b/bin/genenetwork2
@@ -154,7 +154,8 @@ if [ ! -d $R_LIBS_SITE ] ; then
fi
# We may change this one:
-export PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$PYTHONPATH
+# export PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$PYTHONPATH
+PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$HOME/genenetwork3:$PYTHONPATH
# Our UNIX TMPDIR defaults to /tmp - change this on a shared server
if [ -z $TMPDIR ]; then
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index b4480076..c1d6132b 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -1,11 +1,17 @@
"""module that calls the gn3 api's to do the correlation """
import json
import requests
+import time
from wqflask.correlation import correlation_functions
from base import data_set
from base.trait import create_trait
from base.trait import retrieve_sample_data
+# gn3 lib
+from gn3.computations.correlations import compute_all_sample_correlation
+from gn3.computations.correlations import benchmark_compute_all_sample
+from gn3.computations.correlations import map_shared_keys_to_values
+from gn3.computations.correlations import compute_all_tissue_correlation
GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation"
@@ -30,7 +36,6 @@ def process_samples(start_vars, sample_names, excluded_samples=None):
def create_target_this_trait(start_vars):
"""this function creates the required trait and target dataset for correlation"""
-
print("creating the dataset and trait")
import time
@@ -52,11 +57,10 @@ def create_target_this_trait(start_vars):
target_dataset.get_trait_data(list(sample_data.keys()))
-
time_taken = time.time() - initial_time
- print(f"the time taken to create dataset is",time.time()-dataset_start_time)
+ print(f"the time taken to create dataset is", time.time()-dataset_start_time)
- print(f"the time taken to create dataset abnd trait is",time_taken)
+ print(f"the time taken to create dataset abnd trait is", time_taken)
return (this_dataset, this_trait, target_dataset, sample_data)
@@ -76,16 +80,34 @@ def compute_correlation(start_vars, method="pearson"):
corr_input_data = {}
if corr_type == "sample":
- corr_input_data = {
- "target_dataset": target_dataset.trait_data,
- "target_samplelist": target_dataset.samplelist,
- "trait_data": {
- "trait_sample_data": sample_data,
- "trait_id": start_vars["trait_id"]
- }
+ # corr_input_data = {
+ # "target_dataset": target_dataset.trait_data,
+ # "target_samplelist": target_dataset.samplelist,
+ # "trait_data": {
+ # "trait_sample_data": sample_data,
+ # "trait_id": start_vars["trait_id"]
+ # }
+ # }
+
+
+
+ this_trait_data = {
+ "trait_sample_data": sample_data,
+ "trait_id": start_vars["trait_id"]
}
- requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
+ initial_time = time.time()
+ print("Calling sample correlation")
+ results = map_shared_keys_to_values(
+ target_dataset.samplelist, target_dataset.trait_data)
+ correlation_results = compute_all_sample_correlation(corr_method=method,
+ this_trait=this_trait_data,
+ target_dataset=results)
+
+ print("Time taken is>>>>",time.time()-initial_time)
+
+ # requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
+ return correlation_results
elif corr_type == "tissue":
trait_symbol_dict = this_dataset.retrieve_genes("Symbol")
@@ -96,8 +118,17 @@ def compute_correlation(start_vars, method="pearson"):
"primary_tissue": primary_tissue_data,
"target_tissues_dict": target_tissue_data
}
+ print("Calling tissue correlation")
+ initial_time = time.time()
+ correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
+ target_tissues_data=corr_input_data["target_tissues_dict"],
+ corr_method=method)
+
+ time_taken = time.time()
+ print("Time taken is ??????",time_taken-initial_time)
- requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
+ # requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
+ return correlation_results
elif corr_type == "lit":
(this_trait_geneid, geneid_dict, species) = do_lit_correlation(
diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html
index 1900a0bd..d2979f9d 100644
--- a/wqflask/wqflask/templates/demo_correlation_page.html
+++ b/wqflask/wqflask/templates/demo_correlation_page.html
@@ -21,7 +21,7 @@
{% for corr_result in correlation_results %}
{% for key,value in corr_result.items()%}
-
+
{%for o_key,o_value in value.items()%}
{%endfor%}
--
cgit v1.2.3
From ba2fa2025bdc381346afc8ec3203f229ed3551d6 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Sat, 17 Apr 2021 13:43:44 +0300
Subject: refactoring fetching of data
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index c1d6132b..75bd5561 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -48,20 +48,17 @@ def create_target_this_trait(start_vars):
this_trait = create_trait(dataset=this_dataset,
name=start_vars['trait_id'])
- sample_data = process_samples(start_vars, this_dataset.group.samplelist)
+
# target_dataset.get_trait_data(list(self.sample_data.keys()))
- this_trait = retrieve_sample_data(this_trait, this_dataset)
+ # this_trait = retrieve_sample_data(this_trait, this_dataset)
print(f"Starting to creat the target dataset ")
dataset_start_time = time.time()
+ sample_data = ()
- target_dataset.get_trait_data(list(sample_data.keys()))
+
time_taken = time.time() - initial_time
- print(f"the time taken to create dataset is", time.time()-dataset_start_time)
-
- print(f"the time taken to create dataset abnd trait is", time_taken)
-
return (this_dataset, this_trait, target_dataset, sample_data)
@@ -89,6 +86,10 @@ def compute_correlation(start_vars, method="pearson"):
# }
# }
+ sample_data = process_samples(start_vars, this_dataset.group.samplelist)
+ target_dataset.get_trait_data(list(sample_data.keys()))
+ this_trait = retrieve_sample_data(this_trait, this_dataset)
+
this_trait_data = {
@@ -111,8 +112,10 @@ def compute_correlation(start_vars, method="pearson"):
elif corr_type == "tissue":
trait_symbol_dict = this_dataset.retrieve_genes("Symbol")
+ time_to_retrieve = time.time()
primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
this_trait, trait_symbol_dict)
+ print("Time taken to retrieve this is",time.time()-time_to_retrieve)
corr_input_data = {
"primary_tissue": primary_tissue_data,
--
cgit v1.2.3
From 50c0ee93a59eecd40a6fbd19139671c94003c21b Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 19 Apr 2021 00:24:36 +0300
Subject: fix for correlation_demo template
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 53 ++++++++++------------
.../wqflask/templates/demo_correlation_page.html | 23 +++++++++-
2 files changed, 44 insertions(+), 32 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 75bd5561..ba606b92 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -9,9 +9,10 @@ from base.trait import create_trait
from base.trait import retrieve_sample_data
# gn3 lib
from gn3.computations.correlations import compute_all_sample_correlation
-from gn3.computations.correlations import benchmark_compute_all_sample
from gn3.computations.correlations import map_shared_keys_to_values
from gn3.computations.correlations import compute_all_tissue_correlation
+from gn3.computations.correlations import compute_all_lit_correlation
+from gn3.db_utils import database_connector
GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation"
@@ -36,10 +37,6 @@ def process_samples(start_vars, sample_names, excluded_samples=None):
def create_target_this_trait(start_vars):
"""this function creates the required trait and target dataset for correlation"""
- print("creating the dataset and trait")
- import time
-
- initial_time = time.time()
this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
target_dataset = data_set.create_dataset(
@@ -48,17 +45,10 @@ def create_target_this_trait(start_vars):
this_trait = create_trait(dataset=this_dataset,
name=start_vars['trait_id'])
-
# target_dataset.get_trait_data(list(self.sample_data.keys()))
# this_trait = retrieve_sample_data(this_trait, this_dataset)
- print(f"Starting to creat the target dataset ")
- dataset_start_time = time.time()
sample_data = ()
-
-
-
- time_taken = time.time() - initial_time
return (this_dataset, this_trait, target_dataset, sample_data)
@@ -77,6 +67,7 @@ def compute_correlation(start_vars, method="pearson"):
corr_input_data = {}
if corr_type == "sample":
+ initial_time = time.time()
# corr_input_data = {
# "target_dataset": target_dataset.trait_data,
# "target_samplelist": target_dataset.samplelist,
@@ -85,50 +76,44 @@ def compute_correlation(start_vars, method="pearson"):
# "trait_id": start_vars["trait_id"]
# }
# }
-
- sample_data = process_samples(start_vars, this_dataset.group.samplelist)
+ sample_data = process_samples(
+ start_vars, this_dataset.group.samplelist)
target_dataset.get_trait_data(list(sample_data.keys()))
this_trait = retrieve_sample_data(this_trait, this_dataset)
-
+ print("Creating dataset and trait took",time.time()-initial_time)
this_trait_data = {
"trait_sample_data": sample_data,
"trait_id": start_vars["trait_id"]
}
-
initial_time = time.time()
- print("Calling sample correlation")
results = map_shared_keys_to_values(
target_dataset.samplelist, target_dataset.trait_data)
correlation_results = compute_all_sample_correlation(corr_method=method,
this_trait=this_trait_data,
target_dataset=results)
- print("Time taken is>>>>",time.time()-initial_time)
+ print("doing sample correlation took",time.time()-initial_time)
# requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
return correlation_results
elif corr_type == "tissue":
trait_symbol_dict = this_dataset.retrieve_genes("Symbol")
- time_to_retrieve = time.time()
primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
this_trait, trait_symbol_dict)
- print("Time taken to retrieve this is",time.time()-time_to_retrieve)
corr_input_data = {
"primary_tissue": primary_tissue_data,
"target_tissues_dict": target_tissue_data
}
- print("Calling tissue correlation")
initial_time = time.time()
correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
target_tissues_data=corr_input_data["target_tissues_dict"],
corr_method=method)
+ print("time taken for compute tissue is",time.time()-initial_time)
- time_taken = time.time()
- print("Time taken is ??????",time_taken-initial_time)
# requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
return correlation_results
@@ -137,15 +122,23 @@ def compute_correlation(start_vars, method="pearson"):
(this_trait_geneid, geneid_dict, species) = do_lit_correlation(
this_trait, this_dataset, target_dataset)
- requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}"
- corr_input_data = geneid_dict
+ conn, _cursor_object = database_connector()
+ initial_time = time.time()
+ with conn:
- print("Sending this request")
- corr_results = requests.post(requests_url, json=corr_input_data)
+ lit_corr_results = compute_all_lit_correlation(
+ conn=conn, trait_lists=list(geneid_dict.items()),
+ species=species, gene_id=this_trait_geneid)
- data = corr_results.json()
+ return lit_corr_results
+ print("the time taken is",time.time()-initial_time)
+ # requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}"
+ # corr_input_data = geneid_dict
+ # corr_results = requests.post(requests_url, json=corr_input_data)
- return data
+ # data = corr_results.json()
+
+ # return data
def do_lit_correlation(this_trait, this_dataset, target_dataset):
@@ -164,11 +157,11 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict):
"""Gets tissue expression values for the primary trait and target tissues values"""
primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
symbol_list=[this_trait.symbol])
-
if this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower(
)]
+ time_to_to_fetch_all = time.time()
corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
symbol_list=list(trait_symbol_dict.values()))
primary_tissue_data = {
diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html
index d2979f9d..67e3c57c 100644
--- a/wqflask/wqflask/templates/demo_correlation_page.html
+++ b/wqflask/wqflask/templates/demo_correlation_page.html
@@ -22,9 +22,28 @@
{% for key,value in corr_result.items()%}
- {%for o_key,o_value in value.items()%}
+
+ {% if "corr_coeffient" in value %}
+
{{value["corr_coeffient"]}}
+ {%elif "tissue_corr" in value %}
+
+ {%elif "lit_corr" in value %}
+ {{value["lit_corr"]}}
+ {% endif %}
+ {%if "tissue_number" in value %}
+
{{value["tissue_number"]}}
+ {%elif "num_overlap" in value %}
+
+ {% endif %}
+
+
+
+
+
+
{% endfor %}
{% endfor %}
--
cgit v1.2.3
From 34e4933de5a1cd444abe618fcfd93b424bf3442e Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Tue, 20 Apr 2021 01:38:26 +0300
Subject: refactor code for iterating mrna tissue data
---
wqflask/base/mrna_assay_tissue_data.py | 39 +++++++++++++++++++---
.../wqflask/correlation/correlation_functions.py | 6 ++--
wqflask/wqflask/correlation/correlation_gn3_api.py | 24 +++++++------
3 files changed, 51 insertions(+), 18 deletions(-)
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
index f1929518..0220d73b 100644
--- a/wqflask/base/mrna_assay_tissue_data.py
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -6,6 +6,7 @@ from utility import db_tools
from utility import Bunch
from utility.db_tools import escape
+from gn3.db_utils import database_connector
from utility.logger import getLogger
@@ -44,16 +45,42 @@ class MrnaAssayTissueData(object):
and t.Mean = x.maxmean;
'''.format(in_clause)
- results = g.db.execute(query).fetchall()
- lower_symbols = []
+ # lower_symbols = []
+ lower_symbols = {}
for gene_symbol in gene_symbols:
+ # lower_symbols[gene_symbol.lower()] = True
if gene_symbol != None:
- lower_symbols.append(gene_symbol.lower())
-
+ lower_symbols[gene_symbol.lower()] = True
+
+ import time
+ # initial_time = time.time()
+ # conn,cursor = database_connector()
+ # cursor.execute(query)
+ # for result in cursor.fetchall():
+ # symbol = result[0]
+ # self.data[symbol].gene_id = result[1]
+ # self.data[symbol].data_id = result[2]
+ # self.data[symbol].chr = result[3]
+ # self.data[symbol].mb = result[4]
+ # self.data[symbol].description = result[5]
+ # self.data[symbol].probe_target_description = result[6]
+
+
+ # print("my loop takes>>>>",time.time()-initial_time)
+ # conn.close()
+ # r
+
+ # takes 5 seconds
+ initial_time = time.time()
+ results = list(g.db.execute(query).fetchall())
for result in results:
symbol = result[0]
- if symbol.lower() in lower_symbols:
+ # if symbol is not None
+ # exists = lower_symbols.get(symbol.lower())
+ # if symbol.lower() in lower_symbols:
+ if symbol is not None and lower_symbols.get(symbol.lower()):
+
symbol = symbol.lower()
self.data[symbol].gene_id = result.GeneId
@@ -62,6 +89,7 @@ class MrnaAssayTissueData(object):
self.data[symbol].mb = result.Mb
self.data[symbol].description = result.description
self.data[symbol].probe_target_description = result.Probe_Target_Description
+ print("time taken in the loop is",time.time()-initial_time)
###########################################################################
#Input: cursor, symbolList (list), dataIdDict(Dict)
@@ -82,6 +110,7 @@ class MrnaAssayTissueData(object):
WHERE TissueProbeSetData.Id IN {} and
TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list))
+
results = g.db.execute(query).fetchall()
for result in results:
if result.Symbol.lower() not in symbol_values_dict:
diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py
index fd7691d4..af1d6060 100644
--- a/wqflask/wqflask/correlation/correlation_functions.py
+++ b/wqflask/wqflask/correlation/correlation_functions.py
@@ -82,6 +82,6 @@ def cal_zero_order_corr_for_tiss (primaryValue=[], targetValue=[], method='pears
def get_trait_symbol_and_tissue_values(symbol_list=None):
tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
-
- if len(tissue_data.gene_symbols):
- return tissue_data.get_symbol_values_pairs()
+ if len(tissue_data.gene_symbols) >0:
+ results = tissue_data.get_symbol_values_pairs()
+ return results
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index ba606b92..e7394647 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -12,6 +12,7 @@ from gn3.computations.correlations import compute_all_sample_correlation
from gn3.computations.correlations import map_shared_keys_to_values
from gn3.computations.correlations import compute_all_tissue_correlation
from gn3.computations.correlations import compute_all_lit_correlation
+from gn3.computations.correlations import experimental_compute_all_tissue_correlation
from gn3.db_utils import database_connector
GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation"
@@ -37,7 +38,6 @@ def process_samples(start_vars, sample_names, excluded_samples=None):
def create_target_this_trait(start_vars):
"""this function creates the required trait and target dataset for correlation"""
-
this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
target_dataset = data_set.create_dataset(
dataset_name=start_vars['corr_dataset'])
@@ -81,7 +81,7 @@ def compute_correlation(start_vars, method="pearson"):
target_dataset.get_trait_data(list(sample_data.keys()))
this_trait = retrieve_sample_data(this_trait, this_dataset)
- print("Creating dataset and trait took",time.time()-initial_time)
+ print("Creating dataset and trait took", time.time()-initial_time)
this_trait_data = {
"trait_sample_data": sample_data,
@@ -94,7 +94,7 @@ def compute_correlation(start_vars, method="pearson"):
this_trait=this_trait_data,
target_dataset=results)
- print("doing sample correlation took",time.time()-initial_time)
+ print("doing sample correlation took", time.time()-initial_time)
# requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
return correlation_results
@@ -109,11 +109,16 @@ def compute_correlation(start_vars, method="pearson"):
"target_tissues_dict": target_tissue_data
}
initial_time = time.time()
- correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
- target_tissues_data=corr_input_data["target_tissues_dict"],
- corr_method=method)
- print("time taken for compute tissue is",time.time()-initial_time)
-
+ correlation_results = experimental_compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
+ target_tissues_data=corr_input_data[
+ "target_tissues_dict"],
+ corr_method=method)
+ print("correlation y took", time.time()-initial_time)
+ # initial_time = time.time()
+ # correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
+ # target_tissues_data=corr_input_data["target_tissues_dict"],
+ # corr_method=method)
+ # print("time taken for compute tissue is", time.time()-initial_time)
# requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
return correlation_results
@@ -131,7 +136,7 @@ def compute_correlation(start_vars, method="pearson"):
species=species, gene_id=this_trait_geneid)
return lit_corr_results
- print("the time taken is",time.time()-initial_time)
+ print("the time taken is", time.time()-initial_time)
# requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}"
# corr_input_data = geneid_dict
# corr_results = requests.post(requests_url, json=corr_input_data)
@@ -161,7 +166,6 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict):
primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower(
)]
- time_to_to_fetch_all = time.time()
corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
symbol_list=list(trait_symbol_dict.values()))
primary_tissue_data = {
--
cgit v1.2.3
From 1b0566d7c9779b979d20c350f66d5628fb55eba6 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Fri, 23 Apr 2021 23:22:46 +0300
Subject: debugging for fetching probe data
---
wqflask/base/data_set.py | 51 ++++++++++++++++++++--
wqflask/wqflask/correlation/correlation_gn3_api.py | 2 +-
wqflask/wqflask/views.py | 3 ++
3 files changed, 51 insertions(+), 5 deletions(-)
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 178234fe..468c4da0 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -115,7 +115,8 @@ Publish or ProbeSet. E.g.
except:
pass
- self.redis_instance.set("dataset_structure", json.dumps(self.datasets))
+ self.redis_instance.set(
+ "dataset_structure", json.dumps(self.datasets))
def set_dataset_key(self, t, name):
"""If name is not in the object's dataset dictionary, set it, and update
@@ -154,10 +155,12 @@ Publish or ProbeSet. E.g.
if t in ['pheno', 'other_pheno']:
group_name = name.replace("Publish", "")
- results = g.db.execute(sql_query_mapping[t].format(group_name)).fetchone()
+ results = g.db.execute(
+ sql_query_mapping[t].format(group_name)).fetchone()
if results:
self.datasets[name] = dataset_name_mapping[t]
- self.redis_instance.set("dataset_structure", json.dumps(self.datasets))
+ self.redis_instance.set(
+ "dataset_structure", json.dumps(self.datasets))
return True
return None
@@ -169,7 +172,8 @@ Publish or ProbeSet. E.g.
# This has side-effects, with the end result being a truth-y value
if(self.set_dataset_key(t, name)):
break
- return self.datasets.get(name, None) # Return None if name has not been set
+ # Return None if name has not been set
+ return self.datasets.get(name, None)
# Do the intensive work at startup one time only
@@ -651,6 +655,43 @@ class DataSet(object):
"Dataset {} is not yet available in GeneNetwork.".format(self.name))
pass
+ def fetch_probe_trait_data(self, sample_list=None):
+ if sample_list:
+ self.samplelist = sample_list
+ else:
+ self.samplelist = self.group.samplelist
+
+ if self.group.parlist != None and self.group.f1list != None:
+ if (self.group.parlist + self.group.f1list) in self.samplelist:
+ self.samplelist += self.group.parlist + self.group.f1list
+
+ query = """
+ SELECT Strain.Name, Strain.Id FROM Strain, Species
+ WHERE Strain.Name IN {}
+ and Strain.SpeciesId=Species.Id
+ and Species.name = '{}'
+ """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
+ logger.sql(query)
+ results = dict(g.db.execute(query).fetchall())
+ sample_ids = [results[item] for item in self.samplelist]
+
+ query = """SELECT * from ProbeSetData WHERE Id in ( SELECT ProbeSetXRef.DataId FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and ProbeSetFreeze.Name = 'HC_M2_0606_P' and ProbeSet.Id = ProbeSetXRef.ProbeSetId order by ProbeSet.Id ) and StrainId in ({})""".format(
+ ",".join(str(sample_id) for sample_id in sample_ids))
+
+ results = g.db.execute(query).fetchall()
+
+ # with conn:
+ # cursor = conn.cursor()
+ # cursor.execute(query)
+ # results = cursor.fetchall()
+ trait_data = {}
+ for trait_id, StrainId, value in results:
+ if trait_id in trait_data:
+ trait_data[trait_id].append(value)
+ else:
+ trait_data[trait_id] = [value]
+ self.trait_data = trait_data
+
def get_trait_data(self, sample_list=None):
if sample_list:
self.samplelist = sample_list
@@ -670,6 +711,7 @@ class DataSet(object):
logger.sql(query)
results = dict(g.db.execute(query).fetchall())
sample_ids = [results[item] for item in self.samplelist]
+ print("the number of sample ids are", len(sample_ids))
# MySQL limits the number of tables that can be used in a join to 61,
# so we break the sample ids into smaller chunks
@@ -720,6 +762,7 @@ class DataSet(object):
trait_sample_data.append(results)
trait_count = len(trait_sample_data[0])
+ print("the trait count is >>>", trait_count)
self.trait_data = collections.defaultdict(list)
# put all of the separate data together into a dictionary where the keys are
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index e7394647..51bf5fb5 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -78,7 +78,7 @@ def compute_correlation(start_vars, method="pearson"):
# }
sample_data = process_samples(
start_vars, this_dataset.group.samplelist)
- target_dataset.get_trait_data(list(sample_data.keys()))
+ target_dataset.fetch_probe_trait_data(list(sample_data.keys()))
this_trait = retrieve_sample_data(this_trait, this_dataset)
print("Creating dataset and trait took", time.time()-initial_time)
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 072db466..2c239425 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -881,7 +881,10 @@ def network_graph_page():
def corr_compute_page():
logger.info("In corr_compute, request.form is:", pf(request.form))
logger.info(request.url)
+ import time
+ initial_time = time.time()
correlation_results = compute_correlation(request.form)
+ print(">>>>Time taken by this endpoint",time.time()-initial_time)
return render_template("demo_correlation_page.html",correlation_results=correlation_results[1:20])
@app.route("/corr_matrix", methods=('POST',))
--
cgit v1.2.3
From 7556f8a5dfc4c98bc0f0c8241592acec22b65102 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 26 Apr 2021 15:42:07 +0300
Subject: test for probe-type sample and tissue
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 71 +++++++++++++++++++++-
1 file changed, 70 insertions(+), 1 deletion(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 51bf5fb5..c945f699 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -52,8 +52,64 @@ def create_target_this_trait(start_vars):
return (this_dataset, this_trait, target_dataset, sample_data)
+def sample_for_trait_lists(corr_results, target_dataset, this_trait, this_dataset, start_vars):
+ sample_data = process_samples(
+ start_vars, this_dataset.group.samplelist)
+ target_dataset.get_trait_data(list(sample_data.keys()))
+
+ this_trait = retrieve_sample_data(this_trait, this_dataset)
+
+ this_trait_data = {
+ "trait_sample_data": sample_data,
+ "trait_id": start_vars["trait_id"]
+ }
+ # trait_lists = dict([(list(corr_result)[0],True) for corr_result in corr_results])
+ # target_dataset.trait_data =list(filter(lambda dict_obj: dict_obj.keys()[
+ # 0] in corr_results_traits, target_dataset_data))
+ results = map_shared_keys_to_values(
+ target_dataset.samplelist, target_dataset.trait_data)
+ correlation_results = compute_all_sample_correlation(corr_method="pearson",
+ this_trait=this_trait_data,
+ target_dataset=results)
+
+
+ return correlation_results
+
+
+def tissue_for_trait_lists(corr_results, this_dataset, target_dataset, this_trait):
+ # # print(corr_results[0])--
+ # [{"awsdsd_at": {'corr_coeffient': 0.49714692782257336, 'p_value': 1.872077762359228e-05, 'num_overlap': 67}}]
+
+ print("creating trait_lists")
+ # corr_results = corr_results[0::]
+ trait_lists = dict([(list(corr_result)[0], True)
+ for corr_result in corr_results])
+ print("finished creating trait_list")
+
+ traits_symbol_dict = this_dataset.retrieve_genes("Symbol")
+ print("Retrieved symbol dict")
+ print("creating dict here>>>>>>>>>")
+ import time
+ init_time = time.time()
+ traits_symbol_dict = dict({trait_name: symbol for (
+ trait_name, symbol) in traits_symbol_dict.items() if trait_lists.get(trait_name)})
+ print("time taken to create this max dict is>>>>", time.time()-init_time)
+ print("finished creatinf the dict")
+ print("Fetching tissue datas")
+ primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
+ this_trait, traits_symbol_dict)
+ print("finihsed>>>>>>>>>>>>>>>>>>")
+ print("Calling experimental_compute_all_tissue_correlation")
+ corr_results = experimental_compute_all_tissue_correlation(
+ primary_tissue_dict=primary_tissue_data, target_tissues_data=target_tissue_data, corr_method="pearson")
+ # print('finished calling this tissue reuslts',corr_results)
+
+ return corr_results
+
+
def compute_correlation(start_vars, method="pearson"):
"""compute correlation for to call gn3 api"""
+ import time
corr_type = start_vars['corr_type']
@@ -67,6 +123,7 @@ def compute_correlation(start_vars, method="pearson"):
corr_input_data = {}
if corr_type == "sample":
+ import time
initial_time = time.time()
# corr_input_data = {
# "target_dataset": target_dataset.trait_data,
@@ -78,7 +135,7 @@ def compute_correlation(start_vars, method="pearson"):
# }
sample_data = process_samples(
start_vars, this_dataset.group.samplelist)
- target_dataset.fetch_probe_trait_data(list(sample_data.keys()))
+ target_dataset.get_trait_data(list(sample_data.keys()))
this_trait = retrieve_sample_data(this_trait, this_dataset)
print("Creating dataset and trait took", time.time()-initial_time)
@@ -94,8 +151,15 @@ def compute_correlation(start_vars, method="pearson"):
this_trait=this_trait_data,
target_dataset=results)
+ print("computedd>>>>>>>>>>>>>")
+
print("doing sample correlation took", time.time()-initial_time)
+ other_results_time = time.time()
+ other_results = tissue_for_trait_lists(
+ correlation_results, this_dataset, target_dataset, this_trait)
+ print(">>>time taken for this is", time.time()-other_results_time)
+
# requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
return correlation_results
@@ -121,6 +185,9 @@ def compute_correlation(start_vars, method="pearson"):
# print("time taken for compute tissue is", time.time()-initial_time)
# requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
+
+ sample_results = sample_for_trait_lists(
+ correlation_results, target_dataset, this_trait, this_dataset, start_vars)
return correlation_results
elif corr_type == "lit":
@@ -148,6 +215,8 @@ def compute_correlation(start_vars, method="pearson"):
def do_lit_correlation(this_trait, this_dataset, target_dataset):
geneid_dict = this_dataset.retrieve_genes("GeneId")
+ #
+ print("CALLING THE LIT CORRELATION HERE")
species = this_dataset.group.species.lower()
this_trait_geneid = this_trait.geneid
--
cgit v1.2.3
From 067d27460965aaf1ceaa863a315a0c7dbc47ae02 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 26 Apr 2021 17:05:06 +0300
Subject: fix:remove debug statements and commented code
---
wqflask/base/mrna_assay_tissue_data.py | 25 ---------
wqflask/wqflask/correlation/correlation_gn3_api.py | 60 +++-------------------
2 files changed, 8 insertions(+), 77 deletions(-)
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
index 0220d73b..5a64afb2 100644
--- a/wqflask/base/mrna_assay_tissue_data.py
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -52,33 +52,9 @@ class MrnaAssayTissueData(object):
# lower_symbols[gene_symbol.lower()] = True
if gene_symbol != None:
lower_symbols[gene_symbol.lower()] = True
-
- import time
- # initial_time = time.time()
- # conn,cursor = database_connector()
- # cursor.execute(query)
- # for result in cursor.fetchall():
- # symbol = result[0]
- # self.data[symbol].gene_id = result[1]
- # self.data[symbol].data_id = result[2]
- # self.data[symbol].chr = result[3]
- # self.data[symbol].mb = result[4]
- # self.data[symbol].description = result[5]
- # self.data[symbol].probe_target_description = result[6]
-
-
- # print("my loop takes>>>>",time.time()-initial_time)
- # conn.close()
- # r
-
- # takes 5 seconds
- initial_time = time.time()
results = list(g.db.execute(query).fetchall())
for result in results:
symbol = result[0]
- # if symbol is not None
- # exists = lower_symbols.get(symbol.lower())
- # if symbol.lower() in lower_symbols:
if symbol is not None and lower_symbols.get(symbol.lower()):
symbol = symbol.lower()
@@ -89,7 +65,6 @@ class MrnaAssayTissueData(object):
self.data[symbol].mb = result.Mb
self.data[symbol].description = result.description
self.data[symbol].probe_target_description = result.Probe_Target_Description
- print("time taken in the loop is",time.time()-initial_time)
###########################################################################
#Input: cursor, symbolList (list), dataIdDict(Dict)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index c945f699..3c21a850 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -63,9 +63,6 @@ def sample_for_trait_lists(corr_results, target_dataset, this_trait, this_datase
"trait_sample_data": sample_data,
"trait_id": start_vars["trait_id"]
}
- # trait_lists = dict([(list(corr_result)[0],True) for corr_result in corr_results])
- # target_dataset.trait_data =list(filter(lambda dict_obj: dict_obj.keys()[
- # 0] in corr_results_traits, target_dataset_data))
results = map_shared_keys_to_values(
target_dataset.samplelist, target_dataset.trait_data)
correlation_results = compute_all_sample_correlation(corr_method="pearson",
@@ -77,33 +74,15 @@ def sample_for_trait_lists(corr_results, target_dataset, this_trait, this_datase
def tissue_for_trait_lists(corr_results, this_dataset, target_dataset, this_trait):
- # # print(corr_results[0])--
- # [{"awsdsd_at": {'corr_coeffient': 0.49714692782257336, 'p_value': 1.872077762359228e-05, 'num_overlap': 67}}]
-
- print("creating trait_lists")
- # corr_results = corr_results[0::]
trait_lists = dict([(list(corr_result)[0], True)
for corr_result in corr_results])
- print("finished creating trait_list")
-
traits_symbol_dict = this_dataset.retrieve_genes("Symbol")
- print("Retrieved symbol dict")
- print("creating dict here>>>>>>>>>")
- import time
- init_time = time.time()
traits_symbol_dict = dict({trait_name: symbol for (
trait_name, symbol) in traits_symbol_dict.items() if trait_lists.get(trait_name)})
- print("time taken to create this max dict is>>>>", time.time()-init_time)
- print("finished creatinf the dict")
- print("Fetching tissue datas")
primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
this_trait, traits_symbol_dict)
- print("finihsed>>>>>>>>>>>>>>>>>>")
- print("Calling experimental_compute_all_tissue_correlation")
corr_results = experimental_compute_all_tissue_correlation(
primary_tissue_dict=primary_tissue_data, target_tissues_data=target_tissue_data, corr_method="pearson")
- # print('finished calling this tissue reuslts',corr_results)
-
return corr_results
@@ -123,22 +102,14 @@ def compute_correlation(start_vars, method="pearson"):
corr_input_data = {}
if corr_type == "sample":
- import time
- initial_time = time.time()
- # corr_input_data = {
- # "target_dataset": target_dataset.trait_data,
- # "target_samplelist": target_dataset.samplelist,
- # "trait_data": {
- # "trait_sample_data": sample_data,
- # "trait_id": start_vars["trait_id"]
- # }
- # }
+
sample_data = process_samples(
start_vars, this_dataset.group.samplelist)
+ initial_time = time.time()
target_dataset.get_trait_data(list(sample_data.keys()))
this_trait = retrieve_sample_data(this_trait, this_dataset)
+ print("Creating target dataset and trait took", time.time()-initial_time)
- print("Creating dataset and trait took", time.time()-initial_time)
this_trait_data = {
"trait_sample_data": sample_data,
@@ -151,15 +122,9 @@ def compute_correlation(start_vars, method="pearson"):
this_trait=this_trait_data,
target_dataset=results)
- print("computedd>>>>>>>>>>>>>")
-
print("doing sample correlation took", time.time()-initial_time)
-
- other_results_time = time.time()
- other_results = tissue_for_trait_lists(
- correlation_results, this_dataset, target_dataset, this_trait)
- print(">>>time taken for this is", time.time()-other_results_time)
-
+ # other_results = tissue_for_trait_lists(
+ # correlation_results, this_dataset, target_dataset, this_trait)
# requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
return correlation_results
@@ -177,17 +142,9 @@ def compute_correlation(start_vars, method="pearson"):
target_tissues_data=corr_input_data[
"target_tissues_dict"],
corr_method=method)
- print("correlation y took", time.time()-initial_time)
- # initial_time = time.time()
- # correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
- # target_tissues_data=corr_input_data["target_tissues_dict"],
- # corr_method=method)
- # print("time taken for compute tissue is", time.time()-initial_time)
-
- # requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
-
- sample_results = sample_for_trait_lists(
- correlation_results, target_dataset, this_trait, this_dataset, start_vars)
+ print("computing tissue took >>>>", time.time()-initial_time)
+ # sample_results = sample_for_trait_lists(
+ # correlation_results, target_dataset, this_trait, this_dataset, start_vars)
return correlation_results
elif corr_type == "lit":
@@ -203,7 +160,6 @@ def compute_correlation(start_vars, method="pearson"):
species=species, gene_id=this_trait_geneid)
return lit_corr_results
- print("the time taken is", time.time()-initial_time)
# requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}"
# corr_input_data = geneid_dict
# corr_results = requests.post(requests_url, json=corr_input_data)
--
cgit v1.2.3
From ac9be3f74e005e95a057f2c49baa7822d05f1ece Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Wed, 28 Apr 2021 08:46:53 +0300
Subject: minor fixes for correlation
---
bin/genenetwork2 | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bin/genenetwork2 b/bin/genenetwork2
index 917d6549..f73f235c 100755
--- a/bin/genenetwork2
+++ b/bin/genenetwork2
@@ -155,7 +155,7 @@ fi
# We may change this one:
# export PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$PYTHONPATH
-PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$HOME/genenetwork3:$PYTHONPATH
+PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$HOME/project/genenetwork3:$PYTHONPATH
# Our UNIX TMPDIR defaults to /tmp - change this on a shared server
if [ -z $TMPDIR ]; then
--
cgit v1.2.3
From 44bcda38241e06a27c386f612d3fc2bae96a1924 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Fri, 30 Apr 2021 02:45:26 +0300
Subject: add template for correlation result
---
.../wqflask/templates/test_correlation_page.html | 140 +++++++++++++++++++++
wqflask/wqflask/views.py | 2 +-
2 files changed, 141 insertions(+), 1 deletion(-)
create mode 100644 wqflask/wqflask/templates/test_correlation_page.html
diff --git a/wqflask/wqflask/templates/test_correlation_page.html b/wqflask/wqflask/templates/test_correlation_page.html
new file mode 100644
index 00000000..051d84db
--- /dev/null
+++ b/wqflask/wqflask/templates/test_correlation_page.html
@@ -0,0 +1,140 @@
+{% extends "base.html" %}
+{% block title %}Correlation Results{% endblock %}
+{% block css %}
+
+
+
+
+
+
+
+
+{% endblock %}
+
+{% block content %}
+
+
+
Correlation Results for Dataset_name against trait_name for the top allResults
+
+
+
+
+ index |
+ trait_name |
+ Sample r |
+ Sample p(r) |
+ N |
+
+
+
+
+{% endblock %}
+
+{% block js %}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+{% endblock %}
\ No newline at end of file
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 2c239425..3d4376e2 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -885,7 +885,7 @@ def corr_compute_page():
initial_time = time.time()
correlation_results = compute_correlation(request.form)
print(">>>>Time taken by this endpoint",time.time()-initial_time)
- return render_template("demo_correlation_page.html",correlation_results=correlation_results[1:20])
+ return render_template("test_correlation_page.html",correlation_results=correlation_results[0:50])
@app.route("/corr_matrix", methods=('POST',))
def corr_matrix_page():
--
cgit v1.2.3
From 8637c4f0487117c43be629b8bd14e51c48e5fbcf Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Sat, 1 May 2021 00:12:00 +0300
Subject: add toggle for columns
---
.../wqflask/templates/test_correlation_page.html | 72 ++++++++++++----------
1 file changed, 40 insertions(+), 32 deletions(-)
diff --git a/wqflask/wqflask/templates/test_correlation_page.html b/wqflask/wqflask/templates/test_correlation_page.html
index 051d84db..40d9836c 100644
--- a/wqflask/wqflask/templates/test_correlation_page.html
+++ b/wqflask/wqflask/templates/test_correlation_page.html
@@ -16,6 +16,8 @@
.trait_col {
font-weight:bolder;
text-align: center;
+ color:#036ffc;
+ /*font-size: 1.1em;*/
}
table th {
font-weight: bolder;
@@ -27,17 +29,32 @@
.correlation-title h3 span {
font-weight: bolder;
}
+ .header-toggle-vis {
+ padding:10px 5px;
+ }
+ .header-toggle-vis button {
+ border-radius: 5px;
+
+ }
{% endblock %}
{% block content %}
-
Correlation Results for Dataset_name against trait_name for the top allResults
+
Correlation Results for Dataset_name against trait_name for the top all Results
+
+
+ |
index |
trait_name |
Sample r |
@@ -83,9 +100,10 @@
{% endblock %}
\ No newline at end of file
--
cgit v1.2.3
From 05acf00467b9338072cde7556dc7b0ca130242c9 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Sat, 1 May 2021 01:14:30 +0300
Subject: add default values for empty columns
---
.../wqflask/templates/test_correlation_page.html | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/wqflask/wqflask/templates/test_correlation_page.html b/wqflask/wqflask/templates/test_correlation_page.html
index 40d9836c..be9ec226 100644
--- a/wqflask/wqflask/templates/test_correlation_page.html
+++ b/wqflask/wqflask/templates/test_correlation_page.html
@@ -45,6 +45,7 @@
Correlation Results for Dataset_name against trait_name for the top all Results
@@ -85,8 +88,10 @@
// document.querySelector(".content").innerHTML =correlationResults
// parse the data
let counter = 0;
+ let corr_type = "tissue";
correlationResults =correlationResults.map((trait_object)=>{
let trait_name = Object.keys(trait_object)[0]
+
let new_dict = {
"index":counter,
"trait_name":trait_name,
@@ -94,7 +99,9 @@
}
counter++;
return new_dict;
- })
+ })
+
+console.log(correlationResults)
@@ -102,13 +109,15 @@
$(document).ready(function() {
let table = $('#example').DataTable( {
"data": correlationResults,
- "columns": [
- {"data":null,"width":"25px"},
+ "columns": [
+ {"data":corr_type=="sample"?null:"fd","width":"25px"},
{ "data": "index","width":"120px","title":"Index" },
{ "data": "trait_name","title":"TraitName"},
- { "data": "corr_coeffient"},
- { "data": "p_value"},
- { "data": "num_overlap"}
+ { "data": "corr_coeffient","defaultContent": "--"},
+ { "data": "p_value","defaultContent":"--"},
+ { "data": "num_overlap","defaultContent":"--"},
+ {"data":"tissue_corr","defaultContent":"--","title":"Tissue rho"},
+ {"data":"lit_corr","defaultContent":"--","title":"Lit rho"}
],
"columnDefs": [
{
--
cgit v1.2.3
From ba2253944a4752f0413c57679f05b9c05cbd9bea Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Sat, 1 May 2021 01:34:13 +0300
Subject: modify column for p_val
---
wqflask/wqflask/templates/test_correlation_page.html | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/wqflask/wqflask/templates/test_correlation_page.html b/wqflask/wqflask/templates/test_correlation_page.html
index be9ec226..037e9735 100644
--- a/wqflask/wqflask/templates/test_correlation_page.html
+++ b/wqflask/wqflask/templates/test_correlation_page.html
@@ -59,10 +59,11 @@
index |
trait_name |
Sample r |
-
Sample p(r) |
+
Sample r(p) |
N |
-
Tissue rho |
-
Lit rho |
+
Tissue r |
+
Tissue r(p) |
+
Lit r |
@@ -116,7 +117,8 @@ console.log(correlationResults)
{ "data": "corr_coeffient","defaultContent": "--"},
{ "data": "p_value","defaultContent":"--"},
{ "data": "num_overlap","defaultContent":"--"},
- {"data":"tissue_corr","defaultContent":"--","title":"Tissue rho"},
+ {"data":"tissue_corr","defaultContent":"--","title":"Tissue r"},
+ {"data":"tissue_p_val","defaultContent":"--","title":"Tissue r(p)"},
{"data":"lit_corr","defaultContent":"--","title":"Lit rho"}
],
"columnDefs": [
--
cgit v1.2.3
From 02916a787b384709d96eebfaefd4898cae415739 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Sat, 1 May 2021 03:23:32 +0300
Subject: delete demo correlation page
---
.../wqflask/templates/demo_correlation_page.html | 107 ---------------------
1 file changed, 107 deletions(-)
delete mode 100644 wqflask/wqflask/templates/demo_correlation_page.html
diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html
deleted file mode 100644
index 67e3c57c..00000000
--- a/wqflask/wqflask/templates/demo_correlation_page.html
+++ /dev/null
@@ -1,107 +0,0 @@
-{% extends "base.html" %}
-{% block title %}Demo Correlation Results{% endblock %}
-{% block css %}
-
-
-
-
-
-
-{% endblock %}
-{% block content %}
-
-
CORRELATION RESULTS
-
-
- {% for corr_result in correlation_results %}
- {% for key,value in corr_result.items()%}
-
-
-
- {% if "corr_coeffient" in value %}
-
{{value["corr_coeffient"]}}
- {%elif "tissue_corr" in value %}
-
- {%elif "lit_corr" in value %}
- {{value["lit_corr"]}}
- {% endif %}
- {%if "tissue_number" in value %}
-
{{value["tissue_number"]}}
- {%elif "num_overlap" in value %}
-
- {% endif %}
-
-
-
-
-
-
- {% endfor %}
-
- {% endfor %}
-
-
-
-
-
-{% endblock %}
-
--
cgit v1.2.3
From 149f9c7c6804d4e717ed9aa3a42968b295693b3d Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Sat, 1 May 2021 03:24:05 +0300
Subject: autopep8 for file
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 126 ++++++++-------------
1 file changed, 45 insertions(+), 81 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 3c21a850..b56c09d8 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -1,61 +1,55 @@
"""module that calls the gn3 api's to do the correlation """
import json
-import requests
-import time
+
from wqflask.correlation import correlation_functions
from base import data_set
+
from base.trait import create_trait
from base.trait import retrieve_sample_data
-# gn3 lib
+
from gn3.computations.correlations import compute_all_sample_correlation
from gn3.computations.correlations import map_shared_keys_to_values
-from gn3.computations.correlations import compute_all_tissue_correlation
from gn3.computations.correlations import compute_all_lit_correlation
from gn3.computations.correlations import experimental_compute_all_tissue_correlation
from gn3.db_utils import database_connector
-GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation"
+
+def create_target_this_trait(start_vars):
+ """this function creates the required trait and target dataset for correlation"""
+
+ this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
+ target_dataset = data_set.create_dataset(
+ dataset_name=start_vars['corr_dataset'])
+ this_trait = create_trait(dataset=this_dataset,
+ name=start_vars['trait_id'])
+ sample_data = ()
+ return (this_dataset, this_trait, target_dataset, sample_data)
def process_samples(start_vars, sample_names, excluded_samples=None):
- """process samples method"""
+ """process samples"""
sample_data = {}
if not excluded_samples:
excluded_samples = ()
-
sample_vals_dict = json.loads(start_vars["sample_vals"])
-
for sample in sample_names:
if sample not in excluded_samples:
val = sample_vals_dict[sample]
if not val.strip().lower() == "x":
sample_data[str(sample)] = float(val)
-
return sample_data
-def create_target_this_trait(start_vars):
- """this function creates the required trait and target dataset for correlation"""
-
- this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
- target_dataset = data_set.create_dataset(
- dataset_name=start_vars['corr_dataset'])
+def sample_for_trait_lists(corr_results, target_dataset,
+ this_trait, this_dataset, start_vars):
+ """interface function for correlation on top results"""
- this_trait = create_trait(dataset=this_dataset,
- name=start_vars['trait_id'])
-
- # target_dataset.get_trait_data(list(self.sample_data.keys()))
-
- # this_trait = retrieve_sample_data(this_trait, this_dataset)
- sample_data = ()
- return (this_dataset, this_trait, target_dataset, sample_data)
-
-
-def sample_for_trait_lists(corr_results, target_dataset, this_trait, this_dataset, start_vars):
sample_data = process_samples(
start_vars, this_dataset.group.samplelist)
target_dataset.get_trait_data(list(sample_data.keys()))
+ # should filter target traits from here
+ _corr_results = corr_results
this_trait = retrieve_sample_data(this_trait, this_dataset)
@@ -69,65 +63,55 @@ def sample_for_trait_lists(corr_results, target_dataset, this_trait, this_datase
this_trait=this_trait_data,
target_dataset=results)
-
return correlation_results
-def tissue_for_trait_lists(corr_results, this_dataset, target_dataset, this_trait):
- trait_lists = dict([(list(corr_result)[0], True)
- for corr_result in corr_results])
+def tissue_for_trait_lists(corr_results, this_dataset, this_trait):
+ """interface function for doing tissue corr_results on trait_list"""
+ # trait_lists = dict([(list(corr_result)[0], True)
+ # for corr_result in corr_results])
+ trait_lists = {list(corr_results)[0]: 1 for corr_result in corr_results}
traits_symbol_dict = this_dataset.retrieve_genes("Symbol")
traits_symbol_dict = dict({trait_name: symbol for (
trait_name, symbol) in traits_symbol_dict.items() if trait_lists.get(trait_name)})
primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
this_trait, traits_symbol_dict)
corr_results = experimental_compute_all_tissue_correlation(
- primary_tissue_dict=primary_tissue_data, target_tissues_data=target_tissue_data, corr_method="pearson")
+ primary_tissue_dict=primary_tissue_data,
+ target_tissues_data=target_tissue_data,
+ corr_method="pearson")
return corr_results
def compute_correlation(start_vars, method="pearson"):
"""compute correlation for to call gn3 api"""
- import time
+ # pylint: disable-msg=too-many-locals
corr_type = start_vars['corr_type']
(this_dataset, this_trait, target_dataset,
sample_data) = create_target_this_trait(start_vars)
- # cor_results = compute_correlation(start_vars)
-
method = start_vars['corr_sample_method']
-
+ _corr_return_results = start_vars.get("corr_return_results", 100)
corr_input_data = {}
if corr_type == "sample":
-
+
sample_data = process_samples(
start_vars, this_dataset.group.samplelist)
- initial_time = time.time()
target_dataset.get_trait_data(list(sample_data.keys()))
this_trait = retrieve_sample_data(this_trait, this_dataset)
- print("Creating target dataset and trait took", time.time()-initial_time)
-
-
this_trait_data = {
"trait_sample_data": sample_data,
"trait_id": start_vars["trait_id"]
}
- initial_time = time.time()
results = map_shared_keys_to_values(
target_dataset.samplelist, target_dataset.trait_data)
correlation_results = compute_all_sample_correlation(corr_method=method,
this_trait=this_trait_data,
target_dataset=results)
- print("doing sample correlation took", time.time()-initial_time)
- # other_results = tissue_for_trait_lists(
- # correlation_results, this_dataset, target_dataset, this_trait)
- # requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
- return correlation_results
-
elif corr_type == "tissue":
trait_symbol_dict = this_dataset.retrieve_genes("Symbol")
primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
@@ -137,50 +121,33 @@ def compute_correlation(start_vars, method="pearson"):
"primary_tissue": primary_tissue_data,
"target_tissues_dict": target_tissue_data
}
- initial_time = time.time()
- correlation_results = experimental_compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
- target_tissues_data=corr_input_data[
- "target_tissues_dict"],
- corr_method=method)
- print("computing tissue took >>>>", time.time()-initial_time)
- # sample_results = sample_for_trait_lists(
- # correlation_results, target_dataset, this_trait, this_dataset, start_vars)
- return correlation_results
+ correlation_results = experimental_compute_all_tissue_correlation(
+ primary_tissue_dict=corr_input_data["primary_tissue"],
+ target_tissues_data=corr_input_data[
+ "target_tissues_dict"],
+ corr_method=method
+
+ )
elif corr_type == "lit":
(this_trait_geneid, geneid_dict, species) = do_lit_correlation(
- this_trait, this_dataset, target_dataset)
+ this_trait, this_dataset)
conn, _cursor_object = database_connector()
- initial_time = time.time()
with conn:
-
- lit_corr_results = compute_all_lit_correlation(
+ correlation_results = compute_all_lit_correlation(
conn=conn, trait_lists=list(geneid_dict.items()),
species=species, gene_id=this_trait_geneid)
- return lit_corr_results
- # requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}"
- # corr_input_data = geneid_dict
- # corr_results = requests.post(requests_url, json=corr_input_data)
-
- # data = corr_results.json()
-
- # return data
+ return correlation_results
-def do_lit_correlation(this_trait, this_dataset, target_dataset):
+def do_lit_correlation(this_trait, this_dataset):
+ """function for fetching lit inputs"""
geneid_dict = this_dataset.retrieve_genes("GeneId")
- #
- print("CALLING THE LIT CORRELATION HERE")
species = this_dataset.group.species.lower()
-
- this_trait_geneid = this_trait.geneid
- this_trait_gene_data = {
- this_trait.name: this_trait_geneid
- }
-
- return (this_trait_geneid, geneid_dict, species)
+ trait_geneid = this_trait.geneid
+ return (trait_geneid, geneid_dict, species)
def get_tissue_correlation_input(this_trait, trait_symbol_dict):
@@ -190,7 +157,6 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict):
if this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower(
)]
-
corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
symbol_list=list(trait_symbol_dict.values()))
primary_tissue_data = {
@@ -202,7 +168,5 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict):
"trait_symbol_dict": trait_symbol_dict,
"symbol_tissue_vals_dict": corr_result_tissue_vals_dict
}
-
return (primary_tissue_data, target_tissue_data)
-
return None
--
cgit v1.2.3
From 913d2e9113635ccf53140d53aaad55f09fc1df26 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 3 May 2021 20:48:00 +0300
Subject: apply limit to results
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 10 +++++-----
wqflask/wqflask/views.py | 2 +-
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index b56c09d8..4949bbe2 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -11,7 +11,7 @@ from base.trait import retrieve_sample_data
from gn3.computations.correlations import compute_all_sample_correlation
from gn3.computations.correlations import map_shared_keys_to_values
from gn3.computations.correlations import compute_all_lit_correlation
-from gn3.computations.correlations import experimental_compute_all_tissue_correlation
+from gn3.computations.correlations import compute_tissue_correlation
from gn3.db_utils import database_connector
@@ -76,7 +76,7 @@ def tissue_for_trait_lists(corr_results, this_dataset, this_trait):
trait_name, symbol) in traits_symbol_dict.items() if trait_lists.get(trait_name)})
primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
this_trait, traits_symbol_dict)
- corr_results = experimental_compute_all_tissue_correlation(
+ corr_results = compute_tissue_correlation(
primary_tissue_dict=primary_tissue_data,
target_tissues_data=target_tissue_data,
corr_method="pearson")
@@ -93,7 +93,7 @@ def compute_correlation(start_vars, method="pearson"):
sample_data) = create_target_this_trait(start_vars)
method = start_vars['corr_sample_method']
- _corr_return_results = start_vars.get("corr_return_results", 100)
+ corr_return_results = int(start_vars.get("corr_return_results", 100))
corr_input_data = {}
if corr_type == "sample":
@@ -121,7 +121,7 @@ def compute_correlation(start_vars, method="pearson"):
"primary_tissue": primary_tissue_data,
"target_tissues_dict": target_tissue_data
}
- correlation_results = experimental_compute_all_tissue_correlation(
+ correlation_results = compute_tissue_correlation(
primary_tissue_dict=corr_input_data["primary_tissue"],
target_tissues_data=corr_input_data[
"target_tissues_dict"],
@@ -139,7 +139,7 @@ def compute_correlation(start_vars, method="pearson"):
conn=conn, trait_lists=list(geneid_dict.items()),
species=species, gene_id=this_trait_geneid)
- return correlation_results
+ return correlation_results[0:corr_return_results]
def do_lit_correlation(this_trait, this_dataset):
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 3d4376e2..b042a211 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -885,7 +885,7 @@ def corr_compute_page():
initial_time = time.time()
correlation_results = compute_correlation(request.form)
print(">>>>Time taken by this endpoint",time.time()-initial_time)
- return render_template("test_correlation_page.html",correlation_results=correlation_results[0:50])
+ return render_template("test_correlation_page.html",correlation_results=correlation_results)
@app.route("/corr_matrix", methods=('POST',))
def corr_matrix_page():
--
cgit v1.2.3
From 049a438741d19b1cf6c8c290ec01343b51690b30 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Sun, 9 May 2021 00:17:33 +0300
Subject: fix:add lit for trait list
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 39 ++++++++++++++++------
1 file changed, 28 insertions(+), 11 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 4949bbe2..d68bb604 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -83,6 +83,23 @@ def tissue_for_trait_lists(corr_results, this_dataset, this_trait):
return corr_results
+def lit_for_trait_list(corr_results, this_dataset, this_trait):
+ (this_trait_geneid, geneid_dict, species) = do_lit_correlation(
+ this_trait, this_dataset)
+
+ trait_lists = {list(corr_results)[0]: 1 for corr_result in corr_results}
+
+ geneid_dict = {trait_name: geneid for (trait_name, geneid) in geneid_dict if
+ trait_lists.get(trait_name)}
+
+ conn, _cursor_object = database_connector()
+ correlation_results = compute_all_lit_correlation(
+ conn=conn, trait_lists=list(geneid_dict.items()),
+ species=species, gene_id=this_trait_geneid)
+
+ return correlation_results[0:corr_return_results]
+
+
def compute_correlation(start_vars, method="pearson"):
"""compute correlation for to call gn3 api"""
# pylint: disable-msg=too-many-locals
@@ -136,30 +153,30 @@ def compute_correlation(start_vars, method="pearson"):
conn, _cursor_object = database_connector()
with conn:
correlation_results = compute_all_lit_correlation(
- conn=conn, trait_lists=list(geneid_dict.items()),
- species=species, gene_id=this_trait_geneid)
+ conn = conn, trait_lists = list(geneid_dict.items()),
+ species = species, gene_id = this_trait_geneid)
return correlation_results[0:corr_return_results]
def do_lit_correlation(this_trait, this_dataset):
"""function for fetching lit inputs"""
- geneid_dict = this_dataset.retrieve_genes("GeneId")
- species = this_dataset.group.species.lower()
- trait_geneid = this_trait.geneid
+ geneid_dict=this_dataset.retrieve_genes("GeneId")
+ species=this_dataset.group.species.lower()
+ trait_geneid=this_trait.geneid
return (trait_geneid, geneid_dict, species)
def get_tissue_correlation_input(this_trait, trait_symbol_dict):
"""Gets tissue expression values for the primary trait and target tissues values"""
- primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list=[this_trait.symbol])
+ primary_trait_tissue_vals_dict=correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list = [this_trait.symbol])
if this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
- primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower(
+ primary_trait_tissue_values=primary_trait_tissue_vals_dict[this_trait.symbol.lower(
)]
- corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list=list(trait_symbol_dict.values()))
- primary_tissue_data = {
+ corr_result_tissue_vals_dict=correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list = list(trait_symbol_dict.values()))
+ primary_tissue_data={
"this_id": this_trait.name,
"tissue_values": primary_trait_tissue_values
--
cgit v1.2.3
From 27538980f93c1d72b0b2d76151312f3fbce4c9a5 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 10 May 2021 08:24:42 +0300
Subject: add previous endpoint for correlation
---
wqflask/base/data_set.py | 37 -------------------------------------
wqflask/wqflask/views.py | 13 ++++++++-----
2 files changed, 8 insertions(+), 42 deletions(-)
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 468c4da0..d0f5e6f2 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -655,42 +655,7 @@ class DataSet(object):
"Dataset {} is not yet available in GeneNetwork.".format(self.name))
pass
- def fetch_probe_trait_data(self, sample_list=None):
- if sample_list:
- self.samplelist = sample_list
- else:
- self.samplelist = self.group.samplelist
-
- if self.group.parlist != None and self.group.f1list != None:
- if (self.group.parlist + self.group.f1list) in self.samplelist:
- self.samplelist += self.group.parlist + self.group.f1list
-
- query = """
- SELECT Strain.Name, Strain.Id FROM Strain, Species
- WHERE Strain.Name IN {}
- and Strain.SpeciesId=Species.Id
- and Species.name = '{}'
- """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
- logger.sql(query)
- results = dict(g.db.execute(query).fetchall())
- sample_ids = [results[item] for item in self.samplelist]
-
- query = """SELECT * from ProbeSetData WHERE Id in ( SELECT ProbeSetXRef.DataId FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and ProbeSetFreeze.Name = 'HC_M2_0606_P' and ProbeSet.Id = ProbeSetXRef.ProbeSetId order by ProbeSet.Id ) and StrainId in ({})""".format(
- ",".join(str(sample_id) for sample_id in sample_ids))
- results = g.db.execute(query).fetchall()
-
- # with conn:
- # cursor = conn.cursor()
- # cursor.execute(query)
- # results = cursor.fetchall()
- trait_data = {}
- for trait_id, StrainId, value in results:
- if trait_id in trait_data:
- trait_data[trait_id].append(value)
- else:
- trait_data[trait_id] = [value]
- self.trait_data = trait_data
def get_trait_data(self, sample_list=None):
if sample_list:
@@ -711,7 +676,6 @@ class DataSet(object):
logger.sql(query)
results = dict(g.db.execute(query).fetchall())
sample_ids = [results[item] for item in self.samplelist]
- print("the number of sample ids are", len(sample_ids))
# MySQL limits the number of tables that can be used in a join to 61,
# so we break the sample ids into smaller chunks
@@ -762,7 +726,6 @@ class DataSet(object):
trait_sample_data.append(results)
trait_count = len(trait_sample_data[0])
- print("the trait count is >>>", trait_count)
self.trait_data = collections.defaultdict(list)
# put all of the separate data together into a dictionary where the keys are
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index b042a211..19779651 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -881,11 +881,14 @@ def network_graph_page():
def corr_compute_page():
logger.info("In corr_compute, request.form is:", pf(request.form))
logger.info(request.url)
- import time
- initial_time = time.time()
- correlation_results = compute_correlation(request.form)
- print(">>>>Time taken by this endpoint",time.time()-initial_time)
- return render_template("test_correlation_page.html",correlation_results=correlation_results)
+ template_vars = show_corr_results.CorrelationResults(request.form)
+ return render_template("correlation_page.html", **template_vars.__dict__)
+
+ # to test the new correlation api uncomment these lines
+
+ # correlation_results = compute_correlation(request.form)
+ # print(">>>>Time taken by this endpoint",time.time()-initial_time)
+ # return render_template("test_correlation_page.html",correlation_results=correlation_results)
@app.route("/corr_matrix", methods=('POST',))
def corr_matrix_page():
--
cgit v1.2.3
From aebbbc9240ab63f684dae2451cfc335681c049db Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 10 May 2021 08:25:21 +0300
Subject: remove debug statements
---
wqflask/wqflask/correlation/show_corr_results.py | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 50b3ba26..7f69807b 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -57,6 +57,7 @@ TISSUE_METHODS = [METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK]
TISSUE_MOUSE_DB = 1
+
class CorrelationResults(object):
def __init__(self, start_vars):
# get trait list from db (database name)
@@ -445,9 +446,6 @@ class CorrelationResults(object):
"""
- print("below here>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
- print(self.target_dataset.trait_data)
-
self.this_trait_vals = []
target_vals = []
for index, sample in enumerate(self.target_dataset.samplelist):
--
cgit v1.2.3
From bc29c9dd907c8c1b024231fa713040b3c4092f3a Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 10 May 2021 08:29:46 +0300
Subject: revert python path
---
bin/genenetwork2 | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/bin/genenetwork2 b/bin/genenetwork2
index f73f235c..5f4e0f9a 100755
--- a/bin/genenetwork2
+++ b/bin/genenetwork2
@@ -154,8 +154,7 @@ if [ ! -d $R_LIBS_SITE ] ; then
fi
# We may change this one:
-# export PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$PYTHONPATH
-PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$HOME/project/genenetwork3:$PYTHONPATH
+export PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$PYTHONPATH
# Our UNIX TMPDIR defaults to /tmp - change this on a shared server
if [ -z $TMPDIR ]; then
--
cgit v1.2.3
From 93baf59aca12fb85e668315e67d36137c854f12d Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 10 May 2021 08:33:22 +0300
Subject: minor changes to interface
---
wqflask/wqflask/correlation/correlation_gn3_api.py | 60 ++++++++++++++--------
1 file changed, 38 insertions(+), 22 deletions(-)
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index d68bb604..98d52591 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -68,9 +68,9 @@ def sample_for_trait_lists(corr_results, target_dataset,
def tissue_for_trait_lists(corr_results, this_dataset, this_trait):
"""interface function for doing tissue corr_results on trait_list"""
- # trait_lists = dict([(list(corr_result)[0], True)
- # for corr_result in corr_results])
- trait_lists = {list(corr_results)[0]: 1 for corr_result in corr_results}
+ trait_lists = dict([(list(corr_result)[0], True)
+ for corr_result in corr_results])
+ # trait_lists = {list(corr_results)[0]: 1 for corr_result in corr_results}
traits_symbol_dict = this_dataset.retrieve_genes("Symbol")
traits_symbol_dict = dict({trait_name: symbol for (
trait_name, symbol) in traits_symbol_dict.items() if trait_lists.get(trait_name)})
@@ -85,19 +85,24 @@ def tissue_for_trait_lists(corr_results, this_dataset, this_trait):
def lit_for_trait_list(corr_results, this_dataset, this_trait):
(this_trait_geneid, geneid_dict, species) = do_lit_correlation(
- this_trait, this_dataset)
+ this_trait, this_dataset)
- trait_lists = {list(corr_results)[0]: 1 for corr_result in corr_results}
+ # trait_lists = {list(corr_results)[0]: 1 for corr_result in corr_results}
+ trait_lists = dict([(list(corr_result)[0], True)
+ for corr_result in corr_results])
- geneid_dict = {trait_name: geneid for (trait_name, geneid) in geneid_dict if
- trait_lists.get(trait_name)}
+ geneid_dict = {trait_name: geneid for (trait_name, geneid) in geneid_dict.items() if
+ trait_lists.get(trait_name)}
conn, _cursor_object = database_connector()
- correlation_results = compute_all_lit_correlation(
- conn=conn, trait_lists=list(geneid_dict.items()),
- species=species, gene_id=this_trait_geneid)
- return correlation_results[0:corr_return_results]
+ with conn:
+
+ correlation_results = compute_all_lit_correlation(
+ conn=conn, trait_lists=list(geneid_dict.items()),
+ species=species, gene_id=this_trait_geneid)
+
+ return correlation_results
def compute_correlation(start_vars, method="pearson"):
@@ -129,6 +134,17 @@ def compute_correlation(start_vars, method="pearson"):
this_trait=this_trait_data,
target_dataset=results)
+ # do tissue correlation
+
+ # code to be used later
+
+ # tissue_result = tissue_for_trait_lists(
+ # correlation_results, this_dataset, this_trait)
+ # # lit spoils the party so slow
+ # lit_result = lit_for_trait_list(
+ # correlation_results, this_dataset, this_trait)
+
+
elif corr_type == "tissue":
trait_symbol_dict = this_dataset.retrieve_genes("Symbol")
primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
@@ -153,30 +169,30 @@ def compute_correlation(start_vars, method="pearson"):
conn, _cursor_object = database_connector()
with conn:
correlation_results = compute_all_lit_correlation(
- conn = conn, trait_lists = list(geneid_dict.items()),
- species = species, gene_id = this_trait_geneid)
+ conn=conn, trait_lists=list(geneid_dict.items()),
+ species=species, gene_id=this_trait_geneid)
return correlation_results[0:corr_return_results]
def do_lit_correlation(this_trait, this_dataset):
"""function for fetching lit inputs"""
- geneid_dict=this_dataset.retrieve_genes("GeneId")
- species=this_dataset.group.species.lower()
- trait_geneid=this_trait.geneid
+ geneid_dict = this_dataset.retrieve_genes("GeneId")
+ species = this_dataset.group.species.lower()
+ trait_geneid = this_trait.geneid
return (trait_geneid, geneid_dict, species)
def get_tissue_correlation_input(this_trait, trait_symbol_dict):
"""Gets tissue expression values for the primary trait and target tissues values"""
- primary_trait_tissue_vals_dict=correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list = [this_trait.symbol])
+ primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list=[this_trait.symbol])
if this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
- primary_trait_tissue_values=primary_trait_tissue_vals_dict[this_trait.symbol.lower(
+ primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower(
)]
- corr_result_tissue_vals_dict=correlation_functions.get_trait_symbol_and_tissue_values(
- symbol_list = list(trait_symbol_dict.values()))
- primary_tissue_data={
+ corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
+ symbol_list=list(trait_symbol_dict.values()))
+ primary_tissue_data = {
"this_id": this_trait.name,
"tissue_values": primary_trait_tissue_values
--
cgit v1.2.3
From 0b723720f7b1b9802b2f5453b747c7e48b693817 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 10 May 2021 08:34:10 +0300
Subject: use new correlation code in endpoint
---
wqflask/wqflask/views.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 19779651..a470a0c9 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -881,14 +881,14 @@ def network_graph_page():
def corr_compute_page():
logger.info("In corr_compute, request.form is:", pf(request.form))
logger.info(request.url)
- template_vars = show_corr_results.CorrelationResults(request.form)
- return render_template("correlation_page.html", **template_vars.__dict__)
+ # template_vars = show_corr_results.CorrelationResults(request.form)
+ # return render_template("correlation_page.html", **template_vars.__dict__)
# to test the new correlation api uncomment these lines
- # correlation_results = compute_correlation(request.form)
+ correlation_results = compute_correlation(request.form)
# print(">>>>Time taken by this endpoint",time.time()-initial_time)
- # return render_template("test_correlation_page.html",correlation_results=correlation_results)
+ return render_template("test_correlation_page.html",correlation_results=correlation_results)
@app.route("/corr_matrix", methods=('POST',))
def corr_matrix_page():
--
cgit v1.2.3
From 848fc236461384e5352798e19d710480ead379c4 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Mon, 10 May 2021 08:50:20 +0300
Subject: minor-fix:remove debug statements
---
wqflask/wqflask/views.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 317c5f63..c2e403a0 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -951,10 +951,9 @@ def corr_compute_page():
# template_vars = show_corr_results.CorrelationResults(request.form)
# return render_template("correlation_page.html", **template_vars.__dict__)
- # to test the new correlation api uncomment these lines
+ # to test/disable the new correlation api uncomment these lines
correlation_results = compute_correlation(request.form)
- # print(">>>>Time taken by this endpoint",time.time()-initial_time)
return render_template("test_correlation_page.html",correlation_results=correlation_results)
--
cgit v1.2.3
From 56fc1a2a53496a8b3720515f61e54a74cc95821e Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Tue, 11 May 2021 00:29:33 +0300
Subject: modify js & add button for test correlation
---
wqflask/wqflask/static/new/javascript/show_trait.js | 10 ++++++++++
.../templates/show_trait_calculate_correlations.html | 3 +++
wqflask/wqflask/views.py | 15 ++++++++++-----
3 files changed, 23 insertions(+), 5 deletions(-)
diff --git a/wqflask/wqflask/static/new/javascript/show_trait.js b/wqflask/wqflask/static/new/javascript/show_trait.js
index 6e9d68c4..569046d3 100644
--- a/wqflask/wqflask/static/new/javascript/show_trait.js
+++ b/wqflask/wqflask/static/new/javascript/show_trait.js
@@ -585,6 +585,16 @@ get_table_contents_for_form_submit = function(form_id) {
var corr_input_list = ['sample_vals', 'corr_type', 'primary_samples', 'trait_id', 'dataset', 'group', 'tool_used', 'form_url', 'corr_sample_method', 'corr_samples_group', 'corr_dataset', 'min_expr',
'corr_return_results', 'location_type', 'loc_chr', 'min_loc_mb', 'max_loc_mb', 'p_range_lower', 'p_range_upper']
+$(".test_corr_compute").on("click", (function(_this) {
+ return function() {
+ $('input[name=tool_used]').val("Correlation");
+ $('input[name=form_url]').val("/test_corr_compute");
+ $('input[name=wanted_inputs]').val(corr_input_list.join(","));
+ url = "/loading";
+ return submit_special(url);
+ };
+})(this));
+
$(".corr_compute").on("click", (function(_this) {
return function() {
$('input[name=tool_used]').val("Correlation");
diff --git a/wqflask/wqflask/templates/show_trait_calculate_correlations.html b/wqflask/wqflask/templates/show_trait_calculate_correlations.html
index 50803978..ef784c84 100644
--- a/wqflask/wqflask/templates/show_trait_calculate_correlations.html
+++ b/wqflask/wqflask/templates/show_trait_calculate_correlations.html
@@ -117,6 +117,9 @@
+
+
+
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index c2e403a0..f3b0257c 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -705,7 +705,7 @@ def mapping_results_container_page():
@app.route("/loading", methods=('POST',))
def loading_page():
- logger.info(request.url)
+ # logger.info(request.url)
initial_start_vars = request.form
start_vars_container = {}
n_samples = 0 # ZS: So it can be displayed on loading page
@@ -948,15 +948,20 @@ def network_graph_page():
def corr_compute_page():
logger.info("In corr_compute, request.form is:", pf(request.form))
logger.info(request.url)
- # template_vars = show_corr_results.CorrelationResults(request.form)
- # return render_template("correlation_page.html", **template_vars.__dict__)
+ template_vars = show_corr_results.CorrelationResults(request.form)
+ return render_template("correlation_page.html", **template_vars.__dict__)
# to test/disable the new correlation api uncomment these lines
- correlation_results = compute_correlation(request.form)
- return render_template("test_correlation_page.html",correlation_results=correlation_results)
+ # correlation_results = compute_correlation(request.form)
+ # return render_template("test_correlation_page.html", correlation_results=correlation_results)
+@app.route("/test_corr_compute", methods=["POST"])
+def test_corr_compute_page():
+ correlation_results = compute_correlation(request.form)
+ return render_template("test_correlation_page.html", correlation_results=correlation_results)
+
@app.route("/corr_matrix", methods=('POST',))
def corr_matrix_page():
logger.info("In corr_matrix, request.form is:", pf(request.form))
--
cgit v1.2.3
From e159fb1e0d9a9874be9a3475dd4a1a055d9204ad Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 11 Feb 2021 04:33:55 +0300
Subject: replace datatable scroller
---
wqflask/wqflask/templates/mapping_results.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wqflask/wqflask/templates/mapping_results.html b/wqflask/wqflask/templates/mapping_results.html
index 73d7501b..d6fc6e37 100644
--- a/wqflask/wqflask/templates/mapping_results.html
+++ b/wqflask/wqflask/templates/mapping_results.html
@@ -357,7 +357,7 @@
{% endif %}
-
+
--
cgit v1.2.3
From 92afd94c14891bec381b6de5ecf9926032bab908 Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 11 Feb 2021 04:37:11 +0300
Subject: replace datatable scroller cdn
---
wqflask/wqflask/templates/show_trait.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html
index 09ecb7b6..fc14822c 100644
--- a/wqflask/wqflask/templates/show_trait.html
+++ b/wqflask/wqflask/templates/show_trait.html
@@ -148,7 +148,7 @@
-
+
--
cgit v1.2.3
From 4f826611242080089856ccb4e3a7cda398e57b0d Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Thu, 11 Feb 2021 04:46:20 +0300
Subject: replace font-awesome
---
wqflask/wqflask/templates/correlation_page.html | 4 ++--
wqflask/wqflask/templates/search_result_page.html | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html
index a9a3e1a0..4cad2749 100644
--- a/wqflask/wqflask/templates/correlation_page.html
+++ b/wqflask/wqflask/templates/correlation_page.html
@@ -4,7 +4,7 @@
-
+
{% endblock %}
@@ -141,7 +141,7 @@
-
+
diff --git a/wqflask/wqflask/templates/search_result_page.html b/wqflask/wqflask/templates/search_result_page.html
index e7a7bc51..827bad98 100644
--- a/wqflask/wqflask/templates/search_result_page.html
+++ b/wqflask/wqflask/templates/search_result_page.html
@@ -4,7 +4,7 @@
-
+
{% endblock %}
@@ -150,7 +150,7 @@
-
+
--
cgit v1.2.3
From 45fc1da136b78bac906aad013686a9530f68bd5e Mon Sep 17 00:00:00 2001
From: BonfaceKilz
Date: Tue, 11 May 2021 10:33:49 +0300
Subject: doc: docker-container: Default to python3-genenetwork2 for examples
---
doc/docker-container.org | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/doc/docker-container.org b/doc/docker-container.org
index ec91824a..c894c4b5 100644
--- a/doc/docker-container.org
+++ b/doc/docker-container.org
@@ -59,11 +59,11 @@ RUN tar -xzf /tmp/gn2.tar.gz -C / && rm -f /tmp/gn2.tar.gz && \
Build the image(Note the fullstop at the end):
-: sudo docker build -t python2-genenetwork2:latest -f Dockerfile .
+: sudo docker build -t python3-genenetwork2:latest -f Dockerfile .
To load the image interactively you've just created:
-: docker run -ti "python2-genenetwork2:latest" bash
+: docker run -ti "python3-genenetwork2:latest" bash
Assuming you have a docker instance running, you could always run
commands in it e.g:
@@ -78,7 +78,7 @@ CI environment using Github Actions.
To push to dockerhub, first get the image name by running =docker
images=. Push to dockerhub using a command similar to:
-: docker push bonfacekilz/python2-genenetwork2:latest
+: docker push bonfacekilz/python3-genenetwork2:latest
Right now, we have 2 images on DockerHub:
--
cgit v1.2.3
From a3ea9cb840bc8dbc9e5e0940ec72a36712e13a2a Mon Sep 17 00:00:00 2001
From: BonfaceKilz
Date: Tue, 11 May 2021 10:47:51 +0300
Subject: workflows: main.yml: Update container image
---
.github/workflows/main.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a36abc0a..64c0fdb2 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -11,7 +11,7 @@ on:
jobs:
unittest:
runs-on: ubuntu-latest
- container: bonfacekilz/python3-genenetwork2:ad741c1
+ container: bonfacekilz/python3-genenetwork2:00ba1f8
steps:
# First start with mariadb set then checkout. The checkout gives
--
cgit v1.2.3
From 5933991c3a82e847ebf0a86583aa5461d8c3f937 Mon Sep 17 00:00:00 2001
From: BonfaceKilz
Date: Tue, 11 May 2021 11:06:22 +0300
Subject: links_scraper: genelinks: Remove unused import
---
test/requests/links_scraper/genelinks.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/test/requests/links_scraper/genelinks.py b/test/requests/links_scraper/genelinks.py
index 12300f4a..13aee7c8 100644
--- a/test/requests/links_scraper/genelinks.py
+++ b/test/requests/links_scraper/genelinks.py
@@ -2,7 +2,6 @@ import re
import requests
import urllib3
import os
-import logging
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
--
cgit v1.2.3
From d32b7f3a877f33ca90af2a4206d8e0d5e04c36de Mon Sep 17 00:00:00 2001
From: BonfaceKilz
Date: Tue, 11 May 2021 11:08:57 +0300
Subject: links_scraper: genelinks: Apply pep-8 formatting
---
test/requests/links_scraper/genelinks.py | 29 +++++++++--------------------
1 file changed, 9 insertions(+), 20 deletions(-)
diff --git a/test/requests/links_scraper/genelinks.py b/test/requests/links_scraper/genelinks.py
index 13aee7c8..52c13489 100644
--- a/test/requests/links_scraper/genelinks.py
+++ b/test/requests/links_scraper/genelinks.py
@@ -25,7 +25,6 @@ def search_templates():
parsed_page = soup(
open(file_path, encoding="utf8"), "html.parser")
html_parsed_pages.append(parsed_page)
-
return html_parsed_pages
@@ -33,7 +32,7 @@ def is_valid_link(url_link):
try:
result = urlparse(url_link)
return all([result.scheme, result.netloc, result.path])
- except Exception as e:
+ except Exception:
return False
@@ -41,13 +40,10 @@ def test_link(link):
print(f'Checking -->{link}')
results = None
try:
-
results = requests.get(link, verify=False, timeout=10)
status_code = results.status_code
-
- except Exception as e:
+ except Exception:
status_code = 408
-
return int(status_code) > 403
@@ -55,14 +51,11 @@ def fetch_css_links(parsed_page):
print("fetching css links")
for link in parsed_page.findAll("link"):
full_path = None
-
link_url = link.attrs.get("href")
if is_valid_link(link_url):
full_path = link_url
-
elif re.match(r"^/css", link_url) or re.match(r"^/js", link_url):
full_path = urljoin('http://localhost:5004/', link_url)
-
if full_path is not None:
if test_link(full_path):
BROKEN_LINKS.add(full_path)
@@ -70,16 +63,13 @@ def fetch_css_links(parsed_page):
def fetch_html_links(parsed_page):
print("fetching a tags ")
-
for link in parsed_page.findAll("a"):
full_path = None
link_url = link.attrs.get("href")
if re.match(r"^/", link_url):
full_path = urljoin('http://localhost:5004/', link_url)
-
elif is_valid_link(link_url):
full_path = link_url
-
if full_path is not None:
if test_link(full_path):
BROKEN_LINKS.add(full_path)
@@ -91,8 +81,11 @@ def fetch_script_tags(parsed_page):
js_link = link.attrs.get("src")
if js_link is not None:
if is_valid_link(js_link):
- raise SystemExit("Failed,the library should be packaged in guix.\
- Please contact,http://genenetwork.org/ for more details")
+ raise SystemExit("Failed,the library should be "
+ "packaged in guix. "
+ "Please contact, "
+ "http://genenetwork.org/ "
+ "for more details")
elif re.match(r"^/css", js_link) or re.match(r"^/js", js_link):
full_path = urljoin('http://localhost:5004/', js_link)
@@ -101,11 +94,9 @@ def fetch_script_tags(parsed_page):
def fetch_page_links(page_url):
-
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
html_page = uReq(page_url)
parsed_page = soup(html_page, "html.parser")
-
fetch_script_tags(parsed_page=parsed_page)
fetch_css_links(parsed_page=parsed_page)
fetch_html_links(parsed_page=parsed_page)
@@ -113,13 +104,10 @@ def fetch_page_links(page_url):
def webpages_to_check():
pages = [f"http://localhost:{PORT}/"]
-
return pages
if __name__ == '__main__':
- # results = search_templates()
-
for page in webpages_to_check():
fetch_page_links(page)
if len(BROKEN_LINKS) > 0:
@@ -129,4 +117,5 @@ if __name__ == '__main__':
if len(BROKEN_LINKS) > 0:
raise SystemExit(
- "The links Above are broken.Please contact genenetwork.org<<<<<<<<")
+ "The links Above are broken. "
+ "Please contact genenetwork.org<<<<<<<<")
--
cgit v1.2.3
From 5e3e8e5847fe994652e3c6675ce96312fd9dc16a Mon Sep 17 00:00:00 2001
From: BonfaceKilz
Date: Tue, 11 May 2021 11:09:36 +0300
Subject: workflows: main.yml: Remove hard-coded path
---
.github/workflows/main.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 64c0fdb2..2b7c3b16 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -57,10 +57,10 @@ jobs:
- name: Test for Broken Links
run: |
-
env GN2_PROFILE=/gn2-profile \
TMPDIR=/tmp\
WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \
GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \
- etc/default_settings.py -c /__w/genenetwork2/genenetwork2/test/requests/links_scraper/genelinks.py
+ etc/default_settings.py -c \
+ $PWD/test/requests/links_scraper/genelinks.py
--
cgit v1.2.3
From 232148b1304ae531df6d9157f6d574d5c944830e Mon Sep 17 00:00:00 2001
From: BonfaceKilz
Date: Tue, 11 May 2021 11:30:53 +0300
Subject: workflows: main.yml: Use screen to run gn2
---
.github/workflows/main.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 2b7c3b16..f9a3fa13 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -49,11 +49,11 @@ jobs:
- name: Start Genenetwork as a Background Task
run: |
- env GN2_PROFILE=/gn2-profile \
+ /gn2-profile/bin/screen -dmL bash -c "env GN2_PROFILE=/gn2-profile \
TMPDIR=/tmp SERVER_PORT=5004 \
WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \
GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \
- etc/default_settings.py&
+ etc/default_settings.py"
- name: Test for Broken Links
run: |
--
cgit v1.2.3
From 4538c0ad693ae94ebda5f0bf39678d776c0c8297 Mon Sep 17 00:00:00 2001
From: BonfaceKilz
Date: Tue, 11 May 2021 11:47:20 +0300
Subject: workflows: main.yml: Set up gn2 before running the unit tests
---
.github/workflows/main.yml | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index f9a3fa13..b09f2f1d 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -39,21 +39,21 @@ jobs:
mysql -u root -e "CREATE USER 'gn2'@'localhost' IDENTIFIED BY 'mysql_password';"
mysql -u root -e "GRANT ALL PRIVILEGES ON *.* TO 'gn2'@'localhost';FLUSH PRIVILEGES;"
- - name: Run the unit tests
+ - name: Start Genenetwork as a Background Task
run: |
- env GN2_PROFILE=/gn2-profile \
+ /gn2-profile/bin/screen -dmL bash -c "env GN2_PROFILE=/gn2-profile \
TMPDIR=/tmp SERVER_PORT=5004 \
WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \
GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \
- etc/default_settings.py -c -m unittest discover -v
+ etc/default_settings.py"
- - name: Start Genenetwork as a Background Task
+ - name: Run the unit tests
run: |
- /gn2-profile/bin/screen -dmL bash -c "env GN2_PROFILE=/gn2-profile \
+ env GN2_PROFILE=/gn2-profile \
TMPDIR=/tmp SERVER_PORT=5004 \
WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \
GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \
- etc/default_settings.py"
+ etc/default_settings.py -c -m unittest discover -v
- name: Test for Broken Links
run: |
--
cgit v1.2.3
From 75b378f757979af1d6fdaff3f3abde0599c5f744 Mon Sep 17 00:00:00 2001
From: BonfaceKilz
Date: Tue, 11 May 2021 12:10:37 +0300
Subject: workflows: main.yml: Use mariadb as a service
---
.github/workflows/main.yml | 30 +++++++++++++-----------------
1 file changed, 13 insertions(+), 17 deletions(-)
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index b09f2f1d..a2925e08 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -12,16 +12,19 @@ jobs:
unittest:
runs-on: ubuntu-latest
container: bonfacekilz/python3-genenetwork2:00ba1f8
+ services:
+ mysql:
+ image: mariadb:15.1
+ env:
+ MYSQL_ALLOW_EMPTY_PASSWORD: yes
+ MYSQL_USER: gn2
+ MYSQL_PASSWORD: mysql_password
+ MYSQL_DATABASE: db_webqtl_s
+ ports:
+ - 3306:3306
+ options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3
steps:
- # First start with mariadb set then checkout. The checkout gives
- # the mysqld enough time to start
- - name: Set up mariadb
- run: |
- mysql_install_db --user=mysql --datadir=/usr/local/mysql
- # Wait for the mysqld_safe process to start
- mysqld_safe --user=mysql --datadir=/usr/local/mysql &
-
# Use v1 of checkout since v2 fails
- name: Checkout Project
uses: actions/checkout@v1
@@ -31,17 +34,10 @@ jobs:
run: |
/gn2-profile/bin/screen -dmLS redisconn /gn2-profile/bin/redis-server
- # Redis is required by some of the tests 6379
- - name: Bootstrap tables
- run: |
- mysql -u root -e "SHOW DATABASES;"
- mysql -u root -e "CREATE DATABASE db_webqtl_s;"
- mysql -u root -e "CREATE USER 'gn2'@'localhost' IDENTIFIED BY 'mysql_password';"
- mysql -u root -e "GRANT ALL PRIVILEGES ON *.* TO 'gn2'@'localhost';FLUSH PRIVILEGES;"
-
- name: Start Genenetwork as a Background Task
run: |
- /gn2-profile/bin/screen -dmL bash -c "env GN2_PROFILE=/gn2-profile \
+ /gn2-profile/bin/screen -dmLS gn2conn
+ bash -c "env GN2_PROFILE=/gn2-profile \
TMPDIR=/tmp SERVER_PORT=5004 \
WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \
GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \
--
cgit v1.2.3
From c4ec08c1b544c05cce1c1247b8bc1afb741643e4 Mon Sep 17 00:00:00 2001
From: BonfaceKilz
Date: Tue, 11 May 2021 13:38:47 +0300
Subject: workflows: main.yml: Remove mysql service and update gn2 b/g task
---
.github/workflows/main.yml | 31 +++++++++++++++++--------------
1 file changed, 17 insertions(+), 14 deletions(-)
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a2925e08..c6b56a81 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -12,19 +12,16 @@ jobs:
unittest:
runs-on: ubuntu-latest
container: bonfacekilz/python3-genenetwork2:00ba1f8
- services:
- mysql:
- image: mariadb:15.1
- env:
- MYSQL_ALLOW_EMPTY_PASSWORD: yes
- MYSQL_USER: gn2
- MYSQL_PASSWORD: mysql_password
- MYSQL_DATABASE: db_webqtl_s
- ports:
- - 3306:3306
- options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3
steps:
+ # First start with mariadb set then checkout. The checkout gives
+ # the mysqld enough time to start
+ - name: Set up mariadb
+ run: |
+ mysql_install_db --user=mysql --datadir=/usr/local/mysql
+ # Wait for the mysqld_safe process to start
+ mysqld_safe --user=mysql --datadir=/usr/local/mysql &
+
# Use v1 of checkout since v2 fails
- name: Checkout Project
uses: actions/checkout@v1
@@ -34,10 +31,17 @@ jobs:
run: |
/gn2-profile/bin/screen -dmLS redisconn /gn2-profile/bin/redis-server
+ # Initialise the tables
+ - name: Bootstrap tables
+ run: |
+ mysql -u root -e "SHOW DATABASES;"
+ mysql -u root -e "CREATE DATABASE db_webqtl_s;"
+ mysql -u root -e "CREATE USER 'gn2'@'localhost' IDENTIFIED BY 'mysql_password';"
+ mysql -u root -e "GRANT ALL PRIVILEGES ON *.* TO 'gn2'@'localhost';FLUSH PRIVILEGES;"
+
- name: Start Genenetwork as a Background Task
run: |
- /gn2-profile/bin/screen -dmLS gn2conn
- bash -c "env GN2_PROFILE=/gn2-profile \
+ /gn2-profile/bin/screen -dm bash -c "env GN2_PROFILE=/gn2-profile \
TMPDIR=/tmp SERVER_PORT=5004 \
WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \
GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \
@@ -59,4 +63,3 @@ jobs:
GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \
etc/default_settings.py -c \
$PWD/test/requests/links_scraper/genelinks.py
-
--
cgit v1.2.3