aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--wqflask/wqflask/correlation/correlation_gn3_api.py71
1 files changed, 70 insertions, 1 deletions
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 51bf5fb5..c945f699 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -52,8 +52,64 @@ def create_target_this_trait(start_vars):
return (this_dataset, this_trait, target_dataset, sample_data)
+def sample_for_trait_lists(corr_results, target_dataset, this_trait, this_dataset, start_vars):
+ sample_data = process_samples(
+ start_vars, this_dataset.group.samplelist)
+ target_dataset.get_trait_data(list(sample_data.keys()))
+
+ this_trait = retrieve_sample_data(this_trait, this_dataset)
+
+ this_trait_data = {
+ "trait_sample_data": sample_data,
+ "trait_id": start_vars["trait_id"]
+ }
+ # trait_lists = dict([(list(corr_result)[0],True) for corr_result in corr_results])
+ # target_dataset.trait_data =list(filter(lambda dict_obj: dict_obj.keys()[
+ # 0] in corr_results_traits, target_dataset_data))
+ results = map_shared_keys_to_values(
+ target_dataset.samplelist, target_dataset.trait_data)
+ correlation_results = compute_all_sample_correlation(corr_method="pearson",
+ this_trait=this_trait_data,
+ target_dataset=results)
+
+
+ return correlation_results
+
+
+def tissue_for_trait_lists(corr_results, this_dataset, target_dataset, this_trait):
+ # # print(corr_results[0])--
+ # [{"awsdsd_at": {'corr_coeffient': 0.49714692782257336, 'p_value': 1.872077762359228e-05, 'num_overlap': 67}}]
+
+ print("creating trait_lists")
+ # corr_results = corr_results[0::]
+ trait_lists = dict([(list(corr_result)[0], True)
+ for corr_result in corr_results])
+ print("finished creating trait_list")
+
+ traits_symbol_dict = this_dataset.retrieve_genes("Symbol")
+ print("Retrieved symbol dict")
+ print("creating dict here>>>>>>>>>")
+ import time
+ init_time = time.time()
+ traits_symbol_dict = dict({trait_name: symbol for (
+ trait_name, symbol) in traits_symbol_dict.items() if trait_lists.get(trait_name)})
+ print("time taken to create this max dict is>>>>", time.time()-init_time)
+ print("finished creatinf the dict")
+ print("Fetching tissue datas")
+ primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
+ this_trait, traits_symbol_dict)
+ print("finihsed>>>>>>>>>>>>>>>>>>")
+ print("Calling experimental_compute_all_tissue_correlation")
+ corr_results = experimental_compute_all_tissue_correlation(
+ primary_tissue_dict=primary_tissue_data, target_tissues_data=target_tissue_data, corr_method="pearson")
+ # print('finished calling this tissue reuslts',corr_results)
+
+ return corr_results
+
+
def compute_correlation(start_vars, method="pearson"):
"""compute correlation for to call gn3 api"""
+ import time
corr_type = start_vars['corr_type']
@@ -67,6 +123,7 @@ def compute_correlation(start_vars, method="pearson"):
corr_input_data = {}
if corr_type == "sample":
+ import time
initial_time = time.time()
# corr_input_data = {
# "target_dataset": target_dataset.trait_data,
@@ -78,7 +135,7 @@ def compute_correlation(start_vars, method="pearson"):
# }
sample_data = process_samples(
start_vars, this_dataset.group.samplelist)
- target_dataset.fetch_probe_trait_data(list(sample_data.keys()))
+ target_dataset.get_trait_data(list(sample_data.keys()))
this_trait = retrieve_sample_data(this_trait, this_dataset)
print("Creating dataset and trait took", time.time()-initial_time)
@@ -94,8 +151,15 @@ def compute_correlation(start_vars, method="pearson"):
this_trait=this_trait_data,
target_dataset=results)
+ print("computedd>>>>>>>>>>>>>")
+
print("doing sample correlation took", time.time()-initial_time)
+ other_results_time = time.time()
+ other_results = tissue_for_trait_lists(
+ correlation_results, this_dataset, target_dataset, this_trait)
+ print(">>>time taken for this is", time.time()-other_results_time)
+
# requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
return correlation_results
@@ -121,6 +185,9 @@ def compute_correlation(start_vars, method="pearson"):
# print("time taken for compute tissue is", time.time()-initial_time)
# requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
+
+ sample_results = sample_for_trait_lists(
+ correlation_results, target_dataset, this_trait, this_dataset, start_vars)
return correlation_results
elif corr_type == "lit":
@@ -148,6 +215,8 @@ def compute_correlation(start_vars, method="pearson"):
def do_lit_correlation(this_trait, this_dataset, target_dataset):
geneid_dict = this_dataset.retrieve_genes("GeneId")
+ #
+ print("CALLING THE LIT CORRELATION HERE")
species = this_dataset.group.species.lower()
this_trait_geneid = this_trait.geneid