From 7556f8a5dfc4c98bc0f0c8241592acec22b65102 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 26 Apr 2021 15:42:07 +0300 Subject: test for probe-type sample and tissue --- wqflask/wqflask/correlation/correlation_gn3_api.py | 71 +++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index 51bf5fb5..c945f699 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -52,8 +52,64 @@ def create_target_this_trait(start_vars): return (this_dataset, this_trait, target_dataset, sample_data) +def sample_for_trait_lists(corr_results, target_dataset, this_trait, this_dataset, start_vars): + sample_data = process_samples( + start_vars, this_dataset.group.samplelist) + target_dataset.get_trait_data(list(sample_data.keys())) + + this_trait = retrieve_sample_data(this_trait, this_dataset) + + this_trait_data = { + "trait_sample_data": sample_data, + "trait_id": start_vars["trait_id"] + } + # trait_lists = dict([(list(corr_result)[0],True) for corr_result in corr_results]) + # target_dataset.trait_data =list(filter(lambda dict_obj: dict_obj.keys()[ + # 0] in corr_results_traits, target_dataset_data)) + results = map_shared_keys_to_values( + target_dataset.samplelist, target_dataset.trait_data) + correlation_results = compute_all_sample_correlation(corr_method="pearson", + this_trait=this_trait_data, + target_dataset=results) + + + return correlation_results + + +def tissue_for_trait_lists(corr_results, this_dataset, target_dataset, this_trait): + # # print(corr_results[0])-- + # [{"awsdsd_at": {'corr_coeffient': 0.49714692782257336, 'p_value': 1.872077762359228e-05, 'num_overlap': 67}}] + + print("creating trait_lists") + # corr_results = corr_results[0::] + trait_lists = dict([(list(corr_result)[0], True) + for corr_result in corr_results]) + print("finished creating trait_list") + + traits_symbol_dict = this_dataset.retrieve_genes("Symbol") + print("Retrieved symbol dict") + print("creating dict here>>>>>>>>>") + import time + init_time = time.time() + traits_symbol_dict = dict({trait_name: symbol for ( + trait_name, symbol) in traits_symbol_dict.items() if trait_lists.get(trait_name)}) + print("time taken to create this max dict is>>>>", time.time()-init_time) + print("finished creatinf the dict") + print("Fetching tissue datas") + primary_tissue_data, target_tissue_data = get_tissue_correlation_input( + this_trait, traits_symbol_dict) + print("finihsed>>>>>>>>>>>>>>>>>>") + print("Calling experimental_compute_all_tissue_correlation") + corr_results = experimental_compute_all_tissue_correlation( + primary_tissue_dict=primary_tissue_data, target_tissues_data=target_tissue_data, corr_method="pearson") + # print('finished calling this tissue reuslts',corr_results) + + return corr_results + + def compute_correlation(start_vars, method="pearson"): """compute correlation for to call gn3 api""" + import time corr_type = start_vars['corr_type'] @@ -67,6 +123,7 @@ def compute_correlation(start_vars, method="pearson"): corr_input_data = {} if corr_type == "sample": + import time initial_time = time.time() # corr_input_data = { # "target_dataset": target_dataset.trait_data, @@ -78,7 +135,7 @@ def compute_correlation(start_vars, method="pearson"): # } sample_data = process_samples( start_vars, this_dataset.group.samplelist) - target_dataset.fetch_probe_trait_data(list(sample_data.keys())) + target_dataset.get_trait_data(list(sample_data.keys())) this_trait = retrieve_sample_data(this_trait, this_dataset) print("Creating dataset and trait took", time.time()-initial_time) @@ -94,8 +151,15 @@ def compute_correlation(start_vars, method="pearson"): this_trait=this_trait_data, target_dataset=results) + print("computedd>>>>>>>>>>>>>") + print("doing sample correlation took", time.time()-initial_time) + other_results_time = time.time() + other_results = tissue_for_trait_lists( + correlation_results, this_dataset, target_dataset, this_trait) + print(">>>time taken for this is", time.time()-other_results_time) + # requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}" return correlation_results @@ -121,6 +185,9 @@ def compute_correlation(start_vars, method="pearson"): # print("time taken for compute tissue is", time.time()-initial_time) # requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}" + + sample_results = sample_for_trait_lists( + correlation_results, target_dataset, this_trait, this_dataset, start_vars) return correlation_results elif corr_type == "lit": @@ -148,6 +215,8 @@ def compute_correlation(start_vars, method="pearson"): def do_lit_correlation(this_trait, this_dataset, target_dataset): geneid_dict = this_dataset.retrieve_genes("GeneId") + # + print("CALLING THE LIT CORRELATION HERE") species = this_dataset.group.species.lower() this_trait_geneid = this_trait.geneid -- cgit v1.2.3