about summary refs log tree commit diff
diff options
context:
space:
mode:
authorAlexander Kabui2021-04-26 15:42:07 +0300
committerAlexander Kabui2021-04-26 15:42:07 +0300
commit7556f8a5dfc4c98bc0f0c8241592acec22b65102 (patch)
tree92979ae1baf2d608e871156e00ec301f690bd139
parent1b0566d7c9779b979d20c350f66d5628fb55eba6 (diff)
downloadgenenetwork2-7556f8a5dfc4c98bc0f0c8241592acec22b65102.tar.gz
test for probe-type sample and tissue
-rw-r--r--wqflask/wqflask/correlation/correlation_gn3_api.py71
1 files changed, 70 insertions, 1 deletions
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 51bf5fb5..c945f699 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -52,8 +52,64 @@ def create_target_this_trait(start_vars):
     return (this_dataset, this_trait, target_dataset, sample_data)
 
 
+def sample_for_trait_lists(corr_results, target_dataset, this_trait, this_dataset, start_vars):
+    sample_data = process_samples(
+        start_vars, this_dataset.group.samplelist)
+    target_dataset.get_trait_data(list(sample_data.keys()))
+
+    this_trait = retrieve_sample_data(this_trait, this_dataset)
+
+    this_trait_data = {
+        "trait_sample_data": sample_data,
+        "trait_id": start_vars["trait_id"]
+    }
+    # trait_lists = dict([(list(corr_result)[0],True) for corr_result in corr_results])
+    # target_dataset.trait_data =list(filter(lambda dict_obj: dict_obj.keys()[
+    #                  0] in corr_results_traits, target_dataset_data))
+    results = map_shared_keys_to_values(
+        target_dataset.samplelist, target_dataset.trait_data)
+    correlation_results = compute_all_sample_correlation(corr_method="pearson",
+                                                         this_trait=this_trait_data,
+                                                         target_dataset=results)
+
+
+    return correlation_results
+
+
+def tissue_for_trait_lists(corr_results, this_dataset, target_dataset, this_trait):
+    # # print(corr_results[0])--
+    # [{"awsdsd_at": {'corr_coeffient': 0.49714692782257336, 'p_value': 1.872077762359228e-05, 'num_overlap': 67}}]
+
+    print("creating trait_lists")
+    # corr_results = corr_results[0::]
+    trait_lists = dict([(list(corr_result)[0], True)
+                        for corr_result in corr_results])
+    print("finished creating trait_list")
+
+    traits_symbol_dict = this_dataset.retrieve_genes("Symbol")
+    print("Retrieved symbol dict")
+    print("creating dict here>>>>>>>>>")
+    import time
+    init_time = time.time()
+    traits_symbol_dict = dict({trait_name: symbol for (
+        trait_name, symbol) in traits_symbol_dict.items() if trait_lists.get(trait_name)})
+    print("time taken to create this max dict is>>>>", time.time()-init_time)
+    print("finished creatinf the dict")
+    print("Fetching tissue datas")
+    primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
+        this_trait, traits_symbol_dict)
+    print("finihsed>>>>>>>>>>>>>>>>>>")
+    print("Calling experimental_compute_all_tissue_correlation")
+    corr_results = experimental_compute_all_tissue_correlation(
+        primary_tissue_dict=primary_tissue_data, target_tissues_data=target_tissue_data, corr_method="pearson")
+    # print('finished calling this tissue reuslts',corr_results)
+
+    return corr_results
+
+
 def compute_correlation(start_vars, method="pearson"):
     """compute correlation for to call gn3  api"""
+    import time
 
     corr_type = start_vars['corr_type']
 
@@ -67,6 +123,7 @@ def compute_correlation(start_vars, method="pearson"):
     corr_input_data = {}
 
     if corr_type == "sample":
+        import time
         initial_time = time.time()
         # corr_input_data = {
         #     "target_dataset": target_dataset.trait_data,
@@ -78,7 +135,7 @@ def compute_correlation(start_vars, method="pearson"):
         # }
         sample_data = process_samples(
             start_vars, this_dataset.group.samplelist)
-        target_dataset.fetch_probe_trait_data(list(sample_data.keys()))
+        target_dataset.get_trait_data(list(sample_data.keys()))
         this_trait = retrieve_sample_data(this_trait, this_dataset)
 
         print("Creating dataset and trait took", time.time()-initial_time)
@@ -94,8 +151,15 @@ def compute_correlation(start_vars, method="pearson"):
                                                              this_trait=this_trait_data,
                                                              target_dataset=results)
 
+        print("computedd>>>>>>>>>>>>>")
+
         print("doing sample correlation took", time.time()-initial_time)
 
+        other_results_time = time.time()
+        other_results = tissue_for_trait_lists(
+            correlation_results, this_dataset, target_dataset, this_trait)
+        print(">>>time taken for this is", time.time()-other_results_time)
+
         # requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
         return correlation_results
 
@@ -121,6 +185,9 @@ def compute_correlation(start_vars, method="pearson"):
         # print("time taken for compute tissue is", time.time()-initial_time)
 
         # requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
+
+        sample_results = sample_for_trait_lists(
+            correlation_results, target_dataset, this_trait, this_dataset, start_vars)
         return correlation_results
 
     elif corr_type == "lit":
@@ -148,6 +215,8 @@ def compute_correlation(start_vars, method="pearson"):
 
 def do_lit_correlation(this_trait, this_dataset, target_dataset):
     geneid_dict = this_dataset.retrieve_genes("GeneId")
+    #
+    print("CALLING THE LIT CORRELATION HERE")
     species = this_dataset.group.species.lower()
 
     this_trait_geneid = this_trait.geneid