about summary refs log tree commit diff
diff options
context:
space:
mode:
author Alexander Kabui 2021-05-01 03:24:05 +0300
committer Alexander Kabui 2021-05-01 03:24:05 +0300
commit 149f9c7c6804d4e717ed9aa3a42968b295693b3d (patch)
tree 6e666ad6e6c08dfb43f7c40adfa722892491459c
parent 02916a787b384709d96eebfaefd4898cae415739 (diff)
download genenetwork2-149f9c7c6804d4e717ed9aa3a42968b295693b3d.tar.gz
autopep8 for file
-rw-r--r-- wqflask/wqflask/correlation/correlation_gn3_api.py 126
1 files changed, 45 insertions, 81 deletions
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 3c21a850..b56c09d8 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -1,61 +1,55 @@
 """module that calls the gn3 api's to do the correlation """
 import json
-import requests
-import time
+
 from wqflask.correlation import correlation_functions
 
 from base import data_set
+
 from base.trait import create_trait
 from base.trait import retrieve_sample_data
-# gn3 lib
+
 from gn3.computations.correlations import compute_all_sample_correlation
 from gn3.computations.correlations import map_shared_keys_to_values
-from gn3.computations.correlations import compute_all_tissue_correlation
 from gn3.computations.correlations import compute_all_lit_correlation
 from gn3.computations.correlations import experimental_compute_all_tissue_correlation
 from gn3.db_utils import database_connector
 
-GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation"
+
+def create_target_this_trait(start_vars):
+    """this function creates the required trait and target dataset for correlation"""
+
+    this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
+    target_dataset = data_set.create_dataset(
+        dataset_name=start_vars['corr_dataset'])
+    this_trait = create_trait(dataset=this_dataset,
+                              name=start_vars['trait_id'])
+    sample_data = ()
+    return (this_dataset, this_trait, target_dataset, sample_data)
 
 
 def process_samples(start_vars, sample_names, excluded_samples=None):
-    """process samples method"""
+    """process samples"""
     sample_data = {}
     if not excluded_samples:
         excluded_samples = ()
-
         sample_vals_dict = json.loads(start_vars["sample_vals"])
-
         for sample in sample_names:
             if sample not in excluded_samples:
                 val = sample_vals_dict[sample]
                 if not val.strip().lower() == "x":
                     sample_data[str(sample)] = float(val)
-
     return sample_data
 
 
-def create_target_this_trait(start_vars):
-    """this function creates the required trait and target dataset for correlation"""
-
-    this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
-    target_dataset = data_set.create_dataset(
-        dataset_name=start_vars['corr_dataset'])
+def sample_for_trait_lists(corr_results, target_dataset,
+                           this_trait, this_dataset, start_vars):
+    """interface function for correlation on top results"""
 
-    this_trait = create_trait(dataset=this_dataset,
-                              name=start_vars['trait_id'])
-
-    # target_dataset.get_trait_data(list(self.sample_data.keys()))
-
-    # this_trait = retrieve_sample_data(this_trait, this_dataset)
-    sample_data = ()
-    return (this_dataset, this_trait, target_dataset, sample_data)
-
-
-def sample_for_trait_lists(corr_results, target_dataset, this_trait, this_dataset, start_vars):
     sample_data = process_samples(
         start_vars, this_dataset.group.samplelist)
     target_dataset.get_trait_data(list(sample_data.keys()))
+    # should filter target traits from here
+    _corr_results = corr_results
 
     this_trait = retrieve_sample_data(this_trait, this_dataset)
 
@@ -69,65 +63,55 @@ def sample_for_trait_lists(corr_results, target_dataset, this_trait, this_datase
                                                          this_trait=this_trait_data,
                                                          target_dataset=results)
 
-
     return correlation_results
 
 
-def tissue_for_trait_lists(corr_results, this_dataset, target_dataset, this_trait):
-    trait_lists = dict([(list(corr_result)[0], True)
-                        for corr_result in corr_results])
+def tissue_for_trait_lists(corr_results, this_dataset, this_trait):
+    """interface function for doing tissue corr_results on trait_list"""
+    # trait_lists = dict([(list(corr_result)[0], True)
+    #                     for corr_result in corr_results])
+    trait_lists = {list(corr_results)[0]: 1 for corr_result in corr_results}
     traits_symbol_dict = this_dataset.retrieve_genes("Symbol")
     traits_symbol_dict = dict({trait_name: symbol for (
         trait_name, symbol) in traits_symbol_dict.items() if trait_lists.get(trait_name)})
     primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
         this_trait, traits_symbol_dict)
     corr_results = experimental_compute_all_tissue_correlation(
-        primary_tissue_dict=primary_tissue_data, target_tissues_data=target_tissue_data, corr_method="pearson")
+        primary_tissue_dict=primary_tissue_data,
+        target_tissues_data=target_tissue_data,
+        corr_method="pearson")
     return corr_results
 
 
 def compute_correlation(start_vars, method="pearson"):
     """compute correlation for to call gn3  api"""
-    import time
+    # pylint: disable-msg=too-many-locals
 
     corr_type = start_vars['corr_type']
 
     (this_dataset, this_trait, target_dataset,
      sample_data) = create_target_this_trait(start_vars)
 
-    # cor_results = compute_correlation(start_vars)
-
     method = start_vars['corr_sample_method']
-
+    _corr_return_results = start_vars.get("corr_return_results", 100)
     corr_input_data = {}
 
     if corr_type == "sample":
-        
+
         sample_data = process_samples(
             start_vars, this_dataset.group.samplelist)
-        initial_time = time.time()
         target_dataset.get_trait_data(list(sample_data.keys()))
         this_trait = retrieve_sample_data(this_trait, this_dataset)
-        print("Creating target dataset and trait took", time.time()-initial_time)
-
-
         this_trait_data = {
             "trait_sample_data": sample_data,
             "trait_id": start_vars["trait_id"]
         }
-        initial_time = time.time()
         results = map_shared_keys_to_values(
             target_dataset.samplelist, target_dataset.trait_data)
         correlation_results = compute_all_sample_correlation(corr_method=method,
                                                              this_trait=this_trait_data,
                                                              target_dataset=results)
 
-        print("doing sample correlation took", time.time()-initial_time)
-        # other_results = tissue_for_trait_lists(
-        #     correlation_results, this_dataset, target_dataset, this_trait)
-        # requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
-        return correlation_results
-
     elif corr_type == "tissue":
         trait_symbol_dict = this_dataset.retrieve_genes("Symbol")
         primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
@@ -137,50 +121,33 @@ def compute_correlation(start_vars, method="pearson"):
             "primary_tissue": primary_tissue_data,
             "target_tissues_dict": target_tissue_data
         }
-        initial_time = time.time()
-        correlation_results = experimental_compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
-                                                                          target_tissues_data=corr_input_data[
-            "target_tissues_dict"],
-            corr_method=method)
-        print("computing tissue took >>>>", time.time()-initial_time)
-        # sample_results = sample_for_trait_lists(
-        #     correlation_results, target_dataset, this_trait, this_dataset, start_vars)
-        return correlation_results
+        correlation_results = experimental_compute_all_tissue_correlation(
+            primary_tissue_dict=corr_input_data["primary_tissue"],
+            target_tissues_data=corr_input_data[
+                "target_tissues_dict"],
+            corr_method=method
+
+        )
 
     elif corr_type == "lit":
         (this_trait_geneid, geneid_dict, species) = do_lit_correlation(
-            this_trait, this_dataset, target_dataset)
+            this_trait, this_dataset)
 
         conn, _cursor_object = database_connector()
-        initial_time = time.time()
         with conn:
-
-            lit_corr_results = compute_all_lit_correlation(
+            correlation_results = compute_all_lit_correlation(
                 conn=conn, trait_lists=list(geneid_dict.items()),
                 species=species, gene_id=this_trait_geneid)
 
-        return lit_corr_results
-        # requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}"
-        # corr_input_data = geneid_dict
-    # corr_results = requests.post(requests_url, json=corr_input_data)
-
-    # data = corr_results.json()
-
-    # return data
+    return correlation_results
 
 
-def do_lit_correlation(this_trait, this_dataset, target_dataset):
+def do_lit_correlation(this_trait, this_dataset):
+    """function for fetching lit inputs"""
     geneid_dict = this_dataset.retrieve_genes("GeneId")
-    #
-    print("CALLING THE LIT CORRELATION HERE")
     species = this_dataset.group.species.lower()
-
-    this_trait_geneid = this_trait.geneid
-    this_trait_gene_data = {
-        this_trait.name: this_trait_geneid
-    }
-
-    return (this_trait_geneid, geneid_dict, species)
+    trait_geneid = this_trait.geneid
+    return (trait_geneid, geneid_dict, species)
 
 
 def get_tissue_correlation_input(this_trait, trait_symbol_dict):
@@ -190,7 +157,6 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict):
     if this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
         primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower(
         )]
-
         corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
             symbol_list=list(trait_symbol_dict.values()))
         primary_tissue_data = {
@@ -202,7 +168,5 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict):
             "trait_symbol_dict": trait_symbol_dict,
             "symbol_tissue_vals_dict": corr_result_tissue_vals_dict
         }
-
         return (primary_tissue_data, target_tissue_data)
-
     return None