aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- wqflask/wqflask/correlation/correlation_gn3_api.py | 126
1 files changed, 45 insertions, 81 deletions
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index 3c21a850..b56c09d8 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -1,61 +1,55 @@
"""module that calls the gn3 api's to do the correlation """
import json
-import requests
-import time
+
from wqflask.correlation import correlation_functions
from base import data_set
+
from base.trait import create_trait
from base.trait import retrieve_sample_data
-# gn3 lib
+
from gn3.computations.correlations import compute_all_sample_correlation
from gn3.computations.correlations import map_shared_keys_to_values
-from gn3.computations.correlations import compute_all_tissue_correlation
from gn3.computations.correlations import compute_all_lit_correlation
from gn3.computations.correlations import experimental_compute_all_tissue_correlation
from gn3.db_utils import database_connector
-GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation"
+
+def create_target_this_trait(start_vars):
+ """Create the primary dataset, the target (corr) dataset and the primary trait named in start_vars; return (this_dataset, this_trait, target_dataset, sample_data)."""
+
+ this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
+ target_dataset = data_set.create_dataset(
+ dataset_name=start_vars['corr_dataset'])
+ this_trait = create_trait(dataset=this_dataset,
+ name=start_vars['trait_id'])
+ sample_data = ()  # always empty here; compute_correlation's "sample" branch rebuilds it via process_samples
+ return (this_dataset, this_trait, target_dataset, sample_data)
def process_samples(start_vars, sample_names, excluded_samples=None):
- """process samples method"""
+ """Return {str(sample): float(value)} parsed from the JSON in start_vars["sample_vals"], skipping excluded samples and values equal to "x" (case-insensitive)."""
sample_data = {}
if not excluded_samples:
excluded_samples = ()
-
sample_vals_dict = json.loads(start_vars["sample_vals"])
-
for sample in sample_names:
if sample not in excluded_samples:
val = sample_vals_dict[sample]
if not val.strip().lower() == "x":
sample_data[str(sample)] = float(val)
-
return sample_data
-def create_target_this_trait(start_vars):
- """this function creates the required trait and target dataset for correlation"""
-
- this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
- target_dataset = data_set.create_dataset(
- dataset_name=start_vars['corr_dataset'])
+def sample_for_trait_lists(corr_results, target_dataset,
+ this_trait, this_dataset, start_vars):
+ """interface function for correlation on top results"""
- this_trait = create_trait(dataset=this_dataset,
- name=start_vars['trait_id'])
-
- # target_dataset.get_trait_data(list(self.sample_data.keys()))
-
- # this_trait = retrieve_sample_data(this_trait, this_dataset)
- sample_data = ()
- return (this_dataset, this_trait, target_dataset, sample_data)
-
-
-def sample_for_trait_lists(corr_results, target_dataset, this_trait, this_dataset, start_vars):
sample_data = process_samples(
start_vars, this_dataset.group.samplelist)
target_dataset.get_trait_data(list(sample_data.keys()))
+ # should filter target traits from here
+ _corr_results = corr_results
this_trait = retrieve_sample_data(this_trait, this_dataset)
@@ -69,65 +63,55 @@ def sample_for_trait_lists(corr_results, target_dataset, this_trait, this_datase
this_trait=this_trait_data,
target_dataset=results)
-
return correlation_results
-def tissue_for_trait_lists(corr_results, this_dataset, target_dataset, this_trait):
- trait_lists = dict([(list(corr_result)[0], True)
- for corr_result in corr_results])
+def tissue_for_trait_lists(corr_results, this_dataset, this_trait):
+ """Run the pearson tissue correlation restricted to the traits that appear in corr_results; returns the gn3 tissue correlation results."""
+ # the first key of each corr_result is taken as its trait name; the dict is
+ # only used as a membership filter on the gene-symbol mapping below
+ trait_lists = {list(corr_result)[0]: 1 for corr_result in corr_results}
traits_symbol_dict = this_dataset.retrieve_genes("Symbol")
traits_symbol_dict = dict({trait_name: symbol for (
trait_name, symbol) in traits_symbol_dict.items() if trait_lists.get(trait_name)})
primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
this_trait, traits_symbol_dict)
corr_results = experimental_compute_all_tissue_correlation(
- primary_tissue_dict=primary_tissue_data, target_tissues_data=target_tissue_data, corr_method="pearson")
+ primary_tissue_dict=primary_tissue_data,
+ target_tissues_data=target_tissue_data,
+ corr_method="pearson")
return corr_results
def compute_correlation(start_vars, method="pearson"):
"""compute correlation for to call gn3 api"""
- import time
+ # pylint: disable-msg=too-many-locals
corr_type = start_vars['corr_type']
(this_dataset, this_trait, target_dataset,
sample_data) = create_target_this_trait(start_vars)
- # cor_results = compute_correlation(start_vars)
-
method = start_vars['corr_sample_method']
-
+ _corr_return_results = start_vars.get("corr_return_results", 100)
corr_input_data = {}
if corr_type == "sample":
-
+
sample_data = process_samples(
start_vars, this_dataset.group.samplelist)
- initial_time = time.time()
target_dataset.get_trait_data(list(sample_data.keys()))
this_trait = retrieve_sample_data(this_trait, this_dataset)
- print("Creating target dataset and trait took", time.time()-initial_time)
-
-
this_trait_data = {
"trait_sample_data": sample_data,
"trait_id": start_vars["trait_id"]
}
- initial_time = time.time()
results = map_shared_keys_to_values(
target_dataset.samplelist, target_dataset.trait_data)
correlation_results = compute_all_sample_correlation(corr_method=method,
this_trait=this_trait_data,
target_dataset=results)
- print("doing sample correlation took", time.time()-initial_time)
- # other_results = tissue_for_trait_lists(
- # correlation_results, this_dataset, target_dataset, this_trait)
- # requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
- return correlation_results
-
elif corr_type == "tissue":
trait_symbol_dict = this_dataset.retrieve_genes("Symbol")
primary_tissue_data, target_tissue_data = get_tissue_correlation_input(
@@ -137,50 +121,33 @@ def compute_correlation(start_vars, method="pearson"):
"primary_tissue": primary_tissue_data,
"target_tissues_dict": target_tissue_data
}
- initial_time = time.time()
- correlation_results = experimental_compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
- target_tissues_data=corr_input_data[
- "target_tissues_dict"],
- corr_method=method)
- print("computing tissue took >>>>", time.time()-initial_time)
- # sample_results = sample_for_trait_lists(
- # correlation_results, target_dataset, this_trait, this_dataset, start_vars)
- return correlation_results
+ correlation_results = experimental_compute_all_tissue_correlation(
+ primary_tissue_dict=corr_input_data["primary_tissue"],
+ target_tissues_data=corr_input_data[
+ "target_tissues_dict"],
+ corr_method=method
+
+ )
elif corr_type == "lit":
(this_trait_geneid, geneid_dict, species) = do_lit_correlation(
- this_trait, this_dataset, target_dataset)
+ this_trait, this_dataset)
conn, _cursor_object = database_connector()
- initial_time = time.time()
with conn:
-
- lit_corr_results = compute_all_lit_correlation(
+ correlation_results = compute_all_lit_correlation(
conn=conn, trait_lists=list(geneid_dict.items()),
species=species, gene_id=this_trait_geneid)
- return lit_corr_results
- # requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}"
- # corr_input_data = geneid_dict
- # corr_results = requests.post(requests_url, json=corr_input_data)
-
- # data = corr_results.json()
-
- # return data
+ return correlation_results
-def do_lit_correlation(this_trait, this_dataset, target_dataset):
+def do_lit_correlation(this_trait, this_dataset):
+ """Gather literature-correlation inputs: return (this_trait.geneid, this_dataset.retrieve_genes("GeneId"), lower-cased group species)."""
geneid_dict = this_dataset.retrieve_genes("GeneId")
- #
- print("CALLING THE LIT CORRELATION HERE")
species = this_dataset.group.species.lower()
-
- this_trait_geneid = this_trait.geneid
- this_trait_gene_data = {
- this_trait.name: this_trait_geneid
- }
-
- return (this_trait_geneid, geneid_dict, species)
+ trait_geneid = this_trait.geneid
+ return (trait_geneid, geneid_dict, species)
def get_tissue_correlation_input(this_trait, trait_symbol_dict):
@@ -190,7 +157,6 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict):
if this_trait.symbol.lower() in primary_trait_tissue_vals_dict:
primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower(
)]
-
corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
symbol_list=list(trait_symbol_dict.values()))
primary_tissue_data = {
@@ -202,7 +168,5 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict):
"trait_symbol_dict": trait_symbol_dict,
"symbol_tissue_vals_dict": corr_result_tissue_vals_dict
}
-
return (primary_tissue_data, target_tissue_data)
-
return None