aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alexander Kabui 2021-11-15 18:03:55 +0300
committer Alexander Kabui 2021-11-15 18:03:55 +0300
commit 18b53441a0136071db94c72b112a746e056ef971 (patch)
tree b1a70e0470901bcce9fcc8dc4bc92bc881f40930
parent 01d42255f52a61c6d3d007ffd1e5e02765a76730 (diff)
download genenetwork2-18b53441a0136071db94c72b112a746e056ef971.tar.gz
refactor function to fetch datasets data for precomputes
-rw-r--r-- wqflask/wqflask/correlation/pre_computes.py 72
1 file changed, 56 insertions, 16 deletions
diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py
index 01fa1a3d..e7147ddf 100644
--- a/wqflask/wqflask/correlation/pre_computes.py
+++ b/wqflask/wqflask/correlation/pre_computes.py
@@ -20,6 +20,7 @@ def generate_filename(base_dataset_name, target_dataset_name, base_timestamp, ta
def cache_compute_results(base_dataset_type,
base_dataset_name,
target_dataset_name,
+ corr_method,
correlation_results,
trait_name):
# pass
@@ -34,43 +35,35 @@ def cache_compute_results(base_dataset_type,
target_dataset_timestamp = base_timestamp
-
-
file_name = generate_filename(
base_dataset_name, target_dataset_name,
base_timestamp, target_dataset_timestamp)
-
- file_path = os.path.join(TMPDIR,f"{file_name}.json")
-
+ file_path = os.path.join(TMPDIR, f"{file_name}.json")
try:
- with open(file_path,"r+") as json_file_handler:
+ with open(file_path, "r+") as json_file_handler:
data = json.load(json_file_handler)
data[trait_name] = correlation_results
json_file_handler.seek(0)
- json.dump(data,json_file_handler)
+ json.dump(data, json_file_handler)
json_file_handler.truncate()
-
+
except FileNotFoundError:
- with open(file_path,"w+") as file_handler:
+ with open(file_path, "w+") as file_handler:
data = {}
- data[trait_name] =correlation_results
-
- json.dump(data,file_handler)
-
+ data[trait_name] = correlation_results
+ json.dump(data, file_handler)
# create the file only if it does not exists
# else open the file to cache the results
-
-
def fetch_precompute_results(base_dataset_name, target_dataset_name, dataset_type, trait_name):
"""function to check for precomputed results"""
@@ -84,7 +77,8 @@ def fetch_precompute_results(base_dataset_name, target_dataset_name, dataset_typ
return
else:
- base_timestamp = target_dataset_timestamp = base_timestamp.decode("utf-8")
+ base_timestamp = target_dataset_timestamp = base_timestamp.decode(
+ "utf-8")
file_name = generate_filename(
base_dataset_name, target_dataset_name,
@@ -103,3 +97,49 @@ def fetch_precompute_results(base_dataset_name, target_dataset_name, dataset_typ
except FileNotFoundError:
pass
+
+
+def pre_compute_dataset_vs_dataset(base_dataset, target_dataset, corr_method):
+    """Compute sample correlations for every trait of a dataset.
+
+    Warning: heavy function; it should be invoked infrequently.
+
+    Each (trait name, strain values) pair found in
+    ``target_dataset.trait_data`` is wrapped as a ``this_trait`` dict and
+    handed to ``fast_compute_all_sample_correlation``.
+
+    input: base_dataset (not read by this function), target_dataset (must
+    expose ``trait_data``), corr_method (forwarded unchanged)
+
+    output: dict mapping each trait name to its correlation results
+    """
+    trait_data = target_dataset.trait_data
+    # Iterating a mapping directly yields only its keys, which cannot be
+    # unpacked into (name, values); use items() when it is available and
+    # fall back to plain iteration for an iterable of pairs.
+    trait_pairs = trait_data.items() if hasattr(
+        trait_data, "items") else trait_data
+
+    dataset_correlation_results = {}
+
+    for (trait_name, strain_values) in trait_pairs:
+        this_trait_data = {
+            "trait_sample_data": strain_values,
+            "trait_id": trait_name
+        }
+
+        # Only names bound in this function may be passed on; the dataset
+        # is forwarded exactly as it was received.
+        # NOTE(review): the shape the correlation helper expects for
+        # ``target_dataset`` is not visible here -- confirm.
+        dataset_correlation_results[trait_name] = (
+            fast_compute_all_sample_correlation(
+                corr_method=corr_method,
+                this_trait=this_trait_data,
+                target_dataset=target_dataset))
+
+    return dataset_correlation_results
+
+
+def get_datasets_data(base_dataset, target_dataset_data):
+    """Fetch trait data for both datasets in the pre-compute input format.
+
+    Both datasets are queried with the sample list of the *base*
+    dataset's group, and each result is then passed through
+    ``map_shared_keys_to_values`` together with that sample list.
+
+    input: base_dataset and target_dataset_data; both must provide
+    ``get_trait_data`` and base_dataset must also provide
+    ``group.all_samples_ordered``
+
+    output: tuple ``(target_results, base_results)`` -- target first;
+    presumably two dicts with key==trait and value==strains (depends on
+    ``map_shared_keys_to_values``; verify)
+    """
+    # The target dataset arrives as ``target_dataset_data``; no other name
+    # for it exists in this scope.
+    # NOTE(review): all_samples_ordered() is deliberately called afresh
+    # for each use, since it is not visible here whether get_trait_data
+    # keeps or mutates the list it is given.
+    target_traits_data = target_dataset_data.get_trait_data(
+        base_dataset.group.all_samples_ordered())
+
+    base_traits_data = base_dataset.get_trait_data(
+        base_dataset.group.all_samples_ordered())
+
+    samples_fetched = base_dataset.group.all_samples_ordered()
+
+    target_results = map_shared_keys_to_values(
+        samples_fetched, target_traits_data)
+    base_results = map_shared_keys_to_values(
+        samples_fetched, base_traits_data)
+
+    return (target_results, base_results)