aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--wqflask/wqflask/correlation/pre_computes.py75
1 files changed, 34 insertions, 41 deletions
diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py
index 4244fcfb..1d832fde 100644
--- a/wqflask/wqflask/correlation/pre_computes.py
+++ b/wqflask/wqflask/correlation/pre_computes.py
@@ -1,28 +1,28 @@
-
+import json
import os
import hashlib
from base.data_set import query_table_timestamp
from base.webqtlConfig import TMPDIR
+from redis import Redis
+r = Redis()
-def generate_filename(**kwargs):
- """generate unique filename"""
- base_dataset_name = kwargs["base_dataset"]
- target_dataset_name = kwargs["target_dataset"]
- base_timestamp = kwargs["base_timestamp"]
- target_dataset_timestamp = kwargs["target_timestamp"]
+def generate_filename(base_dataset_name, target_dataset_name, base_timestamp, target_dataset_timestamp):
+ """generate unique filename"""
string_unicode = f"{base_dataset_name}{target_dataset_name}{base_timestamp}{target_dataset_timestamp}sample_corr_compute".encode()
return hashlib.md5(string_unicode).hexdigest()
-def cache_compute_results(start_vars,
- base_dataset_type,
- correlation_results,
- trait_name):
+def cache_compute_results(base_dataset_type,
+ base_dataset_name,
+ target_dataset_name,
+ correlation_results,
+ trait_name):
# pass
+ """function to cache correlation results for heavy computations"""
# init assumption only caching probeset type
# fix redis;issue potential redis_cache!=current_timestamp
@@ -30,10 +30,11 @@ def cache_compute_results(start_vars,
if base_timestamp is None:
# fetch the timestamp
- base_timestamp = target_dataset_timestamp = query_table_timestamp(
- dataset_type)
+ base_timestamp = query_table_timestamp(
+ base_dataset_type)
+ r.set(f"{base_dataset_type}timestamp", base_timestamp)
- r.set(f"{dataset_type}timestamp", target_dataset_timestamp)
+ target_dataset_timestamp = base_timestamp
file_name = generate_filename(
base_dataset_name, target_dataset_name,
@@ -41,51 +42,43 @@ def cache_compute_results(start_vars,
file_path = os.path.join(TMPDIR, f"{file_name}.json")
- try:
+ try:
+
+ with open(file_path, "r+") as json_handler:
- with open(file_path, "r+") as json_handler:
+ results = json.load(json_handler)
+ results[trait_name] = correlation_results
- results = json.load(json_handler)
+ json.dump(results, json_handler)
- if results.get(trait_name) is None:
- results.update({trait_name: correlation_results})
+ except FileNotFoundError:
- json.dump(results, json_handler)
+ with open(file_path, "w+") as write_json_handler:
+ json.dump({trait_name: correlation_results}, write_json_handler)
- except FileNotFoundError:
- with open(file_path, "w") as json_handler:
- json.dump({trait_name: correlation_results}, json_handler)
-def fetch_precompute_results(base_dataset_name,target_dataset_name,trait_name):
+def fetch_precompute_results(base_dataset_name, target_dataset_name, dataset_type, trait_name):
"""function to check for precomputed results"""
# check for redis timestamp
# fix rely on the fact correlation run oftenly probeset is set
- base_timestamp = target_dataset_timestamp = r.get(dataset_type)
-
+ base_timestamp = target_dataset_timestamp = r.get(f"{dataset_type}timestamp")
if base_timestamp is None:
return
- else:
- file_name = generate_filename(
+ file_name = generate_filename(
base_dataset_name, target_dataset_name,
base_timestamp, target_dataset_timestamp)
- try:
- with open(file_path,"r") as json_handler:
- correlation_results = json.load(json_handler)
-
- return correlation_results.get(trait_name)
-
- except FileNotFoundError:
- pass
-
-
-
-
-
+ file_path = os.path.join(TMPDIR, f"{file_name}.json")
+ try:
+ with open(file_path, "r") as json_handler:
+ correlation_results = json.load(json_handler)
+ return correlation_results.get(trait_name)
+ except FileNotFoundError:
+ pass