diff options
Diffstat (limited to 'wqflask')
-rw-r--r-- | wqflask/wqflask/correlation/pre_computes.py | 32 |
1 files changed, 19 insertions, 13 deletions
diff --git a/wqflask/wqflask/correlation/pre_computes.py b/wqflask/wqflask/correlation/pre_computes.py index 355701f2..638ae860 100644 --- a/wqflask/wqflask/correlation/pre_computes.py +++ b/wqflask/wqflask/correlation/pre_computes.py @@ -9,6 +9,25 @@ from redis import Redis r = Redis() +# code to isolate metadata caching + + +def fetch_all_cached_metadata(dataset_name): + """in a gvein dataset fetch all the traits metadata""" + file_name = f"{dataset_name}_metadata.json" + + file_path = os.path.join(TMPDIR, file_name) + + with open(file_path, "r+") as file_handler: + dataset_metadata = json.load(file_handler) + + except FileNotFoundError: + Path(file_path).touch(exist_ok=True) + return {} + + return dataset_metadata + + def generate_filename(base_dataset_name, target_dataset_name, base_timestamp, target_dataset_timestamp): """generate unique filename""" @@ -60,18 +79,10 @@ def cache_compute_results(base_dataset_type, json.dump(data, file_handler) - # create the file only if it does not exists - - # else open the file to cache the results - def fetch_precompute_results(base_dataset_name, target_dataset_name, dataset_type, trait_name): """function to check for precomputed results""" - # check for redis timestamp - - # fix rely on the fact correlation run oftenly probeset is set - base_timestamp = target_dataset_timestamp = r.get(f"{dataset_type}timestamp") if base_timestamp is None: return @@ -86,11 +97,9 @@ def fetch_precompute_results(base_dataset_name, target_dataset_name, dataset_typ file_path = os.path.join(TMPDIR, f"{file_name}.json") - try: with open(file_path, "r+") as json_handler: correlation_results = json.load(json_handler) - # print(correlation_results) return correlation_results.get(trait_name) @@ -131,8 +140,6 @@ def get_datasets_data(base_dataset, target_dataset_data): (works for bxd only probeset datasets) - # fix issue with fetching of the datasets - output:two dicts for datasets with key==trait and value==strains """ samples_fetched = base_dataset.group.all_samples_ordered() @@ -142,7 +149,6 @@ def get_datasets_data(base_dataset, target_dataset_data): base_traits_data = base_dataset.get_trait_data( samples_fetched) - target_results = map_shared_keys_to_values( samples_fetched, target_traits_data) base_results = map_shared_keys_to_values( |