author     zsloan  2022-03-22 19:02:15 +0000
committer  zsloan  2022-03-22 19:02:15 +0000
commit     a49da43ba00245cf23a2b72c314127986f567f28 (patch)
tree       ce64e83370c52add94927bc050febf5d242722db /wqflask/base/data_set.py
parent     68ac19153b128f60b660e11365e5fd4304c95300 (diff)
parent     32cb57b82db328bc84753af9d25e9aaa1bd31152 (diff)
download   genenetwork2-a49da43ba00245cf23a2b72c314127986f567f28.tar.gz
Merge remote-tracking branch 'origin/testing' into feature/add_rqtl_pairscan
Diffstat (limited to 'wqflask/base/data_set.py')
-rw-r--r--  wqflask/base/data_set.py  20
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index af248659..d7e4e62f 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -756,7 +756,7 @@ class DataSet:
         chunk_size = 50
         number_chunks = int(math.ceil(len(sample_ids) / chunk_size))
 
-        cached_results = fetch_cached_results(self.name, self.type)
+        cached_results = fetch_cached_results(self.name, self.type, self.samplelist)
         if cached_results is None:
             trait_sample_data = []
             for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks):
@@ -812,9 +812,8 @@ class DataSet:
                         trait_sample_data[chunk_counter][trait_counter][data_start_pos:])
 
             cache_dataset_results(
-                self.name, self.type, self.trait_data)
+                self.name, self.type, self.samplelist, self.trait_data)
         else:
-
             self.trait_data = cached_results
@@ -1278,14 +1277,14 @@ def query_table_timestamp(dataset_type: str):
     return date_time_obj.strftime("%Y-%m-%d %H:%M:%S")
 
 
-def generate_hash_file(dataset_name: str, dataset_type: str, dataset_timestamp: str):
+def generate_hash_file(dataset_name: str, dataset_type: str, dataset_timestamp: str, samplelist: str):
     """given the trait_name generate a unique name for this"""
-    string_unicode = f"{dataset_name}{dataset_timestamp}".encode()
+    string_unicode = f"{dataset_name}{dataset_timestamp}{samplelist}".encode()
     md5hash = hashlib.md5(string_unicode)
     return md5hash.hexdigest()
 
 
-def cache_dataset_results(dataset_name: str, dataset_type: str, query_results: List):
+def cache_dataset_results(dataset_name: str, dataset_type: str, samplelist: List, query_results: List):
     """function to cache dataset query results to file
     input dataset_name and type query_results(already processed in default dict format)
     """
@@ -1293,21 +1292,22 @@ def cache_dataset_results(dataset_name: str, dataset_type: str, query_results: L
     # store the file path on redis
     table_timestamp = query_table_timestamp(dataset_type)
+    samplelist_as_str = ",".join(samplelist)
 
-
-    file_name = generate_hash_file(dataset_name, dataset_type, table_timestamp)
+    file_name = generate_hash_file(dataset_name, dataset_type, table_timestamp, samplelist_as_str)
     file_path = os.path.join(TMPDIR, f"{file_name}.json")
 
     with open(file_path, "w") as file_handler:
         json.dump(query_results, file_handler)
 
 
-def fetch_cached_results(dataset_name: str, dataset_type: str):
+def fetch_cached_results(dataset_name: str, dataset_type: str, samplelist: List):
     """function to fetch the cached results"""
     table_timestamp = query_table_timestamp(dataset_type)
+    samplelist_as_str = ",".join(samplelist)
 
-    file_name = generate_hash_file(dataset_name, dataset_type, table_timestamp)
+    file_name = generate_hash_file(dataset_name, dataset_type, table_timestamp, samplelist_as_str)
     file_path = os.path.join(TMPDIR, f"{file_name}.json")
     try:
         with open(file_path, "r") as file_handler:
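
What the merge changes, in effect: the sample list now participates in the cache key, so two queries over the same dataset but with different sample lists no longer read or overwrite each other's cached JSON. Below is a minimal, self-contained sketch of the three helpers as they stand after this change. It is not the module verbatim: TMPDIR is stood in for with tempfile.gettempdir(), and query_table_timestamp() is replaced by an explicit table_timestamp parameter so the sketch runs on its own.

import hashlib
import json
import os
import tempfile
from typing import List, Optional

TMPDIR = tempfile.gettempdir()  # assumption: stands in for the app's TMPDIR setting


def generate_hash_file(dataset_name: str, dataset_type: str,
                       dataset_timestamp: str, samplelist: str) -> str:
    # The sample list now feeds the hash, so caches built for different
    # sample lists get distinct file names instead of colliding.
    string_unicode = f"{dataset_name}{dataset_timestamp}{samplelist}".encode()
    return hashlib.md5(string_unicode).hexdigest()


def cache_dataset_results(dataset_name: str, dataset_type: str,
                          samplelist: List, query_results: List,
                          table_timestamp: str) -> None:
    # Serialise the processed results under the hash-derived file name.
    samplelist_as_str = ",".join(samplelist)
    file_name = generate_hash_file(
        dataset_name, dataset_type, table_timestamp, samplelist_as_str)
    file_path = os.path.join(TMPDIR, f"{file_name}.json")
    with open(file_path, "w") as file_handler:
        json.dump(query_results, file_handler)


def fetch_cached_results(dataset_name: str, dataset_type: str,
                         samplelist: List,
                         table_timestamp: str) -> Optional[List]:
    # Recompute the same hash; a miss surfaces as None, which the caller
    # (DataSet.get_trait_data above) treats as "query the database".
    samplelist_as_str = ",".join(samplelist)
    file_name = generate_hash_file(
        dataset_name, dataset_type, table_timestamp, samplelist_as_str)
    file_path = os.path.join(TMPDIR, f"{file_name}.json")
    try:
        with open(file_path, "r") as file_handler:
            return json.load(file_handler)
    except FileNotFoundError:
        return None

Because both the writer and the reader derive the file name the same way, a changed sample list (or a newer table timestamp) simply hashes to a different name; stale cache files are bypassed implicitly rather than deleted.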