path: root/wqflask/base/data_set/utils.py
author    Frederick Muriuki Muriithi  2022-09-20 11:40:56 +0300
committer Frederick Muriuki Muriithi  2022-09-20 11:50:32 +0300
commit    8f732461b897a7c229c3b49a74fd831c2e440989 (patch)
tree      769f044294c118eb3aec631d3d26485b4280637e /wqflask/base/data_set/utils.py
parent    ce07180fbc07fbf61d4dc26bf5d20cdf6f7df91f (diff)
download  genenetwork2-8f732461b897a7c229c3b49a74fd831c2e440989.tar.gz
Decompose file into separate modules (refactor-data_set_py-20220920)
To ease future refactoring of the code, decompose the file into a
package of multiple modules that can be refactored semi-independently.
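For orientation, the decomposition replaces the old single file with a
package; a hypothetical sketch of the resulting layout (only utils.py is
introduced by this commit, the other entries are illustrative):

    wqflask/base/data_set/
        __init__.py    # package entry point (assumed)
        utils.py       # this commit: caching and timestamp helpers
        ...            # further modules split out of the old data_set.py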
Diffstat (limited to 'wqflask/base/data_set/utils.py')
-rw-r--r--  wqflask/base/data_set/utils.py | 80
1 file changed, 80 insertions(+), 0 deletions(-)
diff --git a/wqflask/base/data_set/utils.py b/wqflask/base/data_set/utils.py
new file mode 100644
index 00000000..0077c292
--- /dev/null
+++ b/wqflask/base/data_set/utils.py
@@ -0,0 +1,80 @@
+"data_set package utilities"
+
+import os
+import json
+import hashlib
+from typing import List
+
+from utility.tools import SQL_URI
+from base.webqtlConfig import TMPDIR
+from wqflask.database import parse_db_url, database_connection
+
+
+def geno_mrna_confidentiality(ob):
+    """Check whether the dataset's confidentiality flag is set."""
+    with database_connection() as conn, conn.cursor() as cursor:
+        cursor.execute(
+            "SELECT confidentiality, "
+            f"AuthorisedUsers FROM {ob.type}Freeze WHERE Name = %s",
+            (ob.name,)
+        )
+        result = cursor.fetchall()
+        # True only when a matching row exists and its
+        # confidentiality flag is set.
+        if result and result[0][0]:
+            return True
+
+def query_table_timestamp(dataset_type: str):
+    """Query the last update timestamp of the given dataset_type's table."""
+    with database_connection() as conn, conn.cursor() as cursor:
+        db_name = parse_db_url(SQL_URI)[3]
+        # Parameterize the query rather than interpolating values
+        # directly into the SQL string.
+        cursor.execute(
+            "SELECT UPDATE_TIME FROM information_schema.tables "
+            "WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s",
+            (db_name, f"{dataset_type}Data"))
+        date_time_obj = cursor.fetchone()[0]
+        return date_time_obj.strftime("%Y-%m-%d %H:%M:%S")
+
+
+def generate_hash_file(dataset_name: str, dataset_type: str,
+                       dataset_timestamp: str, samplelist: str):
+    """Generate a unique cache-file name from the dataset name,
+    timestamp and samplelist.  dataset_type is currently unused."""
+    string_unicode = f"{dataset_name}{dataset_timestamp}{samplelist}".encode()
+    md5hash = hashlib.md5(string_unicode)
+    return md5hash.hexdigest()
+
+
+def cache_dataset_results(dataset_name: str, dataset_type: str,
+                          samplelist: List, query_results: List):
+    """Cache dataset query results (already processed into
+    default-dict format) to a JSON file under TMPDIR."""
+    table_timestamp = query_table_timestamp(dataset_type)
+    samplelist_as_str = ",".join(samplelist)
+
+    file_name = generate_hash_file(
+        dataset_name, dataset_type, table_timestamp, samplelist_as_str)
+    file_path = os.path.join(TMPDIR, f"{file_name}.json")
+
+    with open(file_path, "w") as file_handler:
+        json.dump(query_results, file_handler)
+
+
+def fetch_cached_results(dataset_name: str, dataset_type: str,
+                         samplelist: List):
+    """Fetch cached query results, or None when no valid cache exists."""
+
+    table_timestamp = query_table_timestamp(dataset_type)
+    samplelist_as_str = ",".join(samplelist)
+
+    file_name = generate_hash_file(
+        dataset_name, dataset_type, table_timestamp, samplelist_as_str)
+    file_path = os.path.join(TMPDIR, f"{file_name}.json")
+    try:
+        with open(file_path, "r") as file_handler:
+            return json.load(file_handler)
+    except (FileNotFoundError, json.JSONDecodeError):
+        # Missing or corrupt cache file: treat as a cache miss.
+        return None
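Below is a usage sketch of the caching helpers added by this commit. The
dataset name, type and sample list are hypothetical, run_query is a
stand-in for the real dataset query, and a configured database plus a
writable TMPDIR are assumed:

from base.data_set.utils import cache_dataset_results, fetch_cached_results

# Hypothetical inputs, for illustration only.
DATASET_NAME = "HC_M2_0606_P"          # assumed dataset name
DATASET_TYPE = "ProbeSet"              # timestamp read from ProbeSetData table
SAMPLELIST = ["BXD1", "BXD2", "BXD5"]  # assumed sample names


def run_query():
    """Placeholder for the real (expensive) dataset query."""
    return {"trait1": [1.2, 3.4, 5.6]}


# The cache key is an MD5 of dataset name + table UPDATE_TIME + samplelist,
# so a table update changes the key and stale files are never looked up.
results = fetch_cached_results(DATASET_NAME, DATASET_TYPE, SAMPLELIST)
if results is None:  # cache miss: compute, then cache for next time
    results = run_query()
    cache_dataset_results(DATASET_NAME, DATASET_TYPE, SAMPLELIST, results)

Because the table timestamp is baked into the file name, invalidation is
implicit: after a table update the old JSON file is simply never read again.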