author    | Alexander_Kabui | 2023-04-19 20:28:25 +0300
committer | Alexander_Kabui | 2023-04-19 20:28:25 +0300
commit    | b70d58ae19ab785052a37426c941059f2e76e335 (patch)
tree      | e03eef538dcbc7c878c9d328461b1c5fc97b83f8 /wqflask/scripts
parent    | c1c5ca6e994ff8afbfa7a4270d210fd80d118f98 (diff)
download  | genenetwork2-b70d58ae19ab785052a37426c941059f2e76e335.tar.gz
code refactoring; pep8 formatting
Diffstat (limited to 'wqflask/scripts')
-rw-r--r-- | wqflask/scripts/textfiles_generator.py | 101
1 file changed, 57 insertions, 44 deletions
diff --git a/wqflask/scripts/textfiles_generator.py b/wqflask/scripts/textfiles_generator.py
index 2f35d6f8..c2e5aa7d 100644
--- a/wqflask/scripts/textfiles_generator.py
+++ b/wqflask/scripts/textfiles_generator.py
@@ -1,4 +1,3 @@
-
 # database connection
 import contextlib
 import pickle
@@ -28,22 +27,31 @@ flags:

 # example python3
 python3 meta_data_script.py "mysql://kabui:1234@localhost/db_webqtl" /tmp --textfile
+
 python3 meta_data_script.py "mysql://kabui:1234@localhost/db_webqtl" /tmp --metadata
+python3 meta_data_script.py "mysql://kabui:1234@localhost/db_webqtl" /tmp --metadata --textfile
 """
+
+#! add to this list or use get_probes_meta to populate
+
+
 DATASET_NAMES = [
     ("ProbeSet", "HC_M2_0606_P", "mouse"),
-    ("ProbeSet", "UMUTAffyExon_0209_RMA","mouse")
+    ("ProbeSet", "UMUTAffyExon_0209_RMA", "mouse")
 ]


-def get_probes_meta():
+def get_probes_meta(sql_uri):
     # if you need to generate for all probes use this note 1000+
-    query = "SELECT Id,NAME FROM ProbeSetFreeze"
-    cursor.execute(query)
-    return cursor.fetchall()
+
+    with database_connection(sql_uri) as conn:
+        with conn.cursor() as cursor:
+            query = "SELECT Id,NAME FROM ProbeSetFreeze"
+            cursor.execute(query)
+            return cursor.fetchall()


 def parse_db_url(sql_uri: str) -> Tuple:
@@ -135,22 +143,23 @@ def get_metadata(dataset_type, dataset_name, species, sql_uri):
             in query_probes_metadata(dataset_type, dataset_name, species, sql_uri)}


-def cache_trait_metadata(dataset_name, data):
+def cache_trait_metadata(dataset_name, dataset_type, data):
     if not data:
         return
     try:
-
-        path = os.path.join(TMPDIR, "metadata")
-        Path(path).mkdir(parents=True, exist_ok=True)
-        db_path = os.path.join(path, f"metadata_{dataset_name}")
-        if not check_file_expiry(db_path):
-            return
-        with lmdb.open(db_path, map_size=80971520) as env:
+        path = os.path.join(TMPDIR, f"metadata_{dataset_type}")
+        if not check_file_expiry(path, dataset_name):
+            return
+        with lmdb.open(path, map_size=500971520) as env:
             with env.begin(write=True) as txn:
-                data_bytes = pickle.dumps(data)
-                txn.put(f"{dataset_name}".encode(), data_bytes)
-                current_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-                txn.put(b"creation_date", current_date.encode())
+
+                metadata = {
+                    "data": data,
+                    "creation_date": datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+                    "dataset_name": dataset_name
+                }
+
+                txn.put(f"{dataset_name}".encode(), pickle.dumps(metadata))
     except lmdb.Error as error:
         raise error
@@ -159,9 +168,9 @@ def __sanitise_filename__(filename):
     ttable = str.maketrans({" ": "_", "/": "_", "\\": "_"})
     return str.translate(filename, ttable)

-def __generate_file_name__(db_name,sql_uri):
-    # todo add expiry time and checker
+def __generate_file_name__(db_name, sql_uri):
+    # todo add expiry time and checker

     with database_connection(sql_uri) as conn:
         with conn.cursor() as cursor:
@@ -182,23 +191,22 @@ def write_strains_data(sql_uri, dataset_name: str, col_names: list[str], data):
     if not data:
         return
     try:
-
-        db_path = os.path.join(TMPDIR, __generate_file_name__(dataset_name,sql_uri))
-        breakpoint()
-        with lmdb.open(db_path, map_size=80971520) as env:
+        with lmdb.open(os.path.join(TMPDIR, "Probesets"), map_size=500971520) as env:
             with env.begin(write=True) as txn:
-                txn.put(f"strain_names".encode(), pickle.dumps(col_names))
+                meta = {
+                    "strain_names": col_names,
+                    "data": data,
+                    "creation_date": datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+                }
-                txn.put(f"data".encode(), pickle.dumps(data))
-                current_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-                txn.put(b"creation_date", current_date.encode())
+                txn.put(__generate_file_name__(dataset_name,
+                                               sql_uri).encode(), pickle.dumps(meta))
     except lmdb.Error as error:
         raise error

-
 def generate_probes_textfiles(db_name, db_type, sql_uri):

     def __parse_to_dict__(results):
@@ -251,11 +259,12 @@ def argument_parser():

 def run_textfiles_generator(args):
     try:
+
         for (d_type, dataset_name, _species) in DATASET_NAMES:
-            if not check_file_expiry(os.path.join(args.TMPDIR,__generate_file_name__(dataset_name,args.SQL_URI))):
-                return
+            file_name = __generate_file_name__(dataset_name, args.SQL_URI)
-            breakpoint()
+            if not check_file_expiry(os.path.join(args.TMPDIR, "Probesets"), file_name):
+                return
             write_strains_data(
                 args.SQL_URI, dataset_name, *generate_probes_textfiles(dataset_name, d_type, args.SQL_URI))
     except Exception as error:
@@ -265,15 +274,16 @@ def run_metadata_files_generator(args):
     for (dataset_type, dataset_name, species) in DATASET_NAMES:
         try:
-            cache_trait_metadata(dataset_name, get_metadata(
+            cache_trait_metadata(dataset_name, dataset_type, get_metadata(
                 dataset_type, dataset_name, species, args.SQL_URI))
         except Exception as error:
             raise error

-def read_trait_metadata(dataset_name):
+
+def read_trait_metadata(dataset_name, dataset_type):
     try:
-        with lmdb.open(os.path.join(TMPDIR,f"metadata_{dataset_name}"),
-                       readonly=True, lock=False) as env:
+        with lmdb.open(os.path.join(TMPDIR, f"metadata_{dataset_type}"),
+                       readonly=True, lock=False) as env:
             with env.begin() as txn:
                 db_name = txn.get(dataset_name.encode())
                 return (pickle.loads(db_name) if db_name else {})
@@ -281,17 +291,20 @@
         return {}

+def check_file_expiry(target_file_path, dataset_name, max_days=20):
+    # return true if file has expired
-def check_file_expiry(target_file_path,max_days=20):
-
-    # return true if file has expired
     try:
-        with lmdb.open(target_file_path,readonly=True, lock=False) as env:
+        with lmdb.open(target_file_path, readonly=True, lock=False) as env:
             with env.begin() as txn:
-
-                creation_date = datetime.datetime.strptime(txn.get(b"creation_date").decode(), '%Y-%m-%d %H:%M:%S')
-                return ((datetime.datetime.now() - creation_date).days > max_days)
-    except lmdb.Error as error:
+                dataset = txn.get(dataset_name.encode())
+                if dataset:
+                    meta = pickle.loads(dataset)
+                    creation_date = datetime.datetime.strptime(
+                        meta["creation_date"], '%Y-%m-%d %H:%M:%S')
+                    return ((datetime.datetime.now() - creation_date).days > max_days)
+                return True
+    except Exception:
         return True
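For readers unfamiliar with the caching scheme this commit moves to: each dataset type now gets one LMDB environment (e.g. `metadata_ProbeSet`), and every dataset is stored under its own key as a pickled dict that carries its own `creation_date`, so expiry is checked per record rather than per file. Below is a minimal, self-contained sketch of that round trip, assuming only `lmdb` and the standard library; the `/tmp/metadata_ProbeSet` path, the `demo_dataset` name, and the payload are illustrative assumptions, not values from the repository.

```python
# Minimal sketch of the commit's per-key metadata caching pattern.
# Assumptions (not from the repo): the cache lives at /tmp/metadata_ProbeSet,
# and "demo_dataset" stands in for a real dataset name.
import datetime
import pickle

import lmdb

CACHE_PATH = "/tmp/metadata_ProbeSet"  # hypothetical; one env per dataset type
MAX_DAYS = 20


def write_record(dataset_name, data):
    # Bundle the payload with its creation date so each key expires on its own.
    record = {
        "data": data,
        "creation_date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "dataset_name": dataset_name,
    }
    with lmdb.open(CACHE_PATH, map_size=500971520) as env:
        with env.begin(write=True) as txn:
            txn.put(dataset_name.encode(), pickle.dumps(record))


def is_expired(dataset_name, max_days=MAX_DAYS):
    # Mirrors check_file_expiry: missing or unreadable records count as expired,
    # which makes the generators regenerate them on the next run.
    try:
        with lmdb.open(CACHE_PATH, readonly=True, lock=False) as env:
            with env.begin() as txn:
                raw = txn.get(dataset_name.encode())
                if raw is None:
                    return True
                created = datetime.datetime.strptime(
                    pickle.loads(raw)["creation_date"], "%Y-%m-%d %H:%M:%S")
                return (datetime.datetime.now() - created).days > max_days
    except Exception:
        return True


if __name__ == "__main__":
    write_record("demo_dataset", {"trait_1": {"symbol": "Shh"}})
    print(is_expired("demo_dataset"))  # False immediately after writing
```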
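The design change behind the new signatures: the cache moves from one LMDB file per dataset, whose shared `b"creation_date"` key described the whole file, to one environment per dataset type (`metadata_ProbeSet`, `Probesets`) holding one pickled record per dataset, each carrying its own `creation_date`. That is why `cache_trait_metadata`, `read_trait_metadata`, and `check_file_expiry` all gain a `dataset_type` or `dataset_name` parameter, and why a missing record now counts as expired. The commit also drops two leftover `breakpoint()` debugging calls and raises `map_size` from 80971520 to 500971520 bytes to accommodate the shared environments.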