about summary refs log tree commit diff
diff options
context:
space:
mode:
author Munyoki Kilyungi 2023-01-24 23:48:27 +0300
committer BonfaceKilz 2023-01-26 16:03:25 +0300
commit f682ff4c27b63e5e400603f32c23523a62e637e3 (patch)
tree 6ea130c922830b88cfbfff825397a7be4a336195
parent a21cd4c10bbbd2267f33a4acb968ccc8eee0cc7c (diff)
download genenetwork2-f682ff4c27b63e5e400603f32c23523a62e637e3.tar.gz
Dump metadata in addition to sample data.
* scripts/sampledata: Dump metadata in addition to data.
-rw-r--r-- scripts/sampledata.py 29
1 files changed, 28 insertions, 1 deletions
diff --git a/scripts/sampledata.py b/scripts/sampledata.py
index 543ec70a..9e9733bf 100644
--- a/scripts/sampledata.py
+++ b/scripts/sampledata.py
@@ -6,6 +6,8 @@ import time
 from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
 from functools import partial
 
+from re import sub
+
 # Required Evils!
 from flask import g
 from wqflask import app
@@ -25,6 +27,11 @@ class UserSessionSimulator():
         return self._user_id
 
 
+def camel_case(string):
+    """Convert a snake_case or kebab-case STRING to camelCase,
+    e.g. 'trait_id' -> 'traitId'.
+
+    Runs of '_' and '-' are replaced by a single space, str.title()
+    is applied, the spaces are removed and the first character is
+    lower-cased.  Returns '' when nothing is left after removing the
+    separators (empty STRING, or STRING made only of '_' / '-')."""
+    s = sub(r"(_|-)+", " ", string).title().replace(" ", "")
+    # Slicing instead of s[0]: indexing raises IndexError on ''.
+    return s[:1].lower() + s[1:]
+
+
 def dump_sample_data(dataset_name, trait_id):
     """Given a DATASET_NAME e.g. 'BXDPublish' and a TRAIT_ID
     e.g. '10007', dump the sample data as json object"""
@@ -68,10 +75,30 @@ def fetch_all_traits(species, group, type_, dataset):
             yield result.get('name') or result.get('display_name')
 
 
+def get_trait_metadata(dataset_name, trait_id):
+    """Given a DATASET_NAME e.g. 'BXDPublish' and a TRAIT_ID
+    e.g. '10007', return the trait's metadata as a dict.
+
+    The dict is built from the attributes of
+    ShowTrait(...).this_trait: the 'data' and 'comments' attributes
+    are left out, attributes with a falsy value (None, '', 0, empty
+    containers, ...) are dropped, the remaining keys are camelCased
+    and the 'dataset' key is set to DATASET_NAME."""
+    with database_connection() as conn, conn.cursor() as cursor:
+        with app.app_context():
+            g.user_session = UserSessionSimulator(None)
+            trait = show_trait.ShowTrait(
+                cursor, user_id=None,
+                kw={
+                    "trait_id": trait_id,
+                    "dataset": dataset_name,
+                }).this_trait
+            # Work on a copy so that popping keys does not delete
+            # attributes from the trait object itself.
+            data = dict(trait.__dict__)
+            # A default avoids a KeyError for traits that lack
+            # either attribute.
+            data.pop("data", None)
+            data.pop("comments", None)
+            # filter any empty values and camelCase the keys
+            _d = {camel_case(key): value for key, value in data.items() if value}
+            # Overrides any 'dataset' attribute carried by the trait.
+            _d["dataset"] = dataset_name
+            return _d
+
+
 def dump_json(base_dir, dataset_name, trait):
+    """Write the sample data and the metadata of TRAIT in
+    DATASET_NAME to BASE_DIR/<trait>.json as one JSON object; the
+    metadata is stored under the "metadata" key."""
     print(f"\033[FDumping: {dataset_name}/{trait}]")
     with open(os.path.join(base_dir, f"{trait}.json"), "w") as f:
-        json.dump(dump_sample_data(dataset_name, trait), f)
+        # NOTE(review): the file is already opened in "w" mode at
+        # this point, so if either fetch below raises, an empty
+        # <trait>.json is left behind.
+        _data = dump_sample_data(dataset_name, trait)
+        _data["metadata"] = get_trait_metadata(dataset_name, trait)
+        json.dump(_data, f)
 
 
 def dump_dataset(target_dir, species, group_name, dataset_type, dataset):