aboutsummaryrefslogtreecommitdiff
path: root/scripts/sampledata.py
diff options
context:
space:
mode:
author: Munyoki Kilyungi, 2023-01-24 23:48:27 +0300
committer: BonfaceKilz, 2023-01-26 16:03:25 +0300
commit: f682ff4c27b63e5e400603f32c23523a62e637e3 (patch)
tree: 6ea130c922830b88cfbfff825397a7be4a336195 /scripts/sampledata.py
parent: a21cd4c10bbbd2267f33a4acb968ccc8eee0cc7c (diff)
download: genenetwork2-f682ff4c27b63e5e400603f32c23523a62e637e3.tar.gz
Dump metadata in addition to sample data.
* scripts/sampledata: Dump metadata in addition to data.
Diffstat (limited to 'scripts/sampledata.py')
-rw-r--r-- scripts/sampledata.py 29
1 files changed, 28 insertions, 1 deletions
diff --git a/scripts/sampledata.py b/scripts/sampledata.py
index 543ec70a..9e9733bf 100644
--- a/scripts/sampledata.py
+++ b/scripts/sampledata.py
@@ -6,6 +6,8 @@ import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from functools import partial
+from re import sub
+
# Required Evils!
from flask import g
from wqflask import app
@@ -25,6 +27,11 @@ class UserSessionSimulator():
return self._user_id
def camel_case(string):
    """Convert a snake_case or kebab-case STRING to camelCase,
    e.g. 'lrs_location' -> 'lrsLocation'.

    Every run of underscores and/or hyphens is treated as a single
    word separator; each word is title-cased, the words are joined
    and the first character of the result is lower-cased.

    An empty STRING, or one made up only of separators, yields ''
    instead of raising IndexError.
    """
    s = sub(r"(_|-)+", " ", string).title().replace(" ", "")
    # Slicing, unlike s[0], is safe when `s` is empty.
    return s[:1].lower() + s[1:]
+
+
def dump_sample_data(dataset_name, trait_id):
"""Given a DATASET_NAME e.g. 'BXDPublish' and a TRAIT_ID
e.g. '10007', dump the sample data as json object"""
@@ -68,10 +75,30 @@ def fetch_all_traits(species, group, type_, dataset):
yield result.get('name') or result.get('display_name')
def get_trait_metadata(dataset_name, trait_id):
    """Given a DATASET_NAME e.g. 'BXDPublish' and a TRAIT_ID
    e.g. '10007', return the trait's metadata as a dict.

    The metadata is built from the attributes of the trait object
    created by `show_trait.ShowTrait`: the 'data' and 'comments'
    attributes are left out, attributes with falsy values are
    dropped, and the remaining attribute names are camelCased.  The
    'dataset' key is always set to DATASET_NAME.
    """
    with database_connection() as conn, conn.cursor() as cursor:
        with app.app_context():
            # ShowTrait is run outside a real request here, so a
            # stand-in user session is placed on flask's `g` first.
            g.user_session = UserSessionSimulator(None)
            trait = show_trait.ShowTrait(
                cursor, user_id=None,
                kw={
                    "trait_id": trait_id,
                    "dataset": dataset_name,
                }).this_trait
            # Build a filtered copy rather than popping from the
            # trait's own __dict__: that would strip attributes off
            # the live object and raise KeyError if one is missing.
            excluded = {"data", "comments"}
            # Filter out any empty values and camelCase the keys.
            metadata = {
                camel_case(key): value
                for key, value in vars(trait).items()
                if key not in excluded and value
            }
            # Overrides whatever the trait stored under 'dataset'
            # with the plain dataset name.
            metadata["dataset"] = dataset_name
            return metadata
+
+
def dump_json(base_dir, dataset_name, trait):
    """Dump the sample data and metadata of TRAIT from DATASET_NAME
    as a json object to the file BASE_DIR/<trait>.json.

    The metadata is stored under the 'metadata' key of the sample
    data object.
    """
    print(f"\033[FDumping: {dataset_name}/{trait}]")
    # Fetch and serialise everything before opening the file, so that
    # a failed lookup or an unserialisable value does not leave an
    # empty or truncated json file behind.
    _data = dump_sample_data(dataset_name, trait)
    _data["metadata"] = get_trait_metadata(dataset_name, trait)
    payload = json.dumps(_data)
    with open(os.path.join(base_dir, f"{trait}.json"), "w") as f:
        f.write(payload)
def dump_dataset(target_dir, species, group_name, dataset_type, dataset):