diff options
author | Munyoki Kilyungi | 2023-01-24 23:48:27 +0300 |
---|---|---|
committer | BonfaceKilz | 2023-01-26 16:03:25 +0300 |
commit | f682ff4c27b63e5e400603f32c23523a62e637e3 (patch) | |
tree | 6ea130c922830b88cfbfff825397a7be4a336195 /scripts | |
parent | a21cd4c10bbbd2267f33a4acb968ccc8eee0cc7c (diff) | |
download | genenetwork2-f682ff4c27b63e5e400603f32c23523a62e637e3.tar.gz |
Dump metadata in addition to sample data.
* scripts/sampledata: Dump metadata in addition to data.
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/sampledata.py | 29 |
1 file changed, 28 insertions, 1 deletion
diff --git a/scripts/sampledata.py b/scripts/sampledata.py index 543ec70a..9e9733bf 100644 --- a/scripts/sampledata.py +++ b/scripts/sampledata.py @@ -6,6 +6,8 @@ import time from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor from functools import partial +from re import sub + # Required Evils! from flask import g from wqflask import app @@ -25,6 +27,11 @@ class UserSessionSimulator(): return self._user_id +def camel_case(string): + s = sub(r"(_|-)+", " ", string).title().replace(" ", "") + return ''.join([s[0].lower(), s[1:]]) + + def dump_sample_data(dataset_name, trait_id): """Given a DATASET_NAME e.g. 'BXDPublish' and a TRAIT_ID e.g. '10007', dump the sample data as json object""" @@ -68,10 +75,30 @@ def fetch_all_traits(species, group, type_, dataset): yield result.get('name') or result.get('display_name') +def get_trait_metadata(dataset_name, trait_id): + with database_connection() as conn, conn.cursor() as cursor: + with app.app_context(): + g.user_session = UserSessionSimulator(None) + data = show_trait.ShowTrait( + cursor, user_id=None, + kw={ + "trait_id": trait_id, + "dataset": dataset_name, + }).this_trait.__dict__ + data.pop("data") + data.pop("comments") + # filter any empty values and camelCase the keys + _d = {camel_case(key): value for key, value in data.items() if value} + _d["dataset"] = dataset_name + return _d + + def dump_json(base_dir, dataset_name, trait): print(f"\033[FDumping: {dataset_name}/{trait}]") with open(os.path.join(base_dir, f"{trait}.json"), "w") as f: - json.dump(dump_sample_data(dataset_name, trait), f) + _data = dump_sample_data(dataset_name, trait) + _data["metadata"] = get_trait_metadata(dataset_name, trait) + json.dump(_data, f) def dump_dataset(target_dir, species, group_name, dataset_type, dataset): |