diff options
| author | zsloan | 2025-05-19 19:23:46 +0000 |
|---|---|---|
| committer | zsloan | 2025-06-17 10:46:29 -0500 |
| commit | c2002d3348656f87729f48c738f298c81464d8e8 (patch) | |
| tree | b7982be4d1c88c9c35f1569482201bf49f1866f0 /gn3/db/sample_data.py | |
| parent | daac7361a4521d8a04a057e715c3f9df2e6651e6 (diff) | |
| download | genenetwork3-c2002d3348656f87729f48c738f298c81464d8e8.tar.gz | |
Add Batch Update code to GN3
Diffstat (limited to 'gn3/db/sample_data.py')
| -rw-r--r-- | gn3/db/sample_data.py | 119 |
1 files changed, 119 insertions, 0 deletions
diff --git a/gn3/db/sample_data.py b/gn3/db/sample_data.py index 965767c..af979fe 100644 --- a/gn3/db/sample_data.py +++ b/gn3/db/sample_data.py @@ -606,3 +606,122 @@ def insert_sample_data( return count except Exception as _e: raise MySQLdb.Error(_e) from _e + +def batch_update_sample_data( + conn: Any, diff_data: Dict +): + def __fetch_data_id(conn, db_type, trait_id, dataset_name): + with conn.cursor() as cursor: + if db_type == "Publish": + cursor.execute( + ( + f"SELECT {db_type}XRef.DataId " + f"FROM {db_type}XRef, {db_type}Freeze " + f"WHERE {db_type}XRef.InbredSetId = {db_type}Freeze.InbredSetId AND " + f"{db_type}XRef.Id = %s AND " + f"{db_type}Freeze.Name = %s" + ), (trait_id, dataset_name) + ) + elif db_type == "ProbeSet": + cursor.execute( + ( + f"SELECT {db_type}XRef.DataId " + f"FROM {db_type}XRef, {db_type}, {db_type}Freeze " + f"WHERE {db_type}XRef.InbredSetId = {db_type}Freeze.InbredSetId AND " + f"{db_type}XRef.ProbeSetId = {db_type}.Id AND " + f"{db_type}.Name = %s AND " + f"{db_type}Freeze.Name = %s" + ), (trait_id, dataset_name) + ) + return cursor.fetchone()[0] + + def __fetch_strain_id(cursor, strain_name): + with conn.cursor() as cursor: + cursor.execute( + "SELECT Id FROM Strain WHERE Name = %s", (strain_name,) + ) + return cursor.fetchone()[0] + + def __update_query(conn, db_type, data_id, strain_id, diffs): + with conn.cursor() as cursor: + if 'value' in diffs: + cursor.execute( + ( + f"UPDATE {db_type}Data " + f"SET value = %s " + f"WHERE Id = %s AND StrainId = %s" + ), (diffs['value']['Current'], data_id, strain_id) + ) + if 'error' in diffs: + cursor.execute( + ( + f"UPDATE {db_type}SE " + f"SET error = %s " + f"WHERE DataId = %s AND StrainId = %s" + ), (diffs['error']['Current'], data_id, strain_id) + ) + + conn.commit() + + def __insert_query(conn, db_type, data_id, strain_id, diffs): + with conn.cursor() as cursor: + if 'value' in diffs: + cursor.execute( + ( + f"INSERT INTO {db_type}Data (Id, StrainId, value)" + f"VALUES (%s, %s, %s)" + ), (data_id, strain_id, diffs['value']) + ) + if 'error' in diffs: + cursor.execute( + ( + f"INSERT INTO {db_type}SE (DataId, StrainId, error)" + f"VALUES (%s, %s, %s)" + ), (data_id, strain_id, diffs['error']) + ) + + conn.commit() + + def __delete_query(conn, db_type, data_id, strain_id, diffs): + with conn.cursor() as cursor: + if 'value' in diffs: + cursor.execute( + ( + f"DELETE FROM {db_type}Data " + f"WHERE Id = %s AND StrainId = %s" + ), (data_id, strain_id) + ) + if 'error' in diffs: + cursor.execute( + ( + f"DELETE FROM {db_type}SE " + f"WHERE DataId = %s AND StrainId = %s" + ), (data_id, strain_id) + ) + + conn.commit() + + def __update_data(conn, db_type, data_id, diffs, update_type): + for strain in diffs: + strain_id = __fetch_strain_id(conn, strain) + if update_type == "update": + __update_query(conn, db_type, data_id, strain_id, diffs[strain]) + elif update_type == "insert": + __insert_query(conn, db_type, data_id, strain_id, diffs[strain]) + elif update_type == "delete": + __delete_query(conn, db_type, data_id, strain_id, diffs[strain]) + + for key in diff_data: + dataset, trait = key.split(":") + if "Publish" in dataset: + db_type = "Publish" + else: + db_type = "ProbeSet" + + data_id = __fetch_data_id(conn, db_type, trait, dataset) + + __update_data(conn, db_type, data_id, diff_data[key]['Modifications'], 'update') + __update_data(conn, db_type, data_id, diff_data[key]['Additions'], 'insert') + __update_data(conn, db_type, data_id, diff_data[key]['Deletions'], 'delete') + + return diff_data |
