about | summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2025-03-26 13:10:00 -0500
committer: Frederick Muriuki Muriithi, 2025-03-26 15:59:05 -0500
commit: 4bfce99098799571152755b870677a94326ff3fe (patch)
tree: 2938b425e0ddc9e677bce6d630caabdbf41878b8 /scripts
parent: c35d84fbe327bae00361e2e09a56090aa2e3c72f (diff)
download: gn-uploader-4bfce99098799571152755b870677a94326ff3fe.tar.gz
Compute differences in the descriptions.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/phenotypes_bulk_edit.py 48
1 files changed, 46 insertions, 2 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index 0ced2ab..8da3c77 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -45,9 +45,53 @@ def check_for_mandatory_fields():
pass
-def compute_differences():
def __fetch_phenotypes__(conn, ids: tuple[int, ...]) -> tuple[dict, ...]:
    """Fetch basic (non-numeric) phenotypes data from the database.

    Parameters:
        conn: An open database connection whose `cursor` method accepts a
            `cursorclass` argument.
        ids: The `Phenotype.Id` values of the rows to fetch.

    Returns:
        The matching `Phenotype` rows, each converted to a plain `dict`,
        ordered by ascending `Id`. An empty tuple when `ids` is empty.
    """
    if not ids:
        # With no IDs the query would read `... WHERE Id IN ()`, which is
        # not valid SQL. There is nothing to fetch, so skip the database.
        return tuple()

    with conn.cursor(cursorclass=DictCursor) as cursor:
        # One `%s` placeholder per ID; the values themselves are passed as
        # query parameters rather than interpolated into the SQL text.
        paramstr = ",".join(["%s"] * len(ids))
        cursor.execute(f"SELECT * FROM Phenotype WHERE Id IN ({paramstr}) "
                       "ORDER BY Id ASC",
                       ids)
        return tuple(dict(row) for row in cursor.fetchall())
+
+
def descriptions_differences(file_data, db_data) -> tuple[dict, ...]:
    """Compute differences in the descriptions.

    The two sequences are compared pairwise, in order: the n-th row of
    `file_data` is checked against the n-th row of `db_data`.

    Parameters:
        file_data: Rows from the edited file. Each row is indexed by
            "phenotype_id" and by every description column listed below.
        db_data: Rows from the database. Each row is indexed by "Id" and by
            the same description columns.

    Returns:
        A tuple with one dict per phenotype that has at least one changed
        description column. Each dict holds the "phenotype_id" and, for
        every changed column, the value found in `file_data`.

    Raises:
        AssertionError: If the two sequences differ in length, or if a pair
            of rows does not refer to the same phenotype.
    """
    logger.info("Computing differences in phenotype descriptions.")
    # Raise explicitly rather than using `assert`, so that the consistency
    # checks still run when Python is started with `-O`.
    if len(file_data) != len(db_data):
        raise AssertionError("The counts of phenotypes differ!")

    description_columns = ("Pre_publication_description",
                           "Post_publication_description",
                           "Original_description",
                           "Pre_publication_abbreviation",
                           "Post_publication_abbreviation")
    # Accumulate in a list: extending a tuple by concatenation copies the
    # whole tuple on every iteration.
    diff = []
    for file_row, db_row in zip(file_data, db_data):
        if file_row["phenotype_id"] != db_row["Id"]:
            raise AssertionError(
                "Rows in the file and in the database are not aligned: "
                f"{file_row['phenotype_id']!r} != {db_row['Id']!r}")

        inner_diff = {
            key: file_row[key]
            for key in description_columns
            if file_row[key] != db_row[key]
        }
        if inner_diff:
            diff.append({
                "phenotype_id": file_row["phenotype_id"],
                **inner_diff
            })

    return tuple(diff)
+
+
def compute_differences(
        conn,
        file_contents,
        pheno_ids,
        pheno_xref_ids
) -> tuple[tuple[dict, ...], tuple[dict, ...], tuple[dict, ...]]:
    """Compute differences between data in DB and edited data.

    Parameters:
        conn: Database connection used to fetch the current phenotypes.
        file_contents: Rows read from the edited file.
        pheno_ids: IDs of the phenotypes to fetch from the database.
        pheno_xref_ids: Currently unused by this function.

    Returns:
        A 3-tuple of differences, in the order
        `(descriptions, publications, data)`. Only the descriptions
        differences are computed at the moment; the other two items are
        empty tuples until their computations are implemented.
    """
    logger.info("Computing differences.")
    # 1. Basic Phenotype data differences
    #    a. Descriptions differences
    desc_diff = descriptions_differences(
        file_contents, __fetch_phenotypes__(conn, pheno_ids))
    logger.debug("DESCRIPTIONS DIFFERENCES: %s", desc_diff)
    #    b. Publications differences
    # TODO: pub_diff = publications_differences(...)
    # 2. Data differences
    # TODO: data_diff = data_differences(...)

    # Return what has been computed instead of discarding it, so the result
    # matches the declared return type. The empty tuples are placeholders
    # for the parts marked TODO above.
    return (desc_diff, tuple(), tuple())
@@ -130,7 +174,7 @@ def run(conn, job):
check_ids(conn, pheno_xref_ids)
check_for_mandatory_fields()
# stop running here if any errors are found.
- compute_differences()
+ compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids)
update_descriptions()
link_publications()
update_values()