diff options
author | Frederick Muriuki Muriithi | 2025-03-26 13:10:00 -0500 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2025-03-26 15:59:05 -0500 |
commit | 4bfce99098799571152755b870677a94326ff3fe (patch) | |
tree | 2938b425e0ddc9e677bce6d630caabdbf41878b8 /scripts | |
parent | c35d84fbe327bae00361e2e09a56090aa2e3c72f (diff) | |
download | gn-uploader-4bfce99098799571152755b870677a94326ff3fe.tar.gz |
Compute differences in the descriptions.
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/phenotypes_bulk_edit.py | 48 |
1 files changed, 46 insertions, 2 deletions
```diff
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index 0ced2ab..8da3c77 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -45,9 +45,53 @@ def check_for_mandatory_fields():
     pass
 
 
-def compute_differences():
+def __fetch_phenotypes__(conn, ids: tuple[int, ...]) -> tuple[dict, ...]:
+    """Fetch basic (non-numeric) phenotypes data from the database."""
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        paramstr = ",".join(["%s"] * len(ids))
+        cursor.execute(f"SELECT * FROM Phenotype WHERE Id IN ({paramstr}) "
+                       "ORDER BY Id ASC",
+                       ids)
+        return tuple(dict(row) for row in cursor.fetchall())
+
+
+def descriptions_differences(file_data, db_data) -> dict[str, str]:
+    """Compute differences in the descriptions."""
+    logger.info("Computing differences in phenotype descriptions.")
+    assert len(file_data) == len(db_data), "The counts of phenotypes differ!"
+    description_columns = ("Pre_publication_description",
+                           "Post_publication_description",
+                           "Original_description",
+                           "Pre_publication_abbreviation",
+                           "Post_publication_abbreviation")
+    diff = tuple()
+    for file_row, db_row in zip(file_data, db_data):
+        assert file_row["phenotype_id"] == db_row["Id"]
+        inner_diff = {
+            key: file_row[key]
+            for key in description_columns
+            if not file_row[key] == db_row[key]
+        }
+        if bool(inner_diff):
+            diff = diff + ({
+                "phenotype_id": file_row["phenotype_id"],
+                **inner_diff
+            },)
+
+    return diff
+
+
+def compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids) -> tuple[tuple[dict, ...], tuple[dict, ...], tuple[dict, ...]]:
     """Compute differences between data in DB and edited data."""
     logger.info("Computing differences.")
+    # 1. Basic Phenotype data differences
+    # a. Descriptions differences
+    desc_diff = descriptions_differences(file_contents, __fetch_phenotypes__(conn, pheno_ids))
+    logger.debug("DESCRIPTIONS DIFFERENCES: %s", desc_diff)
+    # b. Publications differences
+    # pub_diff = publications_differences(...)
+    # 2. Data differences
+    # data_diff = data_differences(...)
     pass
 
 
@@ -130,7 +174,7 @@ def run(conn, job):
     check_ids(conn, pheno_xref_ids)
     check_for_mandatory_fields()
     # stop running here if any errors are found.
-    compute_differences()
+    compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids)
     update_descriptions()
     link_publications()
     update_values()
```