about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- scripts/phenotypes_bulk_edit.py 48
1 files changed, 46 insertions, 2 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index 0ced2ab..8da3c77 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -45,9 +45,53 @@ def check_for_mandatory_fields():
     pass
 
 
-def compute_differences():
+def __fetch_phenotypes__(conn, ids: tuple[int, ...]) -> tuple[dict, ...]:
+    """Fetch basic (non-numeric) phenotypes data for `ids`, ordered by Id."""
+    if not ids:  # An empty `IN ()` list is invalid SQL; skip the query.
+        return tuple()
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        cursor.execute("SELECT * FROM Phenotype WHERE Id IN "
+                       f"({','.join(['%s'] * len(ids))}) ORDER BY Id ASC", ids)
+        return tuple(dict(row) for row in cursor.fetchall())
+
+
+def descriptions_differences(file_data, db_data) -> tuple[dict, ...]:
+    """Compute differences in the descriptions.
+
+    Pairs rows of `file_data` and `db_data` positionally and returns one dict
+    per phenotype whose description/abbreviation columns differ: the
+    "phenotype_id" plus only the changed columns, with the file's values.
+    Both inputs must have equal length and matching IDs in the same order.
+    """
+    logger.info("Computing differences in phenotype descriptions.")
+    assert len(file_data) == len(db_data), "The counts of phenotypes differ!"
+    description_columns = ("Pre_publication_description",
+                           "Post_publication_description",
+                           "Original_description",
+                           "Pre_publication_abbreviation",
+                           "Post_publication_abbreviation")
+    diff = []  # Appending to a list avoids re-copying a tuple per row.
+    for file_row, db_row in zip(file_data, db_data):
+        assert file_row["phenotype_id"] == db_row["Id"]
+        changed = {key: file_row[key] for key in description_columns
+                   if file_row[key] != db_row[key]}
+        if changed:
+            diff.append({"phenotype_id": file_row["phenotype_id"], **changed})
+
+    return tuple(diff)
+
+
+def compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids) -> tuple[tuple[dict, ...], tuple[dict, ...], tuple[dict, ...]]:
     """Compute differences between data in DB and edited data."""
     logger.info("Computing differences.")
+    # 1. Basic Phenotype data differences
+    #    a. Descriptions differences (file rows vs. rows fetched by ID)
+    desc_diff = descriptions_differences(file_contents, __fetch_phenotypes__(conn, pheno_ids))
+    logger.debug("DESCRIPTIONS DIFFERENCES: %s", desc_diff)
+    # TODO: 1b. publications differences, and 2. data differences.
+    # NOTE(review): the annotated 3-tuple is not returned yet; this function
+    # currently falls through to `pass` and returns None, and
+    # `pheno_xref_ids` is still unused.
     pass
 
 
@@ -130,7 +174,7 @@ def run(conn, job):
     check_ids(conn, pheno_xref_ids)
     check_for_mandatory_fields()
     # stop running here if any errors are found.
-    compute_differences()
+    compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids)
     update_descriptions()
     link_publications()
     update_values()