about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--scripts/phenotypes_bulk_edit.py41
1 files changed, 37 insertions, 4 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index 9ff5ffc..395b1bb 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -225,24 +225,57 @@ def __save_new_publications__(conn, publications, pubmed_ids) -> dict:
         return {}
 
 
+def publications_differences(conn, file_data, db_data, pubmed_ids) -> dict:
     """Compute differences in the publications."""
     logger.info("Computing differences in publications.")
+    assert len(file_data) == len(db_data), "Publication counts differ!"
     db_pubmed_ids = reduce(lambda coll, curr: coll.union(set([curr["PubMed_ID"]])),
                            db_data,
                            set([None]))
+
+    pubmedid_to_id_map = {
+        f"{row['PhenotypeId']}::{row['xref_id']}": row["PublicationId"] for row in db_data
+    }
     new_pubmed_ids = tuple(pubmed_ids.difference(db_pubmed_ids))
-    new_publications = __fetch_new_pubmed_ids__(new_pubmed_ids)
+    new_publications = __save_new_publications__(
+        conn, __fetch_new_pubmed_ids__(new_pubmed_ids), new_pubmed_ids)
+    new_pubmedid_to_id_map = {
+        row["PubMed_ID"]: new_publications.get(
+            row["PubMed_ID"], pubmedid_to_id_map[f"{row['phenotype_id']}::{row['xref_id']}"])
+        for row in file_data
+    }
 
+    return tuple(
+        item for item in ({
+            "PhenotypeId": row["phenotype_id"],
+            "xref_id": row["xref_id"],
+            "PublicationId": new_pubmedid_to_id_map.get(
+                row["PubMed_ID"], pubmedid_to_id_map[f"{row['phenotype_id']}::{row['xref_id']}"])
+        } for row in file_data)
+        if item["PublicationId"] != pubmedid_to_id_map[f"{item['PhenotypeId']}::{item['xref_id']}"])
 
-def compute_differences(conn, file_contents, pheno_ids, pheno_xref_ids, pubmed_ids) -> tuple[tuple[dict, ...], tuple[dict, ...], tuple[dict, ...]]:
+
+def compute_differences(
+        conn,
+        file_contents,
+        pheno_ids,
+        pheno_xref_ids,
+        pubmed_ids
+) -> tuple[tuple[dict, ...], tuple[dict, ...], tuple[dict, ...]]:
     """Compute differences between data in DB and edited data."""
     logger.info("Computing differences.")
     # 1. Basic Phenotype data differences
     #    a. Descriptions differences
-    desc_diff = descriptions_differences(file_contents, __fetch_phenotypes__(conn, pheno_ids))
+    desc_diff = descriptions_differences(
+        file_contents, __fetch_phenotypes__(conn, pheno_ids))
     logger.debug("DESCRIPTIONS DIFFERENCES: %s", desc_diff)
     #    b. Publications differences
-    # pub_diff = publications_differences(...)
+    pub_diff = publications_differences(
+        conn,
+        file_contents,
+        __fetch_publications__(conn, pheno_xref_ids),
+        pubmed_ids)
+    logger.debug("Publications diff: %s", pub_diff)
     # 2. Data differences
     # data_diff = data_differences(...)
     pass