about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/phenotypes_bulk_edit.py 42
1 files changed, 8 insertions, 34 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index 20fa66b..6d277e5 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -13,6 +13,7 @@ from MySQLdb.cursors import DictCursor
 from gn_libs import jobs, mysqldb, sqlite3
 
 import uploader.publications.pubmed as pmed
+from uploader.publications.misc import publications_differences
 from uploader.publications.models import fetch_phenotype_publications
 logging.basicConfig(
     format="%(asctime)s — %(filename)s:%(lineno)s — %(levelname)s: %(message)s")
@@ -85,36 +86,6 @@ def descriptions_differences(file_data, db_data) -> dict[str, str]:
     return diff
 
 
-def publications_differences(conn, file_data, db_data, pubmed_ids) -> dict:
-    """Compute differences in the publications."""
-    logger.info("Computing differences in publications.")
-    assert len(file_data) == len(db_data), "Publication counts differ!"
-    db_pubmed_ids = reduce(lambda coll, curr: coll.union(set([curr["PubMed_ID"]])),
-                           db_data,
-                           set([None]))
-
-    pubmedid_to_id_map = {
-        f"{row['PhenotypeId']}::{row['xref_id']}": row["PublicationId"] for row in db_data
-    }
-    new_pubmed_ids = tuple(pubmed_ids.difference(db_pubmed_ids))
-    new_publications = __save_new_publications__(
-        conn, pmed.fetch_publications(new_pubmed_ids), new_pubmed_ids)
-    new_pubmedid_to_id_map = {
-        row["PubMed_ID"]: new_publications.get(
-            row["PubMed_ID"], pubmedid_to_id_map[f"{row['phenotype_id']}::{row['xref_id']}"])
-        for row in file_data
-    }
-
-    return tuple(
-        item for item in ({
-            "PhenotypeId": row["phenotype_id"],
-            "xref_id": row["xref_id"],
-            "PublicationId": new_pubmedid_to_id_map.get(
-                row["PubMed_ID"], pubmedid_to_id_map[f"{row['phenotype_id']}::{row['xref_id']}"])
-        } for row in file_data)
-        if item["PublicationId"] != pubmedid_to_id_map[f"{item['PhenotypeId']}::{item['xref_id']}"])
-
-
 def compute_differences(
         conn,
         file_contents,
@@ -129,12 +100,15 @@ def compute_differences(
     desc_diff = descriptions_differences(
         file_contents, __fetch_phenotypes__(conn, pheno_ids))
     logger.debug("DESCRIPTIONS DIFFERENCES: %s", desc_diff)
+
     #    b. Publications differences
+    db_publications = fetch_phenotype_publications(conn, pheno_xref_ids)
+    new_publications = create_new_publications(
+        conn, pmed.fetch_publications(new_pubmed_ids))
     pub_diff = publications_differences(
-        conn,
-        file_contents,
-        fetch_phenotype_publications(conn, pheno_xref_ids),
-        pubmed_ids)
+        file_contents, db_publications, {
+            row["PubMed_ID"]: row["PublicationId"]
+            for row in db_publications + new_publications})
     logger.debug("Publications diff: %s", pub_diff)
     # 2. Data differences
     # data_diff = data_differences(...)