about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- scripts/phenotypes_bulk_edit.py | 23
-rw-r--r-- uploader/publications/models.py | 25
2 files changed, 42 insertions, 6 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index 7de3347..1d8124e 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -14,7 +14,7 @@ import uploader.publications.pubmed as pmed
 from uploader.publications.misc import publications_differences
 from uploader.phenotypes.views import BULK_EDIT_COMMON_FIELDNAMES
 from uploader.publications.models import (
-    create_new_publications, fetch_phenotype_publications)
+    update_publications, fetch_phenotype_publications)
 
 logging.basicConfig(
     format="%(asctime)s — %(filename)s:%(lineno)s — %(levelname)s: %(message)s")
@@ -104,15 +104,28 @@ def compute_differences(
 
     #    b. Publications differences
     db_publications = fetch_phenotype_publications(conn, pheno_xref_ids)
-    new_publications = create_new_publications(
-        conn, pmed.fetch_publications(tuple(
+
+    _pubmed_map = {
+        (int(row["PubMed_ID"]) if bool(row["PubMed_ID"]) else None): f"{row['phenotype_id']}::{row['xref_id']}"
+        for row in file_contents
+    }
+    _pub_id_map = {
+        f"{pub['PhenotypeId']}::{pub['xref_id']}": pub["PublicationId"]
+        for pub in db_publications
+    }
+
+    new_publications = update_publications(
+        conn, tuple({
+            **pub, "publication_id": _pub_id_map[_pubmed_map[pub["pubmed_id"]]]
+        } for pub in pmed.fetch_publications(tuple(
             pubmed_id for pubmed_id in pubmed_ids
             if pubmed_id not in
-            tuple(row["PubMed_ID"] for row in db_publications))))
+            tuple(row["PubMed_ID"] for row in db_publications)))))
     logger.debug("New Publications: %s", new_publications)
     pub_diff = publications_differences(
         file_contents, db_publications, {
-            row["PubMed_ID"]: row["PublicationId"]
+            row["PubMed_ID" if "PubMed_ID" in row else "pubmed_id"]: row[
+                "PublicationId" if "PublicationId" in row else "publication_id"]
             for row in db_publications + new_publications})
     logger.debug("Publications diff: %s", pub_diff)
     # 2. Data differences
diff --git a/uploader/publications/models.py b/uploader/publications/models.py
index 37c1df6..3fc9542 100644
--- a/uploader/publications/models.py
+++ b/uploader/publications/models.py
@@ -1,7 +1,11 @@
 """Module to handle persistence and retrieval of publication to/from MariaDB"""
+import logging
+
 from MySQLdb.cursors import DictCursor
 
-from gn_libs.mysqldb import Connection
+from gn_libs.mysqldb import Connection, debug_query
+
+logger = logging.getLogger(__name__)
 
 
 def fetch_phenotype_publications(
@@ -48,3 +52,22 @@ def create_new_publications(
                 **row, "PublicationId": row["Id"]
             } for row in cursor.fetchall())
         return tuple()
+
+
+def update_publications(
+        conn: Connection, publications: tuple[dict, ...]) -> tuple[dict, ...]:
+    """Update each publication's row in `Publication`, matched on `Id`."""
+    if not publications:  # nothing to update: skip opening a cursor
+        return tuple()
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        logger.debug("UPDATING PUBLICATIONS: %s", publications)
+        cursor.executemany(  # one UPDATE per dict, bound by its named keys
+            ("UPDATE Publication SET "
+             "PubMed_ID=%(pubmed_id)s, Abstract=%(abstract)s, "
+             "Authors=%(authors)s, Title=%(title)s, Journal=%(journal)s, "
+             "Volume=%(volume)s, Pages=%(pages)s, Month=%(month)s, "
+             "Year=%(year)s "
+             "WHERE Id=%(publication_id)s"),
+            publications)
+        debug_query(cursor, logger)
+    return publications  # the input dicts, returned unchanged