about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2025-04-09 16:59:47 -0500
committer: Frederick Muriuki Muriithi, 2025-04-09 16:59:47 -0500
commit: d45ea3bbd7185b0867ea6ea0695015ea9c441f7e (patch)
tree: 6e053c0b54f8106d097c285f50301a4a2ffceff9 /scripts
parent: 8b4be691fa9e5574b3d9151ca0a31921f72fff33 (diff)
download: gn-uploader-d45ea3bbd7185b0867ea6ea0695015ea9c441f7e.tar.gz
Save new publications retrieved from NCBI's PubMed database.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/phenotypes_bulk_edit.py 32
1 files changed, 31 insertions, 1 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index ebb0241..9ff5ffc 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -194,7 +194,37 @@ def __fetch_new_pubmed_ids__(pubmed_ids):
     return tuple()
 
 
-def publications_differences(file_data, db_data, pubmed_ids) -> dict:
+def __save_new_publications__(conn, publications, pubmed_ids) -> dict:
+    """Insert or update publications, then map PubMed IDs to database IDs.
+
+    Parameters:
+        conn: Database connection; only `conn.cursor(cursorclass=...)` is
+            used here.
+        publications: Sequence of mappings providing the keys `pubmed_id`,
+            `abstract`, `authors`, `title`, `journal`, `volume`, `pages`,
+            `month` and `year` (the named placeholders in the INSERT).
+        pubmed_ids: The PubMed IDs whose `Publication.Id` values are
+            looked up after the insert.
+
+    Returns:
+        A dict of `PubMed_ID` -> `Id` for the matching `Publication` rows.
+        An empty dict when there are no publications to save, or no
+        PubMed IDs to look up.
+    """
+    # Guard clause: with nothing to save, always hand back a dict so the
+    # `-> dict` contract holds instead of falling off the end of the `if`.
+    if not publications:
+        return {}
+
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        # Rows whose unique key already exists have every non-key column
+        # overwritten with the incoming values.
+        cursor.executemany(
+            ("INSERT INTO "
+             "Publication( "
+             "PubMed_ID, Abstract, Authors, Title, Journal, Volume, Pages, "
+             "Month, Year"
+             ") "
+             "VALUES("
+             "%(pubmed_id)s, %(abstract)s, %(authors)s, %(title)s, "
+             "%(journal)s, %(volume)s, %(pages)s, %(month)s, %(year)s"
+             ") "
+             "ON DUPLICATE KEY UPDATE "
+             "Abstract=VALUES(Abstract), Authors=VALUES(Authors), "
+             "Title=VALUES(Title), Journal=VALUES(Journal), "
+             "Volume=VALUES(Volume), Pages=VALUES(pages), "
+             "Month=VALUES(Month), Year=VALUES(Year)"),
+            publications)
+
+        # Materialise once so the placeholder count and the bound
+        # parameters always agree, whatever iterable the caller passed.
+        ids = tuple(pubmed_ids)
+        if not ids:
+            # `IN ()` is not valid SQL, so skip the lookup entirely.
+            return {}
+
+        paramstr = ", ".join(["%s"] * len(ids))
+        cursor.execute(
+            ("SELECT Id, PubMed_ID FROM Publication "
+             f"WHERE PubMed_ID IN ({paramstr})"),
+            ids)
+        return {
+            row["PubMed_ID"]: row["Id"] for row in cursor.fetchall()
+        }
+
+
     """Compute differences in the publications."""
     logger.info("Computing differences in publications.")
     db_pubmed_ids = reduce(lambda coll, curr: coll.union(set([curr["PubMed_ID"]])),