aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2025-04-09 16:59:47 -0500
committer Frederick Muriuki Muriithi 2025-04-09 16:59:47 -0500
commit d45ea3bbd7185b0867ea6ea0695015ea9c441f7e (patch)
tree 6e053c0b54f8106d097c285f50301a4a2ffceff9 /scripts
parent 8b4be691fa9e5574b3d9151ca0a31921f72fff33 (diff)
download gn-uploader-d45ea3bbd7185b0867ea6ea0695015ea9c441f7e.tar.gz
Save new publications retrieved from NCBI's PubMed database.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/phenotypes_bulk_edit.py 32
1 files changed, 31 insertions, 1 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index ebb0241..9ff5ffc 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -194,7 +194,37 @@ def __fetch_new_pubmed_ids__(pubmed_ids):
return tuple()
-def publications_differences(file_data, db_data, pubmed_ids) -> dict:
+def __save_new_publications__(conn, publications, pubmed_ids) -> dict:
+    """Save publications to the database and look up their database IDs.
+
+    Each item in `publications` is inserted into the `Publication` table. If
+    the insert hits a duplicate key, the existing row's `Abstract`, `Authors`,
+    `Title`, `Journal`, `Volume`, `Pages`, `Month` and `Year` columns are
+    overwritten with the new values instead. Afterwards, the rows whose
+    `PubMed_ID` is in `pubmed_ids` are read back.
+
+    Parameters:
+        conn: Database connection; must provide
+            `cursor(cursorclass=DictCursor)` usable as a context manager.
+        publications: Sequence of mappings with the keys `pubmed_id`,
+            `abstract`, `authors`, `title`, `journal`, `volume`, `pages`,
+            `month` and `year`.
+        pubmed_ids: Sequence of the PubMed IDs to look up after saving.
+
+    Returns:
+        A dict mapping each `PubMed_ID` found to the `Id` of its row in
+        `Publication`. The dict is empty when either `publications` or
+        `pubmed_ids` is empty.
+
+    No commit is issued here; presumably the caller commits -- confirm.
+    """
+    if not publications:
+        return {}
+
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        # NOTE(review): the upsert relies on a unique key on `Publication`
+        # (presumably on `PubMed_ID`) -- confirm against the schema.
+        cursor.executemany(
+            ("INSERT INTO "
+             "Publication( "
+             "PubMed_ID, Abstract, Authors, Title, Journal, Volume, Pages, "
+             "Month, Year"
+             ") "
+             "VALUES("
+             "%(pubmed_id)s, %(abstract)s, %(authors)s, %(title)s, "
+             "%(journal)s, %(volume)s, %(pages)s, %(month)s, %(year)s"
+             ") "
+             "ON DUPLICATE KEY UPDATE "
+             "Abstract=VALUES(Abstract), Authors=VALUES(Authors), "
+             "Title=VALUES(Title), Journal=VALUES(Journal), "
+             "Volume=VALUES(Volume), Pages=VALUES(Pages), "
+             "Month=VALUES(Month), Year=VALUES(Year)"),
+            publications)
+
+        if not pubmed_ids:
+            # An empty ID list would render as `IN ()`, which is invalid SQL.
+            return {}
+
+        # One `%s` placeholder per ID keeps the lookup fully parameterised.
+        paramstr = ", ".join(["%s"] * len(pubmed_ids))
+        cursor.execute(
+            ("SELECT Id, PubMed_ID FROM Publication "
+             f"WHERE PubMed_ID IN ({paramstr})"),
+            pubmed_ids)
+        return {row["PubMed_ID"]: row["Id"] for row in cursor.fetchall()}
+
+
"""Compute differences in the publications."""
logger.info("Computing differences in publications.")
db_pubmed_ids = reduce(lambda coll, curr: coll.union(set([curr["PubMed_ID"]])),