def __save_new_publications__(conn, publications, pubmed_ids) -> dict:
    """Save publications and map their PubMed IDs to database IDs.

    Each item in `publications` is inserted into the `Publication` table;
    when the insert hits a duplicate key, the existing row's Abstract,
    Authors, Title, Journal, Volume, Pages, Month and Year are overwritten
    with the new values. Afterwards the `Id` of every row whose `PubMed_ID`
    is in `pubmed_ids` is looked up.

    Parameters:
        conn: database connection whose `cursor()` accepts a `cursorclass`
            keyword and returns a context-managed cursor.
        publications: sequence of mappings providing the keys `pubmed_id`,
            `abstract`, `authors`, `title`, `journal`, `volume`, `pages`,
            `month` and `year`.
        pubmed_ids: the PubMed IDs whose database IDs should be returned.

    Returns:
        A dict of `PubMed_ID` -> `Id`. The dict is empty when there are no
        publications to save or no PubMed IDs to look up.
    """
    if not publications:
        return {}

    with conn.cursor(cursorclass=DictCursor) as cursor:
        cursor.executemany(
            ("INSERT INTO "
             "Publication( "
             "PubMed_ID, Abstract, Authors, Title, Journal, Volume, Pages, "
             "Month, Year"
             ") "
             "VALUES("
             "%(pubmed_id)s, %(abstract)s, %(authors)s, %(title)s, "
             "%(journal)s, %(volume)s, %(pages)s, %(month)s, %(year)s"
             ") "
             "ON DUPLICATE KEY UPDATE "
             "Abstract=VALUES(Abstract), Authors=VALUES(Authors), "
             "Title=VALUES(Title), Journal=VALUES(Journal), "
             "Volume=VALUES(Volume), Pages=VALUES(pages), "
             "Month=VALUES(Month), Year=VALUES(Year)"),
            publications)

        # Materialise once so that the number of placeholders and the bound
        # arguments are guaranteed to agree.
        lookup_ids = tuple(pubmed_ids)
        if not lookup_ids:
            # `IN ()` with an empty list is a SQL syntax error; with nothing
            # to look up the mapping is simply empty.
            return {}

        paramstr = ", ".join(["%s"] * len(lookup_ids))
        cursor.execute(
            ("SELECT Id, PubMed_ID FROM Publication "
             f"WHERE PubMed_ID IN ({paramstr})"),
            lookup_ids)
        return {
            row["PubMed_ID"]: row["Id"] for row in cursor.fetchall()
        }