diff options
-rw-r--r-- | scripts/phenotypes_bulk_edit.py | 23 | ||||
-rw-r--r-- | uploader/publications/models.py | 25 |
2 files changed, 42 insertions, 6 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py index 7de3347..1d8124e 100644 --- a/scripts/phenotypes_bulk_edit.py +++ b/scripts/phenotypes_bulk_edit.py @@ -14,7 +14,7 @@ import uploader.publications.pubmed as pmed from uploader.publications.misc import publications_differences from uploader.phenotypes.views import BULK_EDIT_COMMON_FIELDNAMES from uploader.publications.models import ( - create_new_publications, fetch_phenotype_publications) + update_publications, fetch_phenotype_publications) logging.basicConfig( format="%(asctime)s — %(filename)s:%(lineno)s — %(levelname)s: %(message)s") @@ -104,15 +104,28 @@ def compute_differences( # b. Publications differences db_publications = fetch_phenotype_publications(conn, pheno_xref_ids) - new_publications = create_new_publications( - conn, pmed.fetch_publications(tuple( + + _pubmed_map = { + (int(row["PubMed_ID"]) if bool(row["PubMed_ID"]) else None): f"{row['phenotype_id']}::{row['xref_id']}" + for row in file_contents + } + _pub_id_map = { + f"{pub['PhenotypeId']}::{pub['xref_id']}": pub["PublicationId"] + for pub in db_publications + } + + new_publications = update_publications( + conn, tuple({ + **pub, "publication_id": _pub_id_map[_pubmed_map[pub["pubmed_id"]]] + } for pub in pmed.fetch_publications(tuple( pubmed_id for pubmed_id in pubmed_ids if pubmed_id not in - tuple(row["PubMed_ID"] for row in db_publications)))) + tuple(row["PubMed_ID"] for row in db_publications))))) logger.debug("New Publications: %s", new_publications) pub_diff = publications_differences( file_contents, db_publications, { - row["PubMed_ID"]: row["PublicationId"] + row["PubMed_ID" if "PubMed_ID" in row else "pubmed_id"]: row[ + "PublicationId" if "PublicationId" in row else "publication_id"] for row in db_publications + new_publications}) logger.debug("Publications diff: %s", pub_diff) # 2. 
def update_publications(
        conn: Connection,
        publications: tuple[dict, ...]
) -> tuple[dict, ...]:
    """Update the details of multiple existing publications.

    One parameterised ``UPDATE Publication ... WHERE Id=...`` statement is
    executed per item in `publications`, using `cursor.executemany`. No
    commit is issued here.

    Parameters:
        conn: The database connection used to open the cursor.
        publications: The publications to update. Each item must provide
            every key named by the placeholders in the query: `pubmed_id`,
            `abstract`, `authors`, `title`, `journal`, `volume`, `pages`,
            `month`, `year` and `publication_id` (matched against
            `Publication.Id`).

    Returns:
        `publications`, unchanged, once the statements have been executed;
        an empty tuple when there is nothing to update.
    """
    if not publications:
        # Nothing to update: do not open a cursor at all.
        return tuple()

    with conn.cursor(cursorclass=DictCursor) as cursor:
        logger.debug("UPDATING PUBLICATIONS: %s", publications)
        cursor.executemany(
            ("UPDATE Publication SET "
             "PubMed_ID=%(pubmed_id)s, Abstract=%(abstract)s, "
             "Authors=%(authors)s, Title=%(title)s, Journal=%(journal)s, "
             "Volume=%(volume)s, Pages=%(pages)s, Month=%(month)s, "
             "Year=%(year)s "
             "WHERE Id=%(publication_id)s"),
            publications)
        debug_query(cursor, logger)

    # Single exit for the success path; the original carried an extra,
    # unreachable `return tuple()` after the `with` block.
    return publications