about | summary | refs | log | tree | commit | diff
path: root/scripts/phenotypes_bulk_edit.py
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2025-04-14 12:33:27 -0500
committer: Frederick Muriuki Muriithi, 2025-04-14 12:33:27 -0500
commit: 49f1b2fa06da54d2f839ef6d4ca3061b92883417 (patch)
tree: 5125af311d32dcee37cf601f39eee0b528db1861 /scripts/phenotypes_bulk_edit.py
parent: 386e3e70de5bad8db03b83d91f08fcfb3faa1d05 (diff)
download: gn-uploader-49f1b2fa06da54d2f839ef6d4ca3061b92883417.tar.gz
Update existing linked publication(s) rather than creating new ones.
Diffstat (limited to 'scripts/phenotypes_bulk_edit.py')
-rw-r--r-- scripts/phenotypes_bulk_edit.py 23
1 files changed, 18 insertions, 5 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index 7de3347..1d8124e 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -14,7 +14,7 @@ import uploader.publications.pubmed as pmed
from uploader.publications.misc import publications_differences
from uploader.phenotypes.views import BULK_EDIT_COMMON_FIELDNAMES
from uploader.publications.models import (
- create_new_publications, fetch_phenotype_publications)
+ update_publications, fetch_phenotype_publications)
logging.basicConfig(
format="%(asctime)s — %(filename)s:%(lineno)s — %(levelname)s: %(message)s")
@@ -104,15 +104,28 @@ def compute_differences(
# b. Publications differences
db_publications = fetch_phenotype_publications(conn, pheno_xref_ids)
- new_publications = create_new_publications(
- conn, pmed.fetch_publications(tuple(
+
+ _pubmed_map = {
+ (int(row["PubMed_ID"]) if bool(row["PubMed_ID"]) else None): f"{row['phenotype_id']}::{row['xref_id']}"
+ for row in file_contents
+ }
+ _pub_id_map = {
+ f"{pub['PhenotypeId']}::{pub['xref_id']}": pub["PublicationId"]
+ for pub in db_publications
+ }
+
+ new_publications = update_publications(
+ conn, tuple({
+ **pub, "publication_id": _pub_id_map[_pubmed_map[pub["pubmed_id"]]]
+ } for pub in pmed.fetch_publications(tuple(
pubmed_id for pubmed_id in pubmed_ids
if pubmed_id not in
- tuple(row["PubMed_ID"] for row in db_publications))))
+ tuple(row["PubMed_ID"] for row in db_publications)))))
logger.debug("New Publications: %s", new_publications)
pub_diff = publications_differences(
file_contents, db_publications, {
- row["PubMed_ID"]: row["PublicationId"]
+ row["PubMed_ID" if "PubMed_ID" in row else "pubmed_id"]: row[
+ "PublicationId" if "PublicationId" in row else "publication_id"]
for row in db_publications + new_publications})
logger.debug("Publications diff: %s", pub_diff)
# 2. Data differences