Convert PubMed_ID value from file to int

author: Frederick Muriuki Muriithi 2025-04-09 17:02:57 -0500
committer: Frederick Muriuki Muriithi 2025-04-09 17:02:57 -0500
commit: dd2b36c5a3427c58df0cee332bd4661a3ceb0b4d (patch)
tree: 12713e5a661982ef9712bda16e82251d483f07c2 /scripts
parent: b1b154b3d7cb146e6d9862ca5df622738e61654d (diff)
download: gn-uploader-dd2b36c5a3427c58df0cee332bd4661a3ceb0b4d.tar.gz
1 file changed, 4 insertions, 1 deletion
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index 395b1bb..07104a5 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -168,7 +168,7 @@ def __fetch_new_pubmed_ids__(pubmed_ids):
         return tuple()
 
     logger.info("Fetching publications data for the following PubMed IDs: %s",
-                ", ".join(pubmed_ids))
+                ", ".join((str(pid) for pid in pubmed_ids)))
 
     # Should we, perhaps, pass this in from a config variable?
     uri = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
@@ -343,6 +343,9 @@ def read_file(filepath: Path) -> Iterator[str]:
             _pheno, _xref = _dict.pop("UniqueIdentifier").split("::")
             _dict["phenotype_id"] = int(_pheno.split(":")[1])
             _dict["xref_id"] = int(_xref.split(":")[1])
+            if _dict["PubMed_ID"] is not None:
+                _dict["PubMed_ID"] = int(_dict["PubMed_ID"])
+
             yield _dict
             count = count + 1
author	Frederick Muriuki Muriithi	2025-04-09 17:02:57 -0500
committer	Frederick Muriuki Muriithi	2025-04-09 17:02:57 -0500
commit	dd2b36c5a3427c58df0cee332bd4661a3ceb0b4d (patch)
tree	12713e5a661982ef9712bda16e82251d483f07c2 /scripts
parent	b1b154b3d7cb146e6d9862ca5df622738e61654d (diff)
download	gn-uploader-dd2b36c5a3427c58df0cee332bd4661a3ceb0b4d.tar.gz