aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2025-03-26 16:01:08 -0500
committerFrederick Muriuki Muriithi2025-03-26 16:01:31 -0500
commitaec3fdf6dc62b3976163850e5857d5e4a0544b98 (patch)
treee7b209f6ee187c33a56654f880bd29807be44449 /scripts
parentee9c2e021759e967e9a257843579519e2fd2286e (diff)
downloadgn-uploader-aec3fdf6dc62b3976163850e5857d5e4a0544b98.tar.gz
Partial implementation: Fetch publications from NCBI.
Diffstat (limited to 'scripts')
-rw-r--r--scripts/phenotypes_bulk_edit.py39
1 files changed, 39 insertions, 0 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index 72a901a..175282e 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -6,6 +6,7 @@ from pathlib import Path
from typing import Iterator
from functools import reduce
+import requests
from MySQLdb.cursors import DictCursor
from gn_libs import jobs, mysqldb, sqlite3
@@ -96,6 +97,44 @@ def __fetch_publications__(conn, ids):
return tuple(dict(row) for row in cursor.fetchall())
+def __process_pubmed_publication_data__(text):
+    """Turn the response `text` fetched from PubMed into usable data.
+
+    Placeholder: no processing is done yet, so this always returns `None`.
+    """
+    # TODO: Process with lxml
+    return None
+
+
+def __fetch_new_pubmed_ids__(pubmed_ids):
+    """Retrieve data on new publications from NCBI.
+
+    Sends a single request for all the given PubMed IDs to the NCBI
+    E-utilities `efetch` endpoint and, when the response has status code
+    200, passes the response text to `__process_pubmed_publication_data__`.
+
+    :param pubmed_ids: iterable of PubMed IDs; each item is converted with
+        `str` before use, so both integers and strings are accepted.
+    :return: the value returned by `__process_pubmed_publication_data__` on
+        success; an empty tuple when no IDs are given or when the request
+        fails (failures are logged, not raised).
+    """
+    # See whether we can retrieve multiple publications in one go
+    # Parse data and save to DB
+    # Return PublicationId(s) for new publication(s).
+
+    # Materialise once: the IDs are needed for both the log message and the
+    # request, and may be non-string values or a one-shot iterator.
+    ids = tuple(str(item) for item in pubmed_ids)
+    if not ids:
+        # Nothing to fetch; do not send NCBI a request with an empty ID list.
+        return tuple()
+
+    logger.info("Fetching publications data for the following PubMed IDs: %s",
+                ", ".join(ids))
+
+    # Should we, perhaps, pass this in from a config variable?
+    uri = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
+    try:
+        response = requests.get(
+            uri,
+            params={
+                "db": "pubmed",
+                "retmode": "xml",
+                "id": ",".join(ids)
+            },
+            # Without a timeout, `requests` waits forever on a stalled server.
+            timeout=30)
+
+        if response.status_code == 200:
+            return __process_pubmed_publication_data__(response.text)
+
+        logger.error(
+            "Could not fetch the new publication from %s (status code: %s)",
+            uri,
+            response.status_code)
+    except requests.exceptions.Timeout:
+        # Listed before ConnectionError: ConnectTimeout derives from both.
+        logger.error("Request to %s timed out", uri)
+    except requests.exceptions.ConnectionError:
+        logger.error("Could not find the domain %s", uri)
+
+    return tuple()
+
+
"""Compute differences between data in DB and edited data."""
logger.info("Computing differences.")
# 1. Basic Phenotype data differences