about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2025-03-26 16:01:08 -0500
committer Frederick Muriuki Muriithi 2025-03-26 16:01:31 -0500
commit aec3fdf6dc62b3976163850e5857d5e4a0544b98 (patch)
tree e7b209f6ee187c33a56654f880bd29807be44449 /scripts
parent ee9c2e021759e967e9a257843579519e2fd2286e (diff)
download gn-uploader-aec3fdf6dc62b3976163850e5857d5e4a0544b98.tar.gz
Partial implementation: Fetch publications from NCBI.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/phenotypes_bulk_edit.py 39
1 files changed, 39 insertions, 0 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index 72a901a..175282e 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -6,6 +6,7 @@ from pathlib import Path
 from typing import Iterator
 from functools import reduce
 
+import requests
 from MySQLdb.cursors import DictCursor
 
 from gn_libs import jobs, mysqldb, sqlite3
@@ -96,6 +97,44 @@ def __fetch_publications__(conn, ids):
         return tuple(dict(row) for row in cursor.fetchall())
 
 
+def __process_pubmed_publication_data__(text):
+    """Process the data from PubMed into usable data (not implemented yet)."""
+    # TODO: parse `text` with lxml; until then this stub returns None.
+    pass
+
+
+def __fetch_new_pubmed_ids__(pubmed_ids):
+    """Retrieve data on new publications from NCBI.
+
+    Requests the PubMed records for `pubmed_ids` from the efetch endpoint in
+    a single call. Returns whatever `__process_pubmed_publication_data__`
+    gives on HTTP 200; otherwise logs the failure and returns an empty tuple.
+    """
+    # TODO: Parse data, save to DB and return the new PublicationId(s).
+    # IDs are stringified up front: `str.join` rejects non-string items.
+    ids = tuple(str(item) for item in pubmed_ids)
+    logger.info("Fetching publications data for the following PubMed IDs: %s",
+                ", ".join(ids))
+
+    # Should we, perhaps, pass this in from a config variable?
+    uri = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
+    params = {"db": "pubmed", "retmode": "xml", "id": ",".join(ids)}
+    try:
+        response = requests.get(uri, params=params)
+
+        if response.status_code == 200:
+            return __process_pubmed_publication_data__(response.text)
+
+        logger.error(
+            "Could not fetch the new publication from %s (status code: %s)",
+            uri,
+            response.status_code)
+    except requests.exceptions.ConnectionError:
+        logger.error("Could not find the domain %s", uri)
+
+    return tuple()
+
+
     """Compute differences between data in DB and edited data."""
     logger.info("Computing differences.")
     # 1. Basic Phenotype data differences