diff options
author | Frederick Muriuki Muriithi | 2025-03-26 16:01:08 -0500 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2025-03-26 16:01:31 -0500 |
commit | aec3fdf6dc62b3976163850e5857d5e4a0544b98 (patch) | |
tree | e7b209f6ee187c33a56654f880bd29807be44449 /scripts | |
parent | ee9c2e021759e967e9a257843579519e2fd2286e (diff) | |
download | gn-uploader-aec3fdf6dc62b3976163850e5857d5e4a0544b98.tar.gz |
Partial implementation: Fetch publications from NCBI.
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/phenotypes_bulk_edit.py | 39 |
1 files changed, 39 insertions, 0 deletions
def __process_pubmed_publication_data__(text):
    """Process the data from PubMed into usable data.

    Placeholder: the parsing step is not implemented yet, so `text` (the XML
    body returned by NCBI) is currently ignored and `None` is returned.
    """
    # TODO: Process with lxml


def __fetch_new_pubmed_ids__(pubmed_ids):
    """Retrieve data on new publications from NCBI.

    `pubmed_ids` is an iterable of PubMed IDs (strings or integers). All the
    IDs are requested from the NCBI EFetch endpoint in a single call.

    Returns whatever `__process_pubmed_publication_data__` produces for the
    response body when NCBI answers with HTTP 200. Returns an empty tuple
    when there are no IDs to fetch, when the request fails, or when NCBI
    answers with any other status code; failures are logged, not raised.
    """
    # See whether we can retrieve multiple publications in one go
    # Parse data and save to DB
    # Return PublicationId(s) for new publication(s).

    # Materialise once as strings: the IDs may be integers and the iterable
    # may be a one-shot generator, and they are needed both for logging and
    # for the request parameters.
    ids = tuple(str(item) for item in pubmed_ids)
    if not ids:
        # Nothing to fetch: avoid a pointless round-trip to NCBI.
        return tuple()

    logger.info("Fetching publications data for the following PubMed IDs: %s",
                ", ".join(ids))

    # Should we, perhaps, pass this in from a config variable?
    uri = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    try:
        response = requests.get(
            uri,
            params={
                "db": "pubmed",
                "retmode": "xml",
                "id": ",".join(ids)
            },
            # Without a timeout `requests` can block indefinitely on an
            # unresponsive server.
            timeout=30)
    except requests.exceptions.ConnectionError:
        logger.error("Could not find the domain %s", uri)
        return tuple()
    except requests.exceptions.Timeout:
        logger.error("Timed out while fetching publications from %s", uri)
        return tuple()

    if response.status_code == 200:
        return __process_pubmed_publication_data__(response.text)

    logger.error(
        "Could not fetch the new publication from %s (status code: %s)",
        uri,
        response.status_code)
    return tuple()