about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2025-04-11 14:59:49 -0500
committer Frederick Muriuki Muriithi 2025-04-11 15:18:53 -0500
commit 5eddcada30e8d6bbc4631103ae3fd96bdc6b134b (patch)
tree 07e9c5a91de11dd37f7787bb1793a8277bb98aff
parent 70ecf94d8629d75dfa6ef295ddd18c0f4aa622f8 (diff)
download gn-uploader-5eddcada30e8d6bbc4631103ae3fd96bdc6b134b.tar.gz
Move code to fetch phenotype publications from DB to publications package.
-rw-r--r-- scripts/phenotypes_bulk_edit.py 3
-rw-r--r-- uploader/publications/models.py 15
2 files changed, 17 insertions, 1 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index 488805c..4888924 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -13,6 +13,7 @@ from MySQLdb.cursors import DictCursor
 from gn_libs import jobs, mysqldb, sqlite3
 
 import uploader.publications.pubmed as pmed
+from uploader.publications.models import fetch_phenotype_publications
 logging.basicConfig(
     format="%(asctime)s — %(filename)s:%(lineno)s — %(levelname)s: %(message)s")
 logger = logging.getLogger(__name__)
@@ -163,7 +164,7 @@ def compute_differences(
     pub_diff = publications_differences(
         conn,
         file_contents,
-        __fetch_publications__(conn, pheno_xref_ids),
+        fetch_phenotype_publications(conn, pheno_xref_ids),
         pubmed_ids)
     logger.debug("Publications diff: %s", pub_diff)
     # 2. Data differences
diff --git a/uploader/publications/models.py b/uploader/publications/models.py
new file mode 100644
index 0000000..89da06c
--- /dev/null
+++ b/uploader/publications/models.py
@@ -0,0 +1,15 @@
+"""Module to handle persistence and retrieval of publication to/from MariaDB"""
+
+def fetch_phenotype_publications(
+        conn, ids: tuple[tuple[int, int], ...]) -> tuple[dict, ...]:
+    """Fetch publication from database by ID."""
+    paramstr = ",".join(["(%s, %s)"] * len(ids))
+    query = (
+        "SELECT "
+        "pxr.PhenotypeId, pxr.Id AS xref_id, pxr.PublicationId, pub.PubMed_ID "
+        "FROM PublishXRef AS pxr INNER JOIN Publication AS pub "
+        "ON pxr.PublicationId=pub.Id "
+        f"WHERE (pxr.PhenotypeId, pxr.Id) IN ({paramstr})")
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        cursor.execute(query, tuple(item for row in ids for item in row))
+        return tuple(dict(row) for row in cursor.fetchall())