about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2025-05-19 10:16:52 -0500
committer Frederick Muriuki Muriithi 2025-05-19 10:26:29 -0500
commit e6adace2f9302a01b796176b7016feb9fae3d351 (patch)
tree 7ed1420bf676a528cb42b4fba31311fa2eb22e06 /scripts
parent 65367196ae1fc0c7fd0fa004abefd0bd86d2684e (diff)
download gn-uploader-e6adace2f9302a01b796176b7016feb9fae3d351.tar.gz
Initialise function to save publications
Do a rudimentary save of the publications: this is incomplete and
probably very buggy.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/load_phenotypes_to_db.py 41
1 files changed, 31 insertions, 10 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 5c792f0..d48084e 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -1,21 +1,41 @@
 import uuid
+import json
 import logging
 import argparse
+from typing import Union
 from pathlib import Path
 
 from MySQLdb.cursors import Cursor, DictCursor
 
 from gn_libs import jobs, mysqldb, sqlite3
 
+from uploader.publications.models import create_new_publications
 logging.basicConfig(
     format="%(asctime)s — %(filename)s:%(lineno)s — %(levelname)s: %(message)s")
 logger = logging.getLogger(__name__)
 
 
-def save_publications(cursor: Cursor, pubfiles):
+def save_publications(
+        conn: mysqldb.Connection,
+        pubfiles,
+        standalone_publication_id: Union[int, None] = None
+) -> tuple[dict[str, Union[int, str]], ...]:
     """Read the `pubfiles` and save the publications therein."""
+    # Collects the standalone publication (if given) and any new publications.
+    _publications = tuple()
+    if standalone_publication_id:
+        # HACK: Remove once bundle creation includes publication(s) in bundle.
+        _publications = _publications + (
+            fetch_publication_by_id(conn, standalone_publication_id),)
+    # TODO: check whether the publication-id exists; perhaps set up the bundle
+    #       with the appropriate publications:
+    #       gn-data -> (phenotypes-se, phenotypes-n); gn-metadata -> (pheno, geno)
+    if pubfiles:  # `pubfiles` may be None or empty. TODO: improve this check.
+        logger.info("Saving publications.")
+        # NOTE(review): assumes `create_new_publications` accepts `pubfiles`.
+        _publications += tuple(create_new_publications(conn, pubfiles))
     # Check for PubMed IDs, perhaps?
-    pass
+    return _publications
 
 
 def save_phenotypes(cursor: Cursor, phenofiles):
@@ -46,6 +66,7 @@ def cross_reference_phenotypes_publications_and_data(
 
 def load_data(conn, job):
     """Load the data attached in the given job."""
+    _job_metadata = json.loads(job["job-metadata"])
     with conn.cursor(cursorclass=DictCursor) as cursor:
         # Steps
         # 0. Read data from the files: can be multiple files per type
@@ -54,7 +75,8 @@ def load_data(conn, job):
         #     -> return phenotype IDs
         _control_data = rqtl.control_data(job["job-metadata"]["bundle-file"])
         logger.info("Saving basic phenotype data.")
-        _phenos = save_phenotypes(cursor, _control_data["pheno"])
+
+        _phenos = save_phenotypes(cursor, _control_data)
         _next_data_id = fetch_next_dataid(...)
         dataidmap = {
             row["phenotype_id"]: {
@@ -65,13 +87,12 @@ def load_data(conn, job):
         }
         # 2. Save any new publications (in multi-file bundle):
         #     -> return publication IDS
-        logger.info("Saving publications.")
-        # -> check whether the publication-id exists?
-        # -> perhaps setup the bundle with the appropriate publications
-        # -> gn-data -> (phenotypes-se, phenotypes-n)
-        # -> gn-metadata -> (pheno, geno)
-        publication = save_publications(
-            cursor, _control_data.get("metadata", {}).get("publications"))
+        publications = publications + save_publications(
+            cursor,
+            _control_data.get(
+                "metadata", {}).get(
+                    "publications"),
+            _job_metadata.get("publicationid"))
         _pubidmap = {
             # TODO: Map the pheno ids to the publication ids
         }