about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2025-03-25 14:22:22 -0500
committer Frederick Muriuki Muriithi 2025-03-25 14:22:22 -0500
commit fa86b2d93918e6e7ed857e32bf0da0d08b927869 (patch)
tree 5e9ae76390f7b0d4d7e06234edfd164310762d00 /scripts
parent 03cc1b298e03879c41b44436fcc511c1a10938e8 (diff)
download gn-uploader-fa86b2d93918e6e7ed857e32bf0da0d08b927869.tar.gz
Initialise background script to handle bulk edits.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/phenotypes_bulk_edit.py 95
1 files changed, 95 insertions, 0 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
new file mode 100644
index 0000000..67bf65a
--- /dev/null
+++ b/scripts/phenotypes_bulk_edit.py
@@ -0,0 +1,95 @@
+import sys
+import uuid
+import logging
+import argparse
+from pathlib import Path
+
+from gn_libs import jobs, mysqldb, sqlite3
+
+logging.basicConfig(
+    format="%(asctime)s — %(filename)s:%(lineno)s — %(levelname)s: %(message)s")
+logger = logging.getLogger(__name__)
+
+
+def check_ids():
+    """Verify that all the `UniqueIdentifier` values are valid."""
+    logger.info("Checking the 'UniqueIdentifier' values.")
+    pass
+
+
+def check_for_mandatory_fields():
+    """Verify that mandatory fields have values."""
+    pass
+
+
+def compute_differences():
+    """Compute differences between data in DB and edited data."""
+    logger.info("Computing differences.")
+    pass
+
+
+def update_descriptions():
+    """Update descriptions in the database"""
+    logger.info("Updating descriptions")
+    # Compute differences between db data and uploaded file
+    # Only run query for changed descriptions
+    pass
+
+
+def link_publications():
+    """Link phenotypes to relevant publications."""
+    logger.info("Linking phenotypes to publications.")
+    # Create publication if PubMed_ID doesn't exist in db
+    pass
+
+
+def update_values():
+    """Update the phenotype values."""
+    logger.info("Updating phenotypes values.")
+    # Compute differences between db data and uploaded file
+    # Only run query for changed data
+    pass
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        prog="Phenotypes Bulk-Edit Processor",
+        description="Process the bulk-edits to phenotype data and descriptions.")
+    parser.add_argument("db_uri", type=str, help="MariaDB/MySQL connection URL")
+    parser.add_argument(
+        "jobs_db_path", type=Path, help="Path to jobs' SQLite database.")
+    parser.add_argument("job_id", type=uuid.UUID, help="ID of the running job")
+    parser.add_argument(
+        "--log-level",
+        type=str,
+        help="Determines what is logged out.",
+        choices=("debug", "info", "warning", "error", "critical"),
+        default="info")
+    return parser.parse_args()
+
+
+def run(conn, job):
+    """Process the data and update it."""
+    check_ids()
+    check_for_mandatory_fields()
+    # stop running here if any errors are found.
+    compute_differences()
+    update_descriptions()
+    link_publications()
+    update_values()
+    return 0
+
+
+def main():
+    """Entry-point for this script."""
+    args = parse_args()
+    logger.setLevel(args.log_level.upper())
+    logger.debug("Arguments: %s", args)
+
+    with (mysqldb.database_connection(args.db_uri) as conn,
+          sqlite3.connection(args.jobs_db_path) as jobs_conn):
+        return run(conn, jobs.job(jobs_conn, args.job_id))
+
+
+if __name__ == "__main__":
+    sys.exit(main())