about | summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2025-03-25 14:22:22 -0500
committer: Frederick Muriuki Muriithi, 2025-03-25 14:22:22 -0500
commit: fa86b2d93918e6e7ed857e32bf0da0d08b927869 (patch)
tree: 5e9ae76390f7b0d4d7e06234edfd164310762d00 /scripts
parent: 03cc1b298e03879c41b44436fcc511c1a10938e8 (diff)
download: gn-uploader-fa86b2d93918e6e7ed857e32bf0da0d08b927869.tar.gz
Initialise background script to handle bulk edits.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/phenotypes_bulk_edit.py | 95
1 files changed, 95 insertions, 0 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
new file mode 100644
index 0000000..67bf65a
--- /dev/null
+++ b/scripts/phenotypes_bulk_edit.py
@@ -0,0 +1,95 @@
+import sys
+import uuid
+import logging
+import argparse
+from pathlib import Path
+
+from gn_libs import jobs, mysqldb, sqlite3
+
# Every record is prefixed with its timestamp, the emitting file and line,
# and the severity. The level itself is set later, from the command line.
logging.basicConfig(format=(
    "%(asctime)s — %(filename)s:%(lineno)s — "
    "%(levelname)s: %(message)s"))
logger = logging.getLogger(__name__)
+
+
def check_ids():
    """Verify that all the `UniqueIdentifier` values are valid.

    Placeholder: for now this only logs that the check is starting and
    returns `None`.
    """
    logger.info("Checking the 'UniqueIdentifier' values.")
+
+
def check_for_mandatory_fields():
    """Verify that mandatory fields have values.

    Placeholder: no check is implemented yet; always returns `None`.
    """
+
+
def compute_differences():
    """Compute differences between data in DB and edited data.

    Placeholder: for now this only logs that the step is starting and
    returns `None`.
    """
    logger.info("Computing differences.")
+
+
def update_descriptions():
    """Update descriptions in the database.

    Placeholder: for now this only logs that the step is starting and
    returns `None`.
    """
    logger.info("Updating descriptions")
    # Planned approach:
    #   1. diff the database data against the uploaded file;
    #   2. run the update query only for descriptions that changed.
+
+
def link_publications():
    """Link phenotypes to relevant publications.

    Placeholder: for now this only logs that the step is starting and
    returns `None`.
    """
    logger.info("Linking phenotypes to publications.")
    # Planned approach: when a PubMed_ID is not yet in the database, create
    # the publication first.
+
+
def update_values():
    """Update the phenotype values.

    Placeholder: for now this only logs that the step is starting and
    returns `None`.
    """
    logger.info("Updating phenotypes values.")
    # Planned approach:
    #   1. diff the database data against the uploaded file;
    #   2. run the update query only for the data that changed.
+
+
def parse_args():
    """Parse the command-line arguments given to this script.

    Positional arguments, in order:
      * `db_uri`: MariaDB/MySQL connection URL, kept as a string.
      * `jobs_db_path`: path to the jobs' SQLite database, converted to a
        `pathlib.Path`.
      * `job_id`: ID of the running job, converted to a `uuid.UUID`.

    The optional `--log-level` must be one of "debug", "info", "warning",
    "error" or "critical", and defaults to "info".

    Returns the `argparse.Namespace` built from `sys.argv`.
    """
    log_levels = ("debug", "info", "warning", "error", "critical")
    cli = argparse.ArgumentParser(
        description="Process the bulk-edits to phenotype data and descriptions.",
        prog="Phenotypes Bulk-Edit Processor")

    # Positional arguments: their order here is the order on the command line.
    cli.add_argument("db_uri", help="MariaDB/MySQL connection URL", type=str)
    cli.add_argument(
        "jobs_db_path", help="Path to jobs' SQLite database.", type=Path)
    cli.add_argument("job_id", help="ID of the running job", type=uuid.UUID)

    cli.add_argument(
        "--log-level",
        default="info",
        choices=log_levels,
        help="Determines what is logged out.",
        type=str)
    return cli.parse_args()
+
+
def run(conn, job):
    """Process the data and update it.

    Runs the validation steps, then the update steps, in a fixed order.
    Neither `conn` nor `job` is used yet, since the steps are still
    placeholders. Always returns 0.
    """
    for validate in (check_ids, check_for_mandatory_fields):
        validate()
    # stop running here if any errors are found.
    update_steps = (
        compute_differences,
        update_descriptions,
        link_publications,
        update_values,
    )
    for step in update_steps:
        step()
    return 0
+
+
def main():
    """Entry-point for this script.

    Parses the command-line arguments, sets this module's logger to the
    requested level, then opens the MariaDB/MySQL connection and the jobs
    database connection and hands over to `run`.

    Returns whatever `run` returns.
    """
    cli_args = parse_args()
    logger.setLevel(cli_args.log_level.upper())
    logger.debug("Arguments: %s", cli_args)

    # Same enter/exit order as a single combined `with`: the jobs connection
    # is opened last and released first.
    with mysqldb.database_connection(cli_args.db_uri) as conn:
        with sqlite3.connection(cli_args.jobs_db_path) as jobs_conn:
            the_job = jobs.job(jobs_conn, cli_args.job_id)
            return run(conn, the_job)
+
+
# Only run when executed as a script; the value returned by `main` becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())