about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/cli_parser.py 24
-rw-r--r-- scripts/compute_phenotype_means.py 101
-rw-r--r-- scripts/load_phenotypes_to_db.py 2
3 files changed, 119 insertions, 8 deletions
diff --git a/scripts/cli_parser.py b/scripts/cli_parser.py
index 0c91c5e..bf39731 100644
--- a/scripts/cli_parser.py
+++ b/scripts/cli_parser.py
@@ -3,6 +3,20 @@ from uuid import UUID
 from typing import Optional
 from argparse import ArgumentParser
 
+
+def add_logging_option(parser: ArgumentParser) -> ArgumentParser:
+    """Register the optional `--log-level` (alias `--loglevel`) option on `parser`."""
+    levels = ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
+    parser.add_argument(
+        "--log-level",  # NOTE(review): dest is now `log_level`, not `loglevel` — confirm callers
+        "--loglevel",
+        type=str,
+        default="INFO",
+        choices=[*levels, *(level.lower() for level in levels)],
+        help="The severity of events to track with the logger.")
+    return parser
+
+
 def init_cli_parser(program: str, description: Optional[str] = None) -> ArgumentParser:
     """Initialise the CLI arguments parser."""
     parser = ArgumentParser(prog=program, description=description)
@@ -19,14 +33,8 @@ def init_cli_parser(program: str, description: Optional[str] = None) -> Argument
         type=int,
         default=86400,
         help="How long to keep any redis keys around.")
-    parser.add_argument(
-        "--loglevel",
-        type=str,
-        default="INFO",
-        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL",
-                 "debug", "info", "warning", "error", "critical"],
-        help="The severity of events to track with the logger.")
-    return parser
+    return add_logging_option(parser)
+
 
 def add_global_data_arguments(parser: ArgumentParser) -> ArgumentParser:
     """Add the global (present in nearly ALL scripts) CLI arguments."""
diff --git a/scripts/compute_phenotype_means.py b/scripts/compute_phenotype_means.py
new file mode 100644
index 0000000..ef2fabc
--- /dev/null
+++ b/scripts/compute_phenotype_means.py
@@ -0,0 +1,101 @@
+"""Compute phenotype means."""
+import sys
+import logging
+from pathlib import Path
+from typing import TypeVar
+from argparse import Namespace, ArgumentParser
+
+import MySQLdb
+
+from gn_libs import mysqldb
+from uploader import setup_modules_logging
+
+from .cli_parser import add_logging_option
+from .load_phenotypes_to_db import update_means
+
+logger = logging.getLogger(__name__)  # module logger; main() below sets its level from --log-level
+logging.basicConfig(  # module-level call: runs on import and configures the root logger
+        encoding="utf-8",
+        format="%(asctime)s - %(name)s - %(levelname)s — %(message)s",
+        level=logging.INFO)
+
+
+def fetch_xref_id(conn: mysqldb.Connection, population_id: int) -> tuple[int, ...]:
+    """Return the `Id` of every PublishXRef row whose InbredSetId is `population_id`."""
+    logger.debug("Fetching the xref IDs.")
+    query = "SELECT Id FROM PublishXRef WHERE InbredSetId=%(population_id)s"
+    with conn.cursor(cursorclass=MySQLdb.cursors.DictCursor) as cursor:
+        cursor.execute(query, {"population_id": population_id})
+        return tuple(int(row["Id"]) for row in cursor.fetchall())
+
+
+def run(args) -> int:
+    """Update the means for the chosen phenotypes; return 0 on success, 1 if none found."""
+    logger.debug("Running the script!")
+    with mysqldb.database_connection(args.db_uri) as mariadb_conn:
+        # IDs given on the command line win; otherwise use every ID of the population.
+        xref_ids = args.cross_ref_ids or fetch_xref_id(mariadb_conn, args.population_id)
+        if not xref_ids:
+            causes = (
+                f"no population exists with the ID {args.population_id}",
+                "the population exists but it has no phenotypes linked to it yet")
+            logger.error(
+                "No cross-reference IDs to run against. Likely causes are: %s",
+                " OR ".join(causes) + ".")
+            return 1
+        update_means(mariadb_conn, args.population_id, xref_ids)
+        logger.debug(
+            "Successfully computed means for %02d phenotypes.",
+            len(xref_ids))
+        return 0
+
+
+T = TypeVar("T")
+def comma_separated_list(val: str, itemstype: type[T] = str) -> tuple[T, ...]:
+    """Split 'val' on commas; return a tuple of 'itemstype' applied to each stripped item."""
+    return tuple(itemstype(item.strip()) for item in val.split(","))
+
+
+def comma_separated_list_of_integers(val: str) -> tuple[int, ...]:
+    """Parse a comma-separated string such as '1, 2,3' into a tuple of 'int' items."""
+    return comma_separated_list(val, itemstype=int)
+
+
+if __name__ == "__main__":
+    def parse_args() -> Namespace:
+        """Define and parse the CLI arguments accepted by this script."""
+        parser = ArgumentParser(
+            "compute-phenotype-means",
+            description="Compute/Recompute the phenotype means.")
+        parser.add_argument("db_uri",
+                            metavar="db-uri",
+                            type=str,
+                            help="MariaDB/MySQL connection URL")
+        parser.add_argument("jobs_db_path",  # NOTE(review): parsed but not read by run() — confirm
+                            metavar="jobs-db-path",
+                            type=Path,
+                            help="Path to jobs' SQLite database.")
+        parser.add_argument("population_id",
+                            metavar="population-id",
+                            type=int,
+                            help=("Identifier for the InbredSet group/"
+                                  "population to run means against."))
+        ## Optional arguments
+        parser = add_logging_option(parser)
+        parser.add_argument(
+            "--cross-ref-ids",
+            type=comma_separated_list_of_integers,
+            help=("Provide cross-reference IDs to narrow the number of "
+                  "phenotypes that the means are computed against."),
+            default=())  # empty tuple: same type the converter returns; run() then fetches all IDs
+
+        return parser.parse_args()
+
+    def main() -> int:
+        """Entry point: parse the CLI arguments, set up logging, then call run()."""
+        args = parse_args()
+        logger.setLevel(getattr(logging, args.log_level.upper()))
+        setup_modules_logging(logger, ("scripts.load_phenotypes_to_db",))
+        return run(args)
+
+    sys.exit(main())
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 9158307..e449b82 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -414,6 +414,7 @@ def update_means(
         xref_ids: tuple[int, ...]
 ):
     """Compute the means from the data and update them in the database."""
+    logger.info("Computing means for %02d phenotypes.", len(xref_ids))
     query = (
         "UPDATE PublishXRef SET mean = "
         "(SELECT AVG(value) FROM PublishData"
@@ -426,6 +427,7 @@ def update_means(
             batch = take(_xref_iterator, 10000)
             if len(batch) == 0:
                 break
+            logger.info("\tComputing means for batch of %02d phenotypes.", len(batch))
             cursor.executemany(
                 query,
                 tuple({