about summary refs log tree commit diff
path: root/scripts/compute_phenotype_means.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/compute_phenotype_means.py')
-rw-r--r-- scripts/compute_phenotype_means.py 101
1 files changed, 101 insertions, 0 deletions
diff --git a/scripts/compute_phenotype_means.py b/scripts/compute_phenotype_means.py
new file mode 100644
index 0000000..ef2fabc
--- /dev/null
+++ b/scripts/compute_phenotype_means.py
@@ -0,0 +1,101 @@
+"""Compute phenotype means."""
+import sys
+import logging
+from pathlib import Path
+from typing import TypeVar
+from argparse import Namespace, ArgumentParser
+
+import MySQLdb
+
+from gn_libs import mysqldb
+from uploader import setup_modules_logging
+
+from .cli_parser import add_logging_option
+from .load_phenotypes_to_db import update_means
+
+logger = logging.getLogger(__name__)
+# Default logging setup: INFO level with timestamped, named records.
+logging.basicConfig(
+    level=logging.INFO, encoding="utf-8",
+    format="%(asctime)s - %(name)s - %(levelname)s — %(message)s")
+
+
+def fetch_xref_id(conn: mysqldb.Connection, population_id: int) -> tuple[int, ...]:
+    """Return, as a tuple of ints, the `PublishXRef.Id` values of a population."""
+    logger.debug("Fetching the xref IDs.")
+    sql = "SELECT Id FROM PublishXRef WHERE InbredSetId=%(population_id)s"
+    with conn.cursor(cursorclass=MySQLdb.cursors.DictCursor) as cursor:
+        cursor.execute(sql, {"population_id": population_id})
+        return tuple(int(record["Id"]) for record in cursor.fetchall())
+
+
+def run(args) -> int:
+    """Compute means for a population's phenotypes and return an exit code.
+
+    Returns 0 after `update_means` runs, 1 when there are no IDs to use.
+    """
+    logger.debug("Running the script!")
+    with mysqldb.database_connection(args.db_uri) as mariadb_conn:
+        ids = args.cross_ref_ids or fetch_xref_id(mariadb_conn, args.population_id)
+        if not ids:
+            causes = " OR ".join((
+                f"no population exists with the ID {args.population_id}",
+                "the population exists but it has no phenotypes linked to it yet"))
+            logger.error(
+                "No cross-reference IDs to run against. Likely causes are: %s",
+                causes + ".")
+            return 1
+        update_means(mariadb_conn, args.population_id, ids)
+        logger.debug("Successfully computed means for %02d phenotypes.", len(ids))
+        return 0
+
+
+T = TypeVar("T")
+def comma_separated_list(val: str, itemstype: type[T] = str) -> tuple[T, ...]:
+    """Split 'val' on commas; return the stripped items as a tuple of 'itemstype'."""
+    return tuple(itemstype(item.strip()) for item in val.split(","))
+
+
+def comma_separated_list_of_integers(val: str) -> tuple[int, ...]:
+    """Parse comma-separated text, e.g. '1, 2,3', into a tuple of integers."""
+    return comma_separated_list(val, itemstype=int)
+
+
+if __name__ == "__main__":
+    def parse_args() -> Namespace:
+        """Build the command-line parser and parse the given arguments."""
+        cli = ArgumentParser(
+            "compute-phenotype-means",
+            description="Compute/Recompute the phenotype means.")
+        # Positional arguments, in the order they are expected on the CLI.
+        cli.add_argument(
+            "db_uri", metavar="db-uri", type=str,
+            help="MariaDB/MySQL connection URL")
+        cli.add_argument(
+            "jobs_db_path", metavar="jobs-db-path", type=Path,
+            help="Path to jobs' SQLite database.")
+        cli.add_argument(
+            "population_id", metavar="population-id", type=int,
+            help=("Identifier for the InbredSet group/"
+                  "population to run means against."))
+        # Optional arguments.
+        cli = add_logging_option(cli)
+        cli.add_argument(
+            "--cross-ref-ids",
+            default=[],
+            type=comma_separated_list_of_integers,
+            help=("Provide cross-reference IDs to narrow the number of "
+                  "phenotypes that the means are computed against."))
+
+        return cli.parse_args()
+
+    def main() -> int:
+        """Parse arguments, configure logging, then hand over to `run`."""
+        cli_args = parse_args()
+        # `log_level` is expected to be defined by `add_logging_option`.
+        level = getattr(logging, cli_args.log_level.upper())
+        logger.setLevel(level)
+        setup_modules_logging(logger, ("scripts.load_phenotypes_to_db",))
+        return run(cli_args)
+
+    sys.exit(main())