diff options
Diffstat (limited to 'scripts')
| -rw-r--r-- | scripts/cli_parser.py | 24 | ||||
| -rw-r--r-- | scripts/compute_phenotype_means.py | 101 | ||||
| -rw-r--r-- | scripts/load_phenotypes_to_db.py | 2 |
3 files changed, 119 insertions, 8 deletions
diff --git a/scripts/cli_parser.py b/scripts/cli_parser.py index 0c91c5e..bf39731 100644 --- a/scripts/cli_parser.py +++ b/scripts/cli_parser.py @@ -3,6 +3,20 @@ from uuid import UUID from typing import Optional from argparse import ArgumentParser + +def add_logging_option(parser: ArgumentParser) -> ArgumentParser: + """Add optional log-level option""" + parser.add_argument( + "--log-level", + "--loglevel", + type=str, + default="INFO", + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL", + "debug", "info", "warning", "error", "critical"], + help="The severity of events to track with the logger.") + return parser + + def init_cli_parser(program: str, description: Optional[str] = None) -> ArgumentParser: """Initialise the CLI arguments parser.""" parser = ArgumentParser(prog=program, description=description) @@ -19,14 +33,8 @@ def init_cli_parser(program: str, description: Optional[str] = None) -> Argument type=int, default=86400, help="How long to keep any redis keys around.") - parser.add_argument( - "--loglevel", - type=str, - default="INFO", - choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL", - "debug", "info", "warning", "error", "critical"], - help="The severity of events to track with the logger.") - return parser + return add_logging_option(parser) + def add_global_data_arguments(parser: ArgumentParser) -> ArgumentParser: """Add the global (present in nearly ALL scripts) CLI arguments.""" diff --git a/scripts/compute_phenotype_means.py b/scripts/compute_phenotype_means.py new file mode 100644 index 0000000..ef2fabc --- /dev/null +++ b/scripts/compute_phenotype_means.py @@ -0,0 +1,101 @@ +"""Compute phenotype means.""" +import sys +import logging +from pathlib import Path +from typing import TypeVar +from argparse import Namespace, ArgumentParser + +import MySQLdb + +from gn_libs import mysqldb +from uploader import setup_modules_logging + +from .cli_parser import add_logging_option +from .load_phenotypes_to_db import update_means + +logger = logging.getLogger(__name__) +logging.basicConfig( + encoding="utf-8", + format="%(asctime)s - %(name)s - %(levelname)s — %(message)s", + level=logging.INFO) + + +def fetch_xref_id(conn: mysqldb.Connection, population_id: int) -> tuple[int, ...]: + """Fetch a population's cross-reference IDs.""" + logger.debug("Fetching the xref IDs.") + with conn.cursor(cursorclass=MySQLdb.cursors.DictCursor) as cursor: + query = "SELECT Id FROM PublishXRef WHERE InbredSetId=%(population_id)s" + cursor.execute(query, {"population_id": population_id}) + return tuple(int(row["Id"]) for row in cursor.fetchall()) + + +def run(args) -> int: + """Run the script.""" + logger.debug("Running the script!") + with mysqldb.database_connection(args.db_uri) as mariadb_conn: + xref_ids = args.cross_ref_ids or fetch_xref_id(mariadb_conn, args.population_id) + if len(xref_ids): + update_means(mariadb_conn, + args.population_id, + xref_ids) + logger.debug("Successfully computed means for %02d phenotypes.", + len(xref_ids)) + return 0 + _reasons = ( + f"no population exists with the ID {args.population_id}", + "the population exists but it has no phenotypes linked to it yet") + logger.error( + "No cross-reference IDs to run against. Likely causes are: %s", + " OR ".join(_reasons) + ".") + return 1 + + +T = TypeVar("T") +def comma_separated_list(val: str, itemstype: T = str) -> tuple[T, ...]: + """Convert val into a list of items of type 'itemstype'.""" + return tuple(itemstype(item.strip()) for item in val.split(",")) + + +def comma_separated_list_of_integers(val: str) -> tuple[int, ...]: + """Convert 'val' into list of items of type 'int'.""" + return comma_separated_list(val, int) + + +if __name__ == "__main__": + def parse_args() -> Namespace: + """Define and parse the CLI parsers accepted by this script.""" + parser = ArgumentParser( + "compute-phenotype-means", + description="Compute/Recompute the phenotype means.") + parser.add_argument("db_uri", + metavar="db-uri", + type=str, + help="MariaDB/MySQL connection URL") + parser.add_argument("jobs_db_path", + metavar="jobs-db-path", + type=Path, + help="Path to jobs' SQLite database.") + parser.add_argument("population_id", + metavar="population-id", + type=int, + help=("Identifier for the InbredSet group/" + "population to run means against.")) + ## Optional arguments + parser = add_logging_option(parser) + parser.add_argument( + "--cross-ref-ids", + type=comma_separated_list_of_integers, + help=("Provide cross-reference IDs to narrow the number of " + "phenotypes that the means are computed against."), + default=[]) + + return parser.parse_args() + + def main() -> int: + """compute-phenotype-means: Entry-point function.""" + args = parse_args() + logger.setLevel(getattr(logging, args.log_level.upper())) + setup_modules_logging(logger, ("scripts.load_phenotypes_to_db",)) + return run(args) + + sys.exit(main()) diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py index 9158307..e449b82 100644 --- a/scripts/load_phenotypes_to_db.py +++ b/scripts/load_phenotypes_to_db.py @@ -414,6 +414,7 @@ def update_means( xref_ids: tuple[int, ...] ): """Compute the means from the data and update them in the database.""" + logger.info("Computing means for %02d phenotypes.", len(xref_ids)) query = ( "UPDATE PublishXRef SET mean = " "(SELECT AVG(value) FROM PublishData" @@ -426,6 +427,7 @@ def update_means( batch = take(_xref_iterator, 10000) if len(batch) == 0: break + logger.info("\tComputing means for batch of %02d phenotypes.", len(batch)) cursor.executemany( query, tuple({ |
