"""Compute phenotype means.""" import sys import logging from pathlib import Path from typing import TypeVar from argparse import Namespace, ArgumentParser import MySQLdb from gn_libs import mysqldb from uploader import setup_modules_logging from .cli_parser import add_logging_option from .load_phenotypes_to_db import update_means logger = logging.getLogger(__name__) logging.basicConfig( encoding="utf-8", format="%(asctime)s - %(name)s - %(levelname)s — %(message)s", level=logging.INFO) def fetch_xref_id(conn: mysqldb.Connection, population_id: int) -> tuple[int, ...]: """Fetch a population's cross-reference IDs.""" logger.debug("Fetching the xref IDs.") with conn.cursor(cursorclass=MySQLdb.cursors.DictCursor) as cursor: query = "SELECT Id FROM PublishXRef WHERE InbredSetId=%(population_id)s" cursor.execute(query, {"population_id": population_id}) return tuple(int(row["Id"]) for row in cursor.fetchall()) def run(args) -> int: """Run the script.""" logger.debug("Running the script!") with mysqldb.database_connection(args.db_uri) as mariadb_conn: xref_ids = args.cross_ref_ids or fetch_xref_id(mariadb_conn, args.population_id) if len(xref_ids): update_means(mariadb_conn, args.population_id, xref_ids) logger.debug("Successfully computed means for %02d phenotypes.", len(xref_ids)) return 0 _reasons = ( f"no population exists with the ID {args.population_id}", "the population exists but it has no phenotypes linked to it yet") logger.error( "No cross-reference IDs to run against. Likely causes are: %s", " OR ".join(_reasons) + ".") return 1 T = TypeVar("T") def comma_separated_list(val: str, itemstype: T = str) -> tuple[T, ...]: """Convert val into a list of items of type 'itemstype'.""" return tuple(itemstype(item.strip()) for item in val.split(",")) def comma_separated_list_of_integers(val: str) -> tuple[int, ...]: """Convert 'val' into list of items of type 'int'.""" return comma_separated_list(val, int) if __name__ == "__main__": def parse_args() -> Namespace: """Define and parse the CLI parsers accepted by this script.""" parser = ArgumentParser( "compute-phenotype-means", description="Compute/Recompute the phenotype means.") parser.add_argument("db_uri", metavar="db-uri", type=str, help="MariaDB/MySQL connection URL") parser.add_argument("jobs_db_path", metavar="jobs-db-path", type=Path, help="Path to jobs' SQLite database.") parser.add_argument("population_id", metavar="population-id", type=int, help=("Identifier for the InbredSet group/" "population to run means against.")) ## Optional arguments parser = add_logging_option(parser) parser.add_argument( "--cross-ref-ids", type=comma_separated_list_of_integers, help=("Provide cross-reference IDs to narrow the number of " "phenotypes that the means are computed against."), default=[]) return parser.parse_args() def main() -> int: """compute-phenotype-means: Entry-point function.""" args = parse_args() logger.setLevel(getattr(logging, args.log_level.upper())) setup_modules_logging(logger, ("scripts.load_phenotypes_to_db",)) return run(args) sys.exit(main())