diff options
Diffstat (limited to 'scripts/compute_phenotype_means.py')
| -rw-r--r-- | scripts/compute_phenotype_means.py | 101 |
1 files changed, 101 insertions, 0 deletions
# Reconstructed from the diff:
#   diff --git a/scripts/compute_phenotype_means.py
#              b/scripts/compute_phenotype_means.py
#   new file mode 100644, index 0000000..ef2fabc
"""Compute phenotype means."""
import sys
import logging
from pathlib import Path
from typing import Callable, TypeVar
from argparse import Namespace, ArgumentParser

import MySQLdb

from gn_libs import mysqldb
from uploader import setup_modules_logging

from .cli_parser import add_logging_option
from .load_phenotypes_to_db import update_means

logger = logging.getLogger(__name__)
logging.basicConfig(
    encoding="utf-8",
    format="%(asctime)s - %(name)s - %(levelname)s — %(message)s",
    level=logging.INFO)


def fetch_xref_id(conn: mysqldb.Connection, population_id: int) -> tuple[int, ...]:
    """Fetch a population's cross-reference IDs.

    Selects the `Id` column of every `PublishXRef` row whose `InbredSetId`
    equals `population_id` and returns the values as a tuple of integers.
    The tuple is empty when no row matches.
    """
    logger.debug("Fetching the xref IDs.")
    with conn.cursor(cursorclass=MySQLdb.cursors.DictCursor) as cursor:
        # Parameterised query: the driver handles quoting of population_id.
        query = "SELECT Id FROM PublishXRef WHERE InbredSetId=%(population_id)s"
        cursor.execute(query, {"population_id": population_id})
        return tuple(int(row["Id"]) for row in cursor.fetchall())


def run(args: Namespace) -> int:
    """Run the script.

    Uses the cross-reference IDs given on the command line when there are
    any, otherwise falls back to every cross-reference ID found in the
    database for `args.population_id`.

    Returns 0 after `update_means` has been called, or 1 (after logging an
    error) when there are no cross-reference IDs to work with.
    """
    logger.debug("Running the script!")
    with mysqldb.database_connection(args.db_uri) as mariadb_conn:
        # An empty/absent --cross-ref-ids is falsy, which triggers the
        # database lookup.
        xref_ids = args.cross_ref_ids or fetch_xref_id(
            mariadb_conn, args.population_id)
        if xref_ids:
            update_means(mariadb_conn,
                         args.population_id,
                         xref_ids)
            logger.debug("Successfully computed means for %02d phenotypes.",
                         len(xref_ids))
            return 0

        _reasons = (
            f"no population exists with the ID {args.population_id}",
            "the population exists but it has no phenotypes linked to it yet")
        logger.error(
            "No cross-reference IDs to run against. Likely causes are: %s",
            " OR ".join(_reasons) + ".")
        return 1


T = TypeVar("T")


def comma_separated_list(
        val: str,
        itemstype: Callable[[str], T] = str
) -> tuple[T, ...]:
    """Convert 'val' into a tuple of items converted with 'itemstype'.

    'val' is split on commas, each piece is stripped of surrounding
    whitespace and then passed to 'itemstype'. Any exception raised by
    'itemstype' (e.g. `ValueError` from `int("")` for an empty piece) is
    propagated to the caller.
    """
    return tuple(itemstype(item.strip()) for item in val.split(","))


def comma_separated_list_of_integers(val: str) -> tuple[int, ...]:
    """Convert 'val' into a tuple of items of type 'int'."""
    return comma_separated_list(val, int)


if __name__ == "__main__":
    def parse_args() -> Namespace:
        """Define and parse the CLI arguments accepted by this script."""
        parser = ArgumentParser(
            "compute-phenotype-means",
            description="Compute/Recompute the phenotype means.")
        parser.add_argument("db_uri",
                            metavar="db-uri",
                            type=str,
                            help="MariaDB/MySQL connection URL")
        # NOTE(review): `jobs_db_path` is parsed but not read anywhere in this
        # module — presumably required by whatever launches the script;
        # confirm before removing.
        parser.add_argument("jobs_db_path",
                            metavar="jobs-db-path",
                            type=Path,
                            help="Path to jobs' SQLite database.")
        parser.add_argument("population_id",
                            metavar="population-id",
                            type=int,
                            help=("Identifier for the InbredSet group/"
                                  "population to run means against."))
        ## Optional arguments
        parser = add_logging_option(parser)
        parser.add_argument(
            "--cross-ref-ids",
            type=comma_separated_list_of_integers,
            help=("Provide cross-reference IDs to narrow the number of "
                  "phenotypes that the means are computed against."),
            # Immutable, and the same container type the `type=` converter
            # produces. argparse uses non-string defaults as-is.
            default=())

        return parser.parse_args()

    def main() -> int:
        """compute-phenotype-means: Entry-point function."""
        args = parse_args()
        # `log_level` is expected to be provided by `add_logging_option`.
        logger.setLevel(getattr(logging, args.log_level.upper()))
        # Presumably applies this logger's configuration to the listed
        # modules — verify against `uploader.setup_modules_logging`.
        setup_modules_logging(logger, ("scripts.load_phenotypes_to_db",))
        return run(args)

    sys.exit(main())
