From 8fe991c20f27702ee34ffcdd0cbc96e411db8c90 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 15 Jan 2024 06:07:05 +0300
Subject: Extract common structure into separate modules.

---
Review notes (annotations only; not part of the commit, this section is
ignored when the patch is applied):

* NOTE(review): line breaks and indentation in this copy were reconstructed
  from a whitespace-collapsed rendering of the patch.  The indentation of
  context lines and the placement of blank context lines (notably in the
  last hunk of scripts/rqtl2/install_genotypes.py, whose header requires
  seven context lines) are best guesses -- verify against the repository
  before applying.
* scripts/cli_parser.py: new `init_cli_parser(program, description)` builds
  an ArgumentParser with the positionals `databaseuri`, `redisuri`, `jobid`
  (parsed as UUID), `speciesid` and `populationid` (parsed as int), and the
  option `--redisexpiry` (int, default 86400).
* scripts/redis_logger.py: new `setup_redis_logger` writes the message-list
  name under the key "log-messagelist" of the redis hash named `str(jobid)`,
  then returns a RedisLogger with a formatter attached.  It still carries a
  commented-out `job_messagelist` assignment from the code it was extracted
  from.  Its annotations use `Redis` and `uuid.UUID`; their imports are not
  visible in this hunk -- presumably the module already has them, confirm.
* scripts/rqtl2/cli_parser.py: new `add_common_arguments` adds `datasetid`
  (int) and `rqtl2bundle` (Path) to a parser and returns the same parser.
* scripts/rqtl2/entry.py: new `build_main` returns a zero-argument `main`
  that parses the arguments, calls `check_db` and `check_redis`, returns 2
  when `args.rqtl2bundle` does not exist, and otherwise opens the redis and
  database connections, attaches the redis log handler, sets the log level
  and returns `run_fn(dbconn, args)`.
* scripts/rqtl2/install_genotypes.py: the inline parser and `main` are
  replaced by the helpers above, and `Union[X, None]` becomes `Optional[X]`.
  Two visible behaviour differences: `speciesid`, `populationid` and
  `datasetid` are now declared with `type=int` (the removed parser declared
  no type for them), and the "File not found" error now goes through the
  logger passed to `build_main` (the removed `main` called `logging.error`).

 scripts/cli_parser.py              | 26 +++++++++++++
 scripts/redis_logger.py            | 13 +++++++
 scripts/rqtl2/cli_parser.py        | 13 +++++++
 scripts/rqtl2/entry.py             | 37 ++++++++++++++++++
 scripts/rqtl2/install_genotypes.py | 80 +++++++++-------------------------------
 5 files changed, 107 insertions(+), 62 deletions(-)
 create mode 100644 scripts/cli_parser.py
 create mode 100644 scripts/rqtl2/cli_parser.py
 create mode 100644 scripts/rqtl2/entry.py

diff --git a/scripts/cli_parser.py b/scripts/cli_parser.py
new file mode 100644
index 0000000..bceb3f4
--- /dev/null
+++ b/scripts/cli_parser.py
@@ -0,0 +1,26 @@
+"""Common utilities for CLI parsers"""
+from uuid import UUID
+from typing import Optional
+from argparse import ArgumentParser
+
+def init_cli_parser(program: str, description: Optional[str] = None) -> ArgumentParser:
+    """Initialise the CLI arguments parser."""
+    parser = ArgumentParser(prog=program, description=description)
+
+    parser.add_argument("databaseuri", help="URL to MariaDB")
+    parser.add_argument("redisuri", help="URL to Redis")
+    parser.add_argument("jobid",
+                        help="Job ID that this belongs to.",
+                        type=UUID)
+    parser.add_argument("--redisexpiry",
+                        help="How long to keep any redis keys around.",
+                        type=int,
+                        default=86400)
+
+    parser.add_argument("speciesid",
+                        type=int,
+                        help="Species to which bundle relates.")
+    parser.add_argument("populationid",
+                        type=int,
+                        help="Population to group data under")
+    return parser
diff --git a/scripts/redis_logger.py b/scripts/redis_logger.py
index 97ffe26..76b8ba6 100644
--- a/scripts/redis_logger.py
+++ b/scripts/redis_logger.py
@@ -31,3 +31,16 @@ class RedisLogger(logging.Handler):
         self.redisconnection.rpush(
             self.messages_list_name(), self.format(record))
         self.redisconnection.expire(self.messages_list_name(), self.expiry)
+
+def setup_redis_logger(rconn: Redis,
+                       jobid: uuid.UUID,
+                       job_messagelist: str,
+                       expiry: int = 86400) -> RedisLogger:
+    """Setup a default RedisLogger logger."""
+    formatter = logging.Formatter(
+        "%(asctime)s - %(name)s - %(levelname)s: %(message)s")
+    # job_messagelist = f"{str(args.jobid)}:log-messages"
+    rconn.hset(name=str(jobid), key="log-messagelist", value=job_messagelist)
+    redislogger = RedisLogger(rconn, jobid, expiry=expiry)
+    redislogger.setFormatter(formatter)
+    return redislogger
diff --git a/scripts/rqtl2/cli_parser.py b/scripts/rqtl2/cli_parser.py
new file mode 100644
index 0000000..bcc7a4f
--- /dev/null
+++ b/scripts/rqtl2/cli_parser.py
@@ -0,0 +1,13 @@
+"""Utilities for CLI parsers specific to R/qtl2 parsing scripts."""
+from pathlib import Path
+from argparse import ArgumentParser
+
+def add_common_arguments(parser: ArgumentParser) -> ArgumentParser:
+    """Add common arguments to the CLI parser."""
+    parser.add_argument("datasetid",
+                        type=int,
+                        help="The dataset to which the data belongs.")
+    parser.add_argument("rqtl2bundle",
+                        type=Path,
+                        help="Path to R/qtl2 bundle zip file.")
+    return parser
diff --git a/scripts/rqtl2/entry.py b/scripts/rqtl2/entry.py
new file mode 100644
index 0000000..e2d70a6
--- /dev/null
+++ b/scripts/rqtl2/entry.py
@@ -0,0 +1,37 @@
+"""Build common script-entry structure."""
+from logging import Logger
+from typing import Callable
+from argparse import Namespace
+
+from redis import Redis
+from MySQLdb import Connection
+
+from qc_app.db_utils import database_connection
+from qc_app.check_connections import check_db, check_redis
+
+from scripts.redis_logger import setup_redis_logger
+
+def build_main(cli_args: Callable[[], Namespace],
+               run_fn: Callable[[Connection, Namespace], int],
+               logger: Logger,
+               loglevel: str = "INFO") -> Callable[[],int]:
+    """Build a function to be used as an entry-point for scripts."""
+    def main():
+        args = cli_args()
+        check_db(args.databaseuri)
+        check_redis(args.redisuri)
+        if not args.rqtl2bundle.exists():
+            logger.error("File not found: '%s'.", args.rqtl2bundle)
+            return 2
+
+        with (Redis.from_url(args.redisuri, decode_responses=True) as rconn,
+              database_connection(args.databaseuri) as dbconn):
+            logger.addHandler(setup_redis_logger(
+                rconn,
+                args.jobid,
+                f"{str(args.jobid)}:log-messages",
+                args.redisexpiry))
+            logger.setLevel(loglevel)
+            return run_fn(dbconn, args)
+
+    return main
diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py
index a1609a0..354bff0 100644
--- a/scripts/rqtl2/install_genotypes.py
+++ b/scripts/rqtl2/install_genotypes.py
@@ -1,26 +1,22 @@
 """Load genotypes from R/qtl2 bundle into the database."""
 import sys
-import uuid
 import logging
 import traceback
 from pathlib import Path
 from zipfile import ZipFile
 from functools import reduce
-from typing import Union, Iterator
-from argparse import ArgumentParser
+from typing import Iterator, Optional
 
 import MySQLdb as mdb
-from redis import Redis
 from MySQLdb.cursors import DictCursor
 
 from r_qtl import r_qtl2 as rqtl2
 
 from quality_control.parsing import take
 
-from qc_app.db_utils import database_connection
-from qc_app.check_connections import check_db, check_redis
-
-from scripts.redis_logger import RedisLogger
+from scripts.rqtl2.entry import build_main
+from scripts.cli_parser import init_cli_parser
+from scripts.rqtl2.cli_parser import add_common_arguments
 
 stderr_handler = logging.StreamHandler(stream=sys.stderr)
 logger = logging.getLogger("install_genotypes")
@@ -29,7 +25,7 @@ logger.addHandler(stderr_handler)
 def insert_markers(dbconn: mdb.Connection,
                    speciesid: int,
                    markers: tuple[str, ...],
-                   pmapdata: Union[Iterator[dict], None]) -> int:
+                   pmapdata: Optional[Iterator[dict]]) -> int:
     """Insert genotype and genotype values into the database."""
     mdata = reduce(#type: ignore[var-annotated]
         lambda acc, row: ({#type: ignore[arg-type, return-value]
@@ -129,7 +125,7 @@ def cross_reference_genotypes(dbconn: mdb.Connection,
                               speciesid: int,
                               datasetid: int,
                               dataids: tuple[dict, ...],
-                              gmapdata: Union[Iterator[dict], None]) -> int:
+                              gmapdata: Optional[Iterator[dict]]) -> int:
     """Cross-reference the data to the relevant dataset."""
     _rows, markers, mdata = reduce(#type: ignore[var-annotated]
         lambda acc, row: (#type: ignore[return-value,arg-type]
@@ -221,59 +217,19 @@ if __name__ == "__main__":
 
     def cli_args():
         """Process command-line arguments for install_genotypes"""
-        parser = ArgumentParser(
-            prog="install_genotypes",
-            description="Parse genotypes from R/qtl2 bundle into the database.")
-
-        parser.add_argument("databaseuri", help="URL to MariaDB")
-        parser.add_argument("redisuri", help="URL to Redis")
-        parser.add_argument("jobid",
-                            help="Job ID that this belongs to.",
-                            type=uuid.UUID)
-
-        parser.add_argument("speciesid",
-                            help="Species to which bundle relates.")
-        parser.add_argument("populationid",
-                            help="Population to group data under")
-        parser.add_argument("datasetid",
-                            help="The dataset to which the data belongs.")
-        parser.add_argument("rqtl2bundle",
-                            help="Path to R/qtl2 bundle zip file.",
-                            type=Path)
-
-        parser.add_argument("--redisexpiry",
-                            help="How long to keep any redis keys around.",
-                            type=int,
-                            default=86400)
+        parser = add_common_arguments(init_cli_parser(
+            "install_genotypes",
+            "Parse genotypes from R/qtl2 bundle into the database."))
 
         return parser.parse_args()
 
-    def main():
-        """Run `install_genotypes` scripts."""
-        args = cli_args()
-        check_db(args.databaseuri)
-        check_redis(args.redisuri)
-        if not args.rqtl2bundle.exists():
-            logging.error("File not found: '%s'.", args.rqtl2bundle)
-            return 2
-
-        with (Redis.from_url(args.redisuri, decode_responses=True) as rconn,
-              database_connection(args.databaseuri) as dbconn):
-            formatter = logging.Formatter(
-                "%(asctime)s - %(name)s - %(levelname)s: %(message)s")
-            job_messagelist = f"{str(args.jobid)}:log-messages"
-            rconn.hset(name=str(args.jobid),
-                       key="log-messagelist",
-                       value=job_messagelist)
-            redislogger = RedisLogger(
-                rconn, args.jobid, expiry=args.redisexpiry)
-            redislogger.setFormatter(formatter)
-            logger.addHandler(redislogger)
-            logger.setLevel("INFO")
-            return install_genotypes(dbconn,
-                                     args.speciesid,
-                                     args.populationid,
-                                     args.datasetid,
-                                     args.rqtl2bundle)
-
+    main = build_main(
+        cli_args,
+        lambda dbconn, args: install_genotypes(dbconn,
+                                               args.speciesid,
+                                               args.populationid,
+                                               args.datasetid,
+                                               args.rqtl2bundle),
+        logger,
+        "INFO")
     sys.exit(main())
-- 
cgit v1.2.3