about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2024-01-15 06:07:05 +0300
committer: Frederick Muriuki Muriithi, 2024-01-15 06:07:05 +0300
commit: 8fe991c20f27702ee34ffcdd0cbc96e411db8c90 (patch)
tree: 509c0d1a00bf96bc3308802765e5ae63cfeb123c
parent: f89c08c392182b669d058a4c21feffde64b15ebb (diff)
download: gn-uploader-8fe991c20f27702ee34ffcdd0cbc96e411db8c90.tar.gz
Extract common structure into separate modules.
-rw-r--r-- scripts/cli_parser.py 26
-rw-r--r-- scripts/redis_logger.py 13
-rw-r--r-- scripts/rqtl2/cli_parser.py 13
-rw-r--r-- scripts/rqtl2/entry.py 37
-rw-r--r-- scripts/rqtl2/install_genotypes.py 80
5 files changed, 107 insertions, 62 deletions
diff --git a/scripts/cli_parser.py b/scripts/cli_parser.py
new file mode 100644
index 0000000..bceb3f4
--- /dev/null
+++ b/scripts/cli_parser.py
@@ -0,0 +1,26 @@
+"""Common utilities for CLI parsers"""
+from uuid import UUID
+from typing import Optional
+from argparse import ArgumentParser
+
+def init_cli_parser(program: str, description: Optional[str] = None) -> ArgumentParser:
+    """Create the argument parser shared by the CLI scripts.
+
+    The parser is named `program` and, optionally, described by
+    `description`. It is pre-loaded, in this order, with: `databaseuri`,
+    `redisuri`, `jobid` (parsed as a UUID), the `--redisexpiry` option
+    (integer, defaults to 86400), `speciesid` and `populationid` (both
+    parsed as integers).
+    """
+    # (flags, keyword-options) pairs, registered in the order listed.
+    common_arguments = (
+        (("databaseuri",), {"help": "URL to MariaDB"}),
+        (("redisuri",), {"help": "URL to Redis"}),
+        (("jobid",), {"help": "Job ID that this belongs to.", "type": UUID}),
+        (("--redisexpiry",), {
+            "help": "How long to keep any redis keys around.",
+            "type": int,
+            "default": 86400}),
+        (("speciesid",), {
+            "type": int,
+            "help": "Species to which bundle relates."}),
+        (("populationid",), {
+            "type": int,
+            "help": "Population to group data under"}),
+    )
+    parser = ArgumentParser(prog=program, description=description)
+    for flags, options in common_arguments:
+        parser.add_argument(*flags, **options)
+    return parser
diff --git a/scripts/redis_logger.py b/scripts/redis_logger.py
index 97ffe26..76b8ba6 100644
--- a/scripts/redis_logger.py
+++ b/scripts/redis_logger.py
@@ -31,3 +31,16 @@ class RedisLogger(logging.Handler):
self.redisconnection.rpush(
self.messages_list_name(), self.format(record))
self.redisconnection.expire(self.messages_list_name(), self.expiry)
+
+def setup_redis_logger(rconn: Redis,
+                       jobid: uuid.UUID,
+                       job_messagelist: str,
+                       expiry: int = 86400) -> RedisLogger:
+    """Build a ready-to-attach `RedisLogger` handler for the job `jobid`.
+
+    Stores `job_messagelist` under the "log-messagelist" field of the redis
+    hash named after `jobid`, then returns a `RedisLogger` built from
+    `rconn`, `jobid` and `expiry` with the default message format set.
+    """
+    rconn.hset(name=str(jobid), key="log-messagelist", value=job_messagelist)
+    handler = RedisLogger(rconn, jobid, expiry=expiry)
+    handler.setFormatter(logging.Formatter(
+        "%(asctime)s - %(name)s - %(levelname)s: %(message)s"))
+    return handler
diff --git a/scripts/rqtl2/cli_parser.py b/scripts/rqtl2/cli_parser.py
new file mode 100644
index 0000000..bcc7a4f
--- /dev/null
+++ b/scripts/rqtl2/cli_parser.py
@@ -0,0 +1,13 @@
+"""Utilities for CLI parsers specific to R/qtl2 parsing scripts."""
+from pathlib import Path
+from argparse import ArgumentParser
+
+def add_common_arguments(parser: ArgumentParser) -> ArgumentParser:
+    """Register the arguments shared by the R/qtl2 scripts on `parser`.
+
+    Adds, in order, the positional arguments `datasetid` (parsed as an
+    integer) and `rqtl2bundle` (parsed as a `pathlib.Path`), then returns
+    the very same parser object so that calls can be chained.
+    """
+    for name, converter, helptext in (
+            ("datasetid", int, "The dataset to which the data belongs."),
+            ("rqtl2bundle", Path, "Path to R/qtl2 bundle zip file.")):
+        parser.add_argument(name, type=converter, help=helptext)
+    return parser
diff --git a/scripts/rqtl2/entry.py b/scripts/rqtl2/entry.py
new file mode 100644
index 0000000..e2d70a6
--- /dev/null
+++ b/scripts/rqtl2/entry.py
@@ -0,0 +1,37 @@
+"""Build common script-entry structure."""
+from logging import Logger
+from typing import Callable
+from argparse import Namespace
+
+from redis import Redis
+from MySQLdb import Connection
+
+from qc_app.db_utils import database_connection
+from qc_app.check_connections import check_db, check_redis
+
+from scripts.redis_logger import setup_redis_logger
+
+def build_main(cli_args: Callable[[], Namespace],
+               run_fn: Callable[[Connection, Namespace], int],
+               logger: Logger,
+               loglevel: str = "INFO") -> Callable[[],int]:
+    """Assemble the entry-point function for an R/qtl2 script.
+
+    `cli_args` is called to obtain the parsed command-line arguments and
+    `run_fn` does the script's actual work, receiving the database
+    connection and those arguments. `logger` gets a redis handler attached
+    and its level set to `loglevel` before `run_fn` is called.
+
+    The function returned takes no arguments; it returns 2 if the bundle
+    file does not exist, otherwise whatever `run_fn` returns.
+    """
+    def main():
+        """Check the inputs, wire up logging, then hand over to `run_fn`."""
+        args = cli_args()
+        check_db(args.databaseuri)
+        check_redis(args.redisuri)
+        bundle = args.rqtl2bundle
+        if not bundle.exists():
+            logger.error("File not found: '%s'.", bundle)
+            return 2
+
+        with (Redis.from_url(args.redisuri, decode_responses=True) as rconn,
+              database_connection(args.databaseuri) as dbconn):
+            messagelist = f"{str(args.jobid)}:log-messages"
+            redishandler = setup_redis_logger(
+                rconn, args.jobid, messagelist, args.redisexpiry)
+            logger.addHandler(redishandler)
+            logger.setLevel(loglevel)
+            return run_fn(dbconn, args)
+
+    return main
diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py
index a1609a0..354bff0 100644
--- a/scripts/rqtl2/install_genotypes.py
+++ b/scripts/rqtl2/install_genotypes.py
@@ -1,26 +1,22 @@
"""Load genotypes from R/qtl2 bundle into the database."""
import sys
-import uuid
import logging
import traceback
from pathlib import Path
from zipfile import ZipFile
from functools import reduce
-from typing import Union, Iterator
-from argparse import ArgumentParser
+from typing import Iterator, Optional
import MySQLdb as mdb
-from redis import Redis
from MySQLdb.cursors import DictCursor
from r_qtl import r_qtl2 as rqtl2
from quality_control.parsing import take
-from qc_app.db_utils import database_connection
-from qc_app.check_connections import check_db, check_redis
-
-from scripts.redis_logger import RedisLogger
+from scripts.rqtl2.entry import build_main
+from scripts.cli_parser import init_cli_parser
+from scripts.rqtl2.cli_parser import add_common_arguments
stderr_handler = logging.StreamHandler(stream=sys.stderr)
logger = logging.getLogger("install_genotypes")
@@ -29,7 +25,7 @@ logger.addHandler(stderr_handler)
def insert_markers(dbconn: mdb.Connection,
speciesid: int,
markers: tuple[str, ...],
- pmapdata: Union[Iterator[dict], None]) -> int:
+ pmapdata: Optional[Iterator[dict]]) -> int:
"""Insert genotype and genotype values into the database."""
mdata = reduce(#type: ignore[var-annotated]
lambda acc, row: ({#type: ignore[arg-type, return-value]
@@ -129,7 +125,7 @@ def cross_reference_genotypes(dbconn: mdb.Connection,
speciesid: int,
datasetid: int,
dataids: tuple[dict, ...],
- gmapdata: Union[Iterator[dict], None]) -> int:
+ gmapdata: Optional[Iterator[dict]]) -> int:
"""Cross-reference the data to the relevant dataset."""
_rows, markers, mdata = reduce(#type: ignore[var-annotated]
lambda acc, row: (#type: ignore[return-value,arg-type]
@@ -221,59 +217,19 @@ if __name__ == "__main__":
def cli_args():
"""Process command-line arguments for install_genotypes"""
- parser = ArgumentParser(
- prog="install_genotypes",
- description="Parse genotypes from R/qtl2 bundle into the database.")
-
- parser.add_argument("databaseuri", help="URL to MariaDB")
- parser.add_argument("redisuri", help="URL to Redis")
- parser.add_argument("jobid",
- help="Job ID that this belongs to.",
- type=uuid.UUID)
-
- parser.add_argument("speciesid",
- help="Species to which bundle relates.")
- parser.add_argument("populationid",
- help="Population to group data under")
- parser.add_argument("datasetid",
- help="The dataset to which the data belongs.")
- parser.add_argument("rqtl2bundle",
- help="Path to R/qtl2 bundle zip file.",
- type=Path)
-
- parser.add_argument("--redisexpiry",
- help="How long to keep any redis keys around.",
- type=int,
- default=86400)
+ parser = add_common_arguments(init_cli_parser(
+ "install_genotypes",
+ "Parse genotypes from R/qtl2 bundle into the database."))
return parser.parse_args()
- def main():
- """Run `install_genotypes` scripts."""
- args = cli_args()
- check_db(args.databaseuri)
- check_redis(args.redisuri)
- if not args.rqtl2bundle.exists():
- logging.error("File not found: '%s'.", args.rqtl2bundle)
- return 2
-
- with (Redis.from_url(args.redisuri, decode_responses=True) as rconn,
- database_connection(args.databaseuri) as dbconn):
- formatter = logging.Formatter(
- "%(asctime)s - %(name)s - %(levelname)s: %(message)s")
- job_messagelist = f"{str(args.jobid)}:log-messages"
- rconn.hset(name=str(args.jobid),
- key="log-messagelist",
- value=job_messagelist)
- redislogger = RedisLogger(
- rconn, args.jobid, expiry=args.redisexpiry)
- redislogger.setFormatter(formatter)
- logger.addHandler(redislogger)
- logger.setLevel("INFO")
- return install_genotypes(dbconn,
- args.speciesid,
- args.populationid,
- args.datasetid,
- args.rqtl2bundle)
-
+ main = build_main(
+ cli_args,
+ lambda dbconn, args: install_genotypes(dbconn,
+ args.speciesid,
+ args.populationid,
+ args.datasetid,
+ args.rqtl2bundle),
+ logger,
+ "INFO")
sys.exit(main())