From 096ab99a2d961e864f340c39252b4c8eecc72191 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 14 Dec 2023 19:22:58 +0300 Subject: samples: Create external script and fix some bugs. --- scripts/insert_samples.py | 147 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 scripts/insert_samples.py (limited to 'scripts') diff --git a/scripts/insert_samples.py b/scripts/insert_samples.py new file mode 100644 index 0000000..43c6a38 --- /dev/null +++ b/scripts/insert_samples.py @@ -0,0 +1,147 @@ +"""Insert samples into the database.""" +import sys +import logging +import pathlib +import argparse + +import MySQLdb as mdb +from redis import Redis + +from qc_app.db_utils import database_connection +from qc_app.check_connections import check_db, check_redis +from qc_app.samples import ( + species_by_id, + population_by_id, + save_samples_data, + read_samples_file, + cross_reference_samples) + +stderr_handler = logging.StreamHandler(stream=sys.stderr) +root_logger = logging.getLogger() +root_logger.addHandler(stderr_handler) +root_logger.setLevel("INFO") + +class SeparatorAction(argparse.Action): + """Action to handle the separator values.""" + def __init__(self, option_strings, dest, nargs=None, **kwargs): + """Init the action""" + if nargs is not None: + raise ValueError("nargs not allowed.") + super().__init__(option_strings, dest, nargs, **kwargs) + + def __call__(self, parser, namespace, values, option_string=None): + """Process the value passed in.""" + setattr(namespace, self.dest, (chr(9) if values == "\\t" else values)) + +def insert_samples(conn: mdb.Connection,# pylint: disable=[too-many-arguments] + rconn: Redis,# pylint: disable=[unused-argument] + speciesid: int, + populationid: int, + samplesfile: pathlib.Path, + separator: str, + firstlineheading: bool, + quotechar: str): + """Insert the samples into the database.""" + species = species_by_id(conn, speciesid) + if not bool(species): + logging.error("Species with id '%s' does not exist.", str(speciesid)) + return 1 + population = population_by_id(conn, populationid) + if not bool(population): + logging.error("Population with id '%s' does not exist.", + str(populationid)) + return 1 + logging.info("Inserting samples ...") + save_samples_data( + conn, + speciesid, + read_samples_file(samplesfile, separator, firstlineheading)) + logging.info("Cross-referencing samples with their populations.") + cross_reference_samples( + conn, + speciesid, + populationid, + (row["Name"] for row in + read_samples_file(samplesfile, + separator, + firstlineheading, + quotechar=quotechar))) + + return 0 + +if __name__ == "__main__": + + def cli_args(): + """Process the command-line arguments.""" + # + parser = argparse.ArgumentParser( + prog="insert_samples", + description = ( + "Script to parse and insert sample data from a file into the " + "database.")) + + # == Mandatory Arguments == + parser.add_argument( + "databaseuri", + help="URL to be used to initialise the connection to the database") + parser.add_argument("speciesid", + type=int, + help="The species identifier in the database.") + parser.add_argument( + "populationid", + type=int, + help="The grouping/population identifier in the database.") + parser.add_argument( + "samplesfile", + type=pathlib.Path, + help="Path to the CSV file containing the samples data.") + parser.add_argument( + "separator", + action=SeparatorAction, + help="The 'character' in the CSV file that separates the fields.", + default=chr(9)) + + # == Optional Arguments == + parser.add_argument( + "--firstlineheading", + action="store_true", + help=("If the first line of the file is a header row, invoke the " + "program with this flag.")) + parser.add_argument( + "--quotechar", + default='"', + help=("The character used to delimit (surround?) the value in " + "each column.")) + + # == Script-specific extras == + parser.add_argument("--redisuri", + help="URL to initialise connection to redis", + default="redis:///") + + args = parser.parse_args() + return args + + def main(): + """Run script to insert samples into the database.""" + + args = cli_args() + check_db(args.databaseuri) + check_redis(args.redisuri) + if not args.samplesfile.exists(): + logging.error("File not found: '%s'.", args.samplesfile) + return 2 + + with (Redis.from_url(args.redisuri, decode_responses=True) as rconn, + database_connection(args.databaseuri) as dbconn): + print("We got here...") + print(args) + return insert_samples(dbconn, + rconn, + args.speciesid, + args.populationid, + args.samplesfile, + args.separator, + args.firstlineheading, + args.quotechar) + + sys.exit(main()) -- cgit v1.2.3