aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r--scripts/insert_samples.py147
1 files changed, 147 insertions, 0 deletions
diff --git a/scripts/insert_samples.py b/scripts/insert_samples.py
new file mode 100644
index 0000000..43c6a38
--- /dev/null
+++ b/scripts/insert_samples.py
@@ -0,0 +1,147 @@
+"""Insert samples into the database."""
+import sys
+import logging
+import pathlib
+import argparse
+
+import MySQLdb as mdb
+from redis import Redis
+
+from qc_app.db_utils import database_connection
+from qc_app.check_connections import check_db, check_redis
+from qc_app.samples import (
+ species_by_id,
+ population_by_id,
+ save_samples_data,
+ read_samples_file,
+ cross_reference_samples)
+
+# Route every record at INFO level or above to stderr through the root logger.
+root_logger = logging.getLogger()
+stderr_handler = logging.StreamHandler(sys.stderr)
+root_logger.setLevel(logging.INFO)
+root_logger.addHandler(stderr_handler)
+
+class SeparatorAction(argparse.Action):
+    """Argparse action that stores the field separator given on the CLI.
+
+    A value made of a backslash followed by the letter `t` is stored as a
+    real tab character; any other value is stored exactly as received.
+    """
+    def __init__(self, option_strings, dest, nargs=None, **kwargs):
+        """Set up the action, refusing any explicit `nargs` value."""
+        if nargs is not None:
+            raise ValueError("nargs not allowed.")
+        super().__init__(option_strings, dest, nargs=nargs, **kwargs)
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        """Store the (possibly translated) separator on the namespace."""
+        separator = values
+        if separator == "\\t":
+            # The user typed the escape sequence literally: use a real tab.
+            separator = chr(9)
+        setattr(namespace, self.dest, separator)
+
+def insert_samples(conn: mdb.Connection,# pylint: disable=[too-many-arguments]
+                   rconn: Redis,# pylint: disable=[unused-argument]
+                   speciesid: int,
+                   populationid: int,
+                   samplesfile: pathlib.Path,
+                   separator: str,
+                   firstlineheading: bool,
+                   quotechar: str) -> int:
+    """Insert the samples in `samplesfile` and link them to a population.
+
+    The species and the population are looked up first; if either lookup
+    comes back empty an error is logged and nothing is written. Otherwise
+    the file is read once to save the samples and a second time to
+    cross-reference each row's "Name" value with the population. Both reads
+    use the same separator, heading flag and quote character.
+
+    `rconn` is accepted but not used by this function.
+
+    Returns 0 on success, or 1 when the species or the population does not
+    exist.
+    """
+    def _read_rows():
+        # Single place that reads the file, so that every pass over it uses
+        # identical parsing options (in particular the quote character).
+        return read_samples_file(samplesfile,
+                                 separator,
+                                 firstlineheading,
+                                 quotechar=quotechar)
+
+    if not species_by_id(conn, speciesid):
+        logging.error("Species with id '%s' does not exist.", speciesid)
+        return 1
+    if not population_by_id(conn, populationid):
+        logging.error("Population with id '%s' does not exist.", populationid)
+        return 1
+
+    logging.info("Inserting samples ...")
+    save_samples_data(conn, speciesid, _read_rows())
+
+    logging.info("Cross-referencing samples with their populations.")
+    cross_reference_samples(
+        conn,
+        speciesid,
+        populationid,
+        (row["Name"] for row in _read_rows()))
+
+    return 0
+
+if __name__ == "__main__":
+
+    def cli_args():
+        """Build the command-line parser and return the parsed arguments."""
+        parser = argparse.ArgumentParser(
+            prog="insert_samples",
+            description = (
+                "Script to parse and insert sample data from a file into the "
+                "database."))
+
+        # == Mandatory Arguments ==
+        parser.add_argument(
+            "databaseuri",
+            help="URL to be used to initialise the connection to the database")
+        parser.add_argument("speciesid",
+                            type=int,
+                            help="The species identifier in the database.")
+        parser.add_argument(
+            "populationid",
+            type=int,
+            help="The grouping/population identifier in the database.")
+        parser.add_argument(
+            "samplesfile",
+            type=pathlib.Path,
+            help="Path to the CSV file containing the samples data.")
+        # NOTE(review): argparse only applies `default` to a positional
+        # argument when nargs is '?' or '*'. As declared here the separator
+        # is required, so the tab default looks unused -- confirm intent.
+        parser.add_argument(
+            "separator",
+            action=SeparatorAction,
+            help="The 'character' in the CSV file that separates the fields.",
+            default=chr(9))
+
+        # == Optional Arguments ==
+        parser.add_argument(
+            "--firstlineheading",
+            action="store_true",
+            help=("If the first line of the file is a header row, invoke the "
+                  "program with this flag."))
+        parser.add_argument(
+            "--quotechar",
+            default='"',
+            help=("The character used to delimit (surround?) the value in "
+                  "each column."))
+
+        # == Script-specific extras ==
+        parser.add_argument("--redisuri",
+                            help="URL to initialise connection to redis",
+                            default="redis:///")
+
+        return parser.parse_args()
+
+    def main():
+        """Run script to insert samples into the database.
+
+        Returns the process exit status: 2 when the samples file does not
+        exist, otherwise whatever `insert_samples` returns.
+        """
+        args = cli_args()
+        check_db(args.databaseuri)
+        check_redis(args.redisuri)
+        if not args.samplesfile.exists():
+            logging.error("File not found: '%s'.", args.samplesfile)
+            return 2
+
+        # The parsed arguments are deliberately not echoed to stdout: the
+        # database and redis URIs may embed credentials.
+        with (Redis.from_url(args.redisuri, decode_responses=True) as rconn,
+              database_connection(args.databaseuri) as dbconn):
+            return insert_samples(dbconn,
+                                  rconn,
+                                  args.speciesid,
+                                  args.populationid,
+                                  args.samplesfile,
+                                  args.separator,
+                                  args.firstlineheading,
+                                  args.quotechar)
+
+    sys.exit(main())