aboutsummaryrefslogtreecommitdiff
path: root/scripts/insert_samples.py
blob: 8431462a419c7177c5dbd86d1e06f4c0040cbde8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
"""Insert samples into the database."""
import sys
import logging
import pathlib
import argparse

import MySQLdb as mdb
from redis import Redis

from qc_app.db_utils import database_connection
from qc_app.check_connections import check_db, check_redis
from qc_app.db import species_by_id, population_by_id
from qc_app.samples import (
    save_samples_data,
    read_samples_file,
    cross_reference_samples)

# Route every log record at INFO level or above to standard error through the
# root logger, so messages logged via the `logging` module functions show up.
root_logger = logging.getLogger()
stderr_handler = logging.StreamHandler(stream=sys.stderr)
root_logger.setLevel("INFO")
root_logger.addHandler(stderr_handler)

class SeparatorAction(argparse.Action):
    """Argparse action that stores the field-separator value.

    A shell cannot easily pass a real TAB character, so the two-character
    text made of a backslash followed by `t` is translated into a TAB before
    it is stored. Any other value is stored unchanged.
    """

    def __init__(self, option_strings, dest, nargs=None, **kwargs):
        """Set up the action, refusing any explicit `nargs` value.

        Raises:
            ValueError: if `nargs` is anything other than `None`.
        """
        if nargs is not None:
            raise ValueError("nargs not allowed.")
        super().__init__(option_strings, dest, nargs=nargs, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        """Store the (possibly translated) separator on `namespace`."""
        if values == "\\t":
            separator = "\t"
        else:
            separator = values
        setattr(namespace, self.dest, separator)

def insert_samples(conn: mdb.Connection,# pylint: disable=[too-many-arguments]
                   rconn: Redis,# pylint: disable=[unused-argument]
                   speciesid: int,
                   populationid: int,
                   samplesfile: pathlib.Path,
                   separator: str,
                   firstlineheading: bool,
                   quotechar: str) -> int:
    """Insert the samples in `samplesfile` into the database.

    Checks that the species and the population exist, saves the samples read
    from the file, then cross-references the sample names (the "Name" field
    of each row) with the population. Progress is printed to standard output
    and failures are logged as errors.

    Parameters:
        conn: database connection used for the lookups and the writes.
        rconn: redis connection; accepted but not used by this function.
        speciesid: identifier of the species the samples belong to.
        populationid: identifier of the population to cross-reference with.
        samplesfile: path to the file holding the samples.
        separator: the field separator used in the file.
        firstlineheading: whether the first line of the file is a header row.
        quotechar: the character used to quote field values in the file.

    Returns:
        0 on success, 1 if either the species or the population is missing.
    """
    def _rows():
        # The file is parsed afresh for each pass. Both passes must use the
        # same parsing options (including `quotechar`), otherwise the rows
        # that get saved can differ from the rows that get cross-referenced.
        return read_samples_file(samplesfile,
                                 separator,
                                 firstlineheading,
                                 quotechar=quotechar)

    print("Checking for errors:")
    species = species_by_id(conn, speciesid)
    if not species:
        logging.error("Species with id '%s' does not exist.", str(speciesid))
        return 1
    print(f"\tSpecies with ID '{speciesid}' found")
    population = population_by_id(conn, populationid)
    if not population:
        logging.error("Population with id '%s' does not exist.",
                      str(populationid))
        return 1
    print(f"\tPopulations with ID '{populationid}' found")
    print("No errors found. Continuing...")
    print("\nInserting samples ...")
    save_samples_data(conn, speciesid, _rows())
    print("Cross-referencing samples with their populations.")
    cross_reference_samples(
        conn,
        speciesid,
        populationid,
        (row["Name"] for row in _rows()))
    print("Samples upload successfully completed.")
    return 0

if __name__ == "__main__":

    def cli_args():
        """Define the command-line interface and parse the arguments.

        Returns the namespace produced by the parser, with the attributes
        `databaseuri`, `speciesid`, `populationid`, `samplesfile`,
        `separator`, `firstlineheading`, `quotechar` and `redisuri`.
        """
        argparser = argparse.ArgumentParser(
            prog="insert_samples",
            description=("Script to parse and insert sample data from a file "
                         "into the database."))

        # Each entry is (argument name, keyword options). Order matters: the
        # positional arguments are consumed in the order they are registered.
        arguments = (
            # == Mandatory Arguments ==
            ("databaseuri", {
                "help": ("URL to be used to initialise the connection to the "
                         "database")}),
            ("speciesid", {
                "type": int,
                "help": "The species identifier in the database."}),
            ("populationid", {
                "type": int,
                "help": "The grouping/population identifier in the database."}),
            ("samplesfile", {
                "type": pathlib.Path,
                "help": "Path to the CSV file containing the samples data."}),
            ("separator", {
                "action": SeparatorAction,
                "help": ("The 'character' in the CSV file that separates the "
                         "fields."),
                "default": "\t"}),
            # == Optional Arguments ==
            ("--firstlineheading", {
                "action": "store_true",
                "help": ("If the first line of the file is a header row, "
                         "invoke the program with this flag.")}),
            ("--quotechar", {
                "default": '"',
                "help": ("The character used to delimit (surround?) the "
                         "value in each column.")}),
            # == Script-specific extras ==
            ("--redisuri", {
                "help": "URL to initialise connection to redis",
                "default": "redis:///"}),
        )
        for argname, options in arguments:
            argparser.add_argument(argname, **options)

        return argparser.parse_args()

    def main():
        """Entry point: validate the environment, then insert the samples.

        Returns the exit status for the process: 2 if the samples file does
        not exist, otherwise whatever `insert_samples` returns.
        """
        cliargs = cli_args()
        check_db(cliargs.databaseuri)
        check_redis(cliargs.redisuri)

        samplespath = cliargs.samplesfile
        if samplespath.exists():
            # Both connections are closed again when the block is left.
            with (Redis.from_url(cliargs.redisuri,
                                 decode_responses=True) as rconn,
                  database_connection(cliargs.databaseuri) as dbconn):
                return insert_samples(
                    conn=dbconn,
                    rconn=rconn,
                    speciesid=cliargs.speciesid,
                    populationid=cliargs.populationid,
                    samplesfile=samplespath,
                    separator=cliargs.separator,
                    firstlineheading=cliargs.firstlineheading,
                    quotechar=cliargs.quotechar)

        logging.error("File not found: '%s'.", samplespath)
        return 2

    sys.exit(main())