From 26ca17cf2bd08a7b75e4094e2903966cfedefb0f Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 10 Jan 2024 06:43:05 +0300
Subject: Insert any new markers

Insert any new markers found into the database.
---
 scripts/rqtl2/install_genotypes.py | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py
index f5b6eb4..88f776b 100644
--- a/scripts/rqtl2/install_genotypes.py
+++ b/scripts/rqtl2/install_genotypes.py
@@ -23,21 +23,18 @@ stderr_handler = logging.StreamHandler(stream=sys.stderr)
 logger = logging.getLogger("install_genotypes")
 logger.addHandler(stderr_handler)
 
-def insert_genotypes(dbconn: mdb.Connection,
-                     speciesid: int,
-                     populationid: int,
-                     genotypes: tuple[dict]) -> int:
+def insert_markers(dbconn: mdb.Connection,
+                   speciesid: int,
+                   markers: tuple[str, ...]) -> int:
     """Insert genotype and genotype values into the database."""
     with dbconn.cursor() as cursor:
         cursor.executemany(
             "INSERT INTO Geno(SpeciesId, Name, Marker_Name) "
             "VALUES (%(speciesid)s, %(marker)s, %(marker)s) "
-            "ON DUPLICATE KEY UPDATE "
-            "SpeciesId=VALUE(SpeciesId)",
-            tuple({"speciesid": speciesid, "marker": geno["marker"]}
-                  for geno in genotypes))
-        # TODO: Install individuals/samples/strains: Strain
-        # TODO: Cross-ref samples to population: StrainXRef
+            "ON DUPLICATE KEY UPDATE SpeciesId=SpeciesId",
+            tuple({"speciesid": speciesid, "marker": marker}
+                  for marker in markers))
+        return cursor.rowcount
         # TODO: Install geno data: GenoData
         return cursor.rowcount
 
@@ -47,7 +44,6 @@ def install_genotypes(dbconn: mdb.Connection,
                       rqtl2bundle: Path) -> int:
     """Load any existing genotypes into the database."""
     count = 0
-    installed = 0
     with ZipFile(str(rqtl2bundle.absolute()), "r") as zfile:
         try:
             logger.info("Validating bundle")
@@ -55,7 +51,7 @@ def install_genotypes(dbconn: mdb.Connection,
             logger.info("Bundle validated successfully.")
             logger.info(("Loading genotypes. This could take a while. "
                          "Please be patient."))
-            
+
             cdata = rqtl2.control_data(zfile)
             genotypes = rqtl2.file_data(zfile,
                                         "geno",
@@ -66,14 +62,14 @@ def install_genotypes(dbconn: mdb.Connection,
                 if len(batch) == 0:
                     logger.info("Loading Genotypes complete!")
                     logger.info(
-                        f"Total genotypes installed: {installed} of {count}")
+                        "Total rows processed: %s", count)
                     break
 
-                curr_installed = insert_genotypes(
-                    dbconn, speciesid, populationid, batch)
-                installed = installed + curr_installed
+                insert_markers(
+                    dbconn,
+                    speciesid,
+                    tuple(key for key in batch[0].keys() if key != "id"))
                 count = count + len(batch)
-                logger.info(f"Installed {curr_installed} genotypes")
 
             if "gmap" in cdata:
                 logger.info("Loading genetic mapping info.")
@@ -87,7 +83,7 @@ def install_genotypes(dbconn: mdb.Connection,
         except rqtl2.InvalidFormat as exc:
             logger.error(str(exc))
             logger.info("There are no genotypes to load.")
-        except Exception as exc:
+        except Exception as _exc:
             logger.error("Failing with exception: %s", traceback.format_exc())
             return 3
 
-- 
cgit v1.2.3