about | summary | refs | log | tree | commit | diff
path: root/scripts/rqtl2/install_genotypes.py
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2024-01-10 06:43:05 +0300
committer: Frederick Muriuki Muriithi, 2024-01-10 07:19:36 +0300
commit: 26ca17cf2bd08a7b75e4094e2903966cfedefb0f (patch)
tree: c30903b15cdbbba7f3fdc4aa09c7dd450747b972 /scripts/rqtl2/install_genotypes.py
parent: 9322da0f79dfa4c3f9f899f5a861ce302ce21e9c (diff)
download: gn-uploader-26ca17cf2bd08a7b75e4094e2903966cfedefb0f.tar.gz
Insert any new markers
Insert any new markers found into the database.
Diffstat (limited to 'scripts/rqtl2/install_genotypes.py')
-rw-r--r-- scripts/rqtl2/install_genotypes.py | 32
1 files changed, 14 insertions, 18 deletions
diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py
index f5b6eb4..88f776b 100644
--- a/scripts/rqtl2/install_genotypes.py
+++ b/scripts/rqtl2/install_genotypes.py
@@ -23,21 +23,18 @@ stderr_handler = logging.StreamHandler(stream=sys.stderr)
logger = logging.getLogger("install_genotypes")
logger.addHandler(stderr_handler)
-def insert_genotypes(dbconn: mdb.Connection,
- speciesid: int,
- populationid: int,
- genotypes: tuple[dict]) -> int:
+def insert_markers(dbconn: mdb.Connection,
+ speciesid: int,
+ markers: tuple[str, ...]) -> int:
"""Insert genotype and genotype values into the database."""
with dbconn.cursor() as cursor:
cursor.executemany(
"INSERT INTO Geno(SpeciesId, Name, Marker_Name) "
"VALUES (%(speciesid)s, %(marker)s, %(marker)s) "
- "ON DUPLICATE KEY UPDATE "
- "SpeciesId=VALUE(SpeciesId)",
- tuple({"speciesid": speciesid, "marker": geno["marker"]}
- for geno in genotypes))
- # TODO: Install individuals/samples/strains: Strain
- # TODO: Cross-ref samples to population: StrainXRef
+ "ON DUPLICATE KEY UPDATE SpeciesId=SpeciesId",
+ tuple({"speciesid": speciesid, "marker": marker}
+ for marker in markers))
+ return cursor.rowcount
# TODO: Install geno data: GenoData
return cursor.rowcount
@@ -47,7 +44,6 @@ def install_genotypes(dbconn: mdb.Connection,
rqtl2bundle: Path) -> int:
"""Load any existing genotypes into the database."""
count = 0
- installed = 0
with ZipFile(str(rqtl2bundle.absolute()), "r") as zfile:
try:
logger.info("Validating bundle")
@@ -55,7 +51,7 @@ def install_genotypes(dbconn: mdb.Connection,
logger.info("Bundle validated successfully.")
logger.info(("Loading genotypes. This could take a while. "
"Please be patient."))
-
+
cdata = rqtl2.control_data(zfile)
genotypes = rqtl2.file_data(zfile,
"geno",
@@ -66,14 +62,14 @@ def install_genotypes(dbconn: mdb.Connection,
if len(batch) == 0:
logger.info("Loading Genotypes complete!")
logger.info(
- f"Total genotypes installed: {installed} of {count}")
+ "Total rows processed: %s", count)
break
- curr_installed = insert_genotypes(
- dbconn, speciesid, populationid, batch)
- installed = installed + curr_installed
+ insert_markers(
+ dbconn,
+ speciesid,
+ tuple(key for key in batch[0].keys() if key != "id"))
count = count + len(batch)
- logger.info(f"Installed {curr_installed} genotypes")
if "gmap" in cdata:
logger.info("Loading genetic mapping info.")
@@ -87,7 +83,7 @@ def install_genotypes(dbconn: mdb.Connection,
except rqtl2.InvalidFormat as exc:
logger.error(str(exc))
logger.info("There are no genotypes to load.")
- except Exception as exc:
+ except Exception as _exc:
logger.error("Failing with exception: %s", traceback.format_exc())
return 3