about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-01-11 12:38:04 +0300
committer Frederick Muriuki Muriithi 2024-01-11 12:38:04 +0300
commit f89c08c392182b669d058a4c21feffde64b15ebb (patch)
tree dbc0086335c7c871677ff2f3eff38d8e5b345921
parent 2a29e3f0ed57414490f05790e664f94d89b5fdf9 (diff)
download gn-uploader-f89c08c392182b669d058a4c21feffde64b15ebb.tar.gz
Update pmap data in the database.
-rw-r--r-- scripts/rqtl2/install_genotypes.py 34
1 files changed, 23 insertions, 11 deletions
diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py
index 1317c96..a1609a0 100644
--- a/scripts/rqtl2/install_genotypes.py
+++ b/scripts/rqtl2/install_genotypes.py
@@ -28,15 +28,30 @@ logger.addHandler(stderr_handler)
def insert_markers(dbconn: mdb.Connection,
speciesid: int,
- markers: tuple[str, ...]) -> int:
+ markers: tuple[str, ...],
+ pmapdata: Union[Iterator[dict], None]) -> int:
"""Insert genotype and genotype values into the database."""
+ mdata = reduce(#type: ignore[var-annotated]
+ lambda acc, row: ({#type: ignore[arg-type, return-value]
+ **acc, row["id"]: {
+ key: val
+ for key,val in row.items()
+ if key != "id"
+ }
+ }),
+ (pmapdata or tuple()),
+ {})
with dbconn.cursor() as cursor:
cursor.executemany(
- "INSERT INTO Geno(SpeciesId, Name, Marker_Name) "
- "VALUES (%(speciesid)s, %(marker)s, %(marker)s) "
+ "INSERT INTO Geno(SpeciesId, Name, Marker_Name, Chr, Mb) "
+ "VALUES (%(speciesid)s, %(marker)s, %(marker)s, %(chr)s, %(pos)s) "
"ON DUPLICATE KEY UPDATE SpeciesId=SpeciesId",
- tuple({"speciesid": speciesid, "marker": marker}
- for marker in markers))
+ tuple({
+ "speciesid": speciesid,
+ "marker": marker,
+ "chr": mdata.get(marker, {}).get("chr"),
+ "pos": mdata.get(marker, {}).get("pos")
+ } for marker in markers))
return cursor.rowcount
def insert_individuals(dbconn: mdb.Connection,
@@ -175,7 +190,9 @@ def install_genotypes(dbconn: mdb.Connection,
insert_markers(
dbconn,
speciesid,
- tuple(key for key in batch[0].keys() if key != "id"))
+ tuple(key for key in batch[0].keys() if key != "id"),
+ (rqtl2.file_data(zfile, "pmap", cdata) if "pmap" in cdata
+ else None))
individuals = tuple(row["id"] for row in batch)
insert_individuals(dbconn, speciesid, individuals)
cross_reference_individuals(
@@ -190,11 +207,6 @@ def install_genotypes(dbconn: mdb.Connection,
(rqtl2.file_data(zfile, "gmap", cdata)
if "gmap" in cdata else None))
count = count + len(batch)
-
- if "pmap" in cdata:
- logger.info("Loading physical mapping info.")
- # TODO: load pmap files
- logger.info("Successfully loaded physical mapping.")
except rqtl2.InvalidFormat as exc:
logger.error(str(exc))
logger.info("There are no genotypes to load.")