From f89c08c392182b669d058a4c21feffde64b15ebb Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 11 Jan 2024 12:38:04 +0300 Subject: Update pmap data in the database. --- scripts/rqtl2/install_genotypes.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) (limited to 'scripts') diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py index 1317c96..a1609a0 100644 --- a/scripts/rqtl2/install_genotypes.py +++ b/scripts/rqtl2/install_genotypes.py @@ -28,15 +28,30 @@ logger.addHandler(stderr_handler) def insert_markers(dbconn: mdb.Connection, speciesid: int, - markers: tuple[str, ...]) -> int: + markers: tuple[str, ...], + pmapdata: Union[Iterator[dict], None]) -> int: """Insert genotype and genotype values into the database.""" + mdata = reduce(#type: ignore[var-annotated] + lambda acc, row: ({#type: ignore[arg-type, return-value] + **acc, row["id"]: { + key: val + for key,val in row.items() + if key != "id" + } + }), + (pmapdata or tuple()), + {}) with dbconn.cursor() as cursor: cursor.executemany( - "INSERT INTO Geno(SpeciesId, Name, Marker_Name) " - "VALUES (%(speciesid)s, %(marker)s, %(marker)s) " + "INSERT INTO Geno(SpeciesId, Name, Marker_Name, Chr, Mb) " + "VALUES (%(speciesid)s, %(marker)s, %(marker)s, %(chr)s, %(pos)s) " "ON DUPLICATE KEY UPDATE SpeciesId=SpeciesId", - tuple({"speciesid": speciesid, "marker": marker} - for marker in markers)) + tuple({ + "speciesid": speciesid, + "marker": marker, + "chr": mdata.get(marker, {}).get("chr"), + "pos": mdata.get(marker, {}).get("pos") + } for marker in markers)) return cursor.rowcount def insert_individuals(dbconn: mdb.Connection, @@ -175,7 +190,9 @@ def install_genotypes(dbconn: mdb.Connection, insert_markers( dbconn, speciesid, - tuple(key for key in batch[0].keys() if key != "id")) + tuple(key for key in batch[0].keys() if key != "id"), + (rqtl2.file_data(zfile, "pmap", cdata) if "pmap" in cdata + else None)) individuals = tuple(row["id"] for row in batch) insert_individuals(dbconn, speciesid, individuals) cross_reference_individuals( @@ -190,11 +207,6 @@ def install_genotypes(dbconn: mdb.Connection, (rqtl2.file_data(zfile, "gmap", cdata) if "gmap" in cdata else None)) count = count + len(batch) - - if "pmap" in cdata: - logger.info("Loading physical mapping info.") - # TODO: load pmap files - logger.info("Successfully loaded physical mapping.") except rqtl2.InvalidFormat as exc: logger.error(str(exc)) logger.info("There are no genotypes to load.") -- cgit v1.2.3