From dd50c620e77561b4b507ed8ed73b2fd5cdab31ef Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 10 Jan 2024 07:04:59 +0300 Subject: Insert genotype data. --- scripts/rqtl2/install_genotypes.py | 41 +++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py index d28b3b7..63e6113 100644 --- a/scripts/rqtl2/install_genotypes.py +++ b/scripts/rqtl2/install_genotypes.py @@ -67,7 +67,44 @@ def cross_reference_individuals(dbconn: mdb.Connection, "ON DUPLICATE KEY UPDATE InbredSetId=InbredSetId", tuple(ids)) return cursor.rowcount - return cursor.rowcount + +def insert_genotype_data(dbconn: mdb.Connection, + speciesid: int, + genotypes: tuple[dict, ...], + individuals: tuple[str, ...]) -> tuple[int, tuple[int, ...]]: + """Insert the genotype data values into the database.""" + with dbconn.cursor(cursorclass=DictCursor) as cursor: + paramstr = ", ".join(["%s"] * len(individuals)) + cursor.execute( + f"SELECT Id, Name FROM Strain WHERE Name IN ({paramstr})", + individuals) + indids = {row["Name"]: row["Id"] for row in cursor.fetchall()} + markers = tuple(key for key in genotypes[0].keys() if key != "id") + paramstr = ", ".join(["%s"] * len(markers)) + cursor.execute( + f"SELECT Id, Name FROM Geno WHERE SpeciesId=%s AND Name IN ({paramstr})", + (speciesid,)+markers) + markerids = {row["Name"]: row["Id"] for row in cursor.fetchall()} + cursor.execute("SELECT MAX(Id) AS lastid FROM GenoData") + lastid = cursor.fetchone()["lastid"] + data = tuple( + {**row, "gid": gid} + for gid, row in enumerate( + ({ + "value": innerrow[marker], + "indid": indids[innerrow["id"]], + "markerid": markerids[marker] + } for innerrow in genotypes for marker in + (key for key in innerrow.keys() if key != "id")), + start=lastid+1)) + cursor.executemany( + "INSERT INTO GenoData(Id, StrainId, value) " + "VALUES(%(gid)s, %(indid)s, %(value)s)", + data) + return cursor.rowcount, tuple({ + "dataid": row["gid"], + "markerid": row["markerid"] + } for row in data) def install_genotypes(dbconn: mdb.Connection, speciesid: int, @@ -104,6 +141,8 @@ def install_genotypes(dbconn: mdb.Connection, insert_individuals(dbconn, speciesid, individuals) cross_reference_individuals( dbconn, speciesid, populationid, individuals) + _num_rows, data_ids = insert_genotype_data( + dbconn, speciesid, batch, individuals) count = count + len(batch) if "gmap" in cdata: -- cgit v1.2.3