aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2024-01-10 07:04:59 +0300
committerFrederick Muriuki Muriithi2024-01-10 08:02:51 +0300
commitdd50c620e77561b4b507ed8ed73b2fd5cdab31ef (patch)
tree501d412f474b4e1593b21ccc95669d3cb899df13 /scripts
parentbd902e747670dab5e31eaf09755ce02b278061e5 (diff)
downloadgn-uploader-dd50c620e77561b4b507ed8ed73b2fd5cdab31ef.tar.gz
Insert genotype data.
Diffstat (limited to 'scripts')
-rw-r--r--scripts/rqtl2/install_genotypes.py41
1 files changed, 40 insertions, 1 deletions
diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py
index d28b3b7..63e6113 100644
--- a/scripts/rqtl2/install_genotypes.py
+++ b/scripts/rqtl2/install_genotypes.py
@@ -67,7 +67,44 @@ def cross_reference_individuals(dbconn: mdb.Connection,
"ON DUPLICATE KEY UPDATE InbredSetId=InbredSetId",
tuple(ids))
return cursor.rowcount
- return cursor.rowcount
+
+def insert_genotype_data(dbconn: mdb.Connection,
+ speciesid: int,
+ genotypes: tuple[dict, ...],
+ individuals: tuple[str, ...]) -> tuple[int, tuple[int, ...]]:
+ """Insert the genotype data values into the database."""
+ with dbconn.cursor(cursorclass=DictCursor) as cursor:
+ paramstr = ", ".join(["%s"] * len(individuals))
+ cursor.execute(
+ f"SELECT Id, Name FROM Strain WHERE Name IN ({paramstr})",
+ individuals)
+ indids = {row["Name"]: row["Id"] for row in cursor.fetchall()}
+ markers = tuple(key for key in genotypes[0].keys() if key != "id")
+ paramstr = ", ".join(["%s"] * len(markers))
+ cursor.execute(
+ f"SELECT Id, Name FROM Geno WHERE SpeciesId=%s AND Name IN ({paramstr})",
+ (speciesid,)+markers)
+ markerids = {row["Name"]: row["Id"] for row in cursor.fetchall()}
+ cursor.execute("SELECT MAX(Id) AS lastid FROM GenoData")
+ lastid = cursor.fetchone()["lastid"]
+ data = tuple(
+ {**row, "gid": gid}
+ for gid, row in enumerate(
+ ({
+ "value": innerrow[marker],
+ "indid": indids[innerrow["id"]],
+ "markerid": markerids[marker]
+ } for innerrow in genotypes for marker in
+ (key for key in innerrow.keys() if key != "id")),
+ start=lastid+1))
+ cursor.executemany(
+ "INSERT INTO GenoData(Id, StrainId, value) "
+ "VALUES(%(gid)s, %(indid)s, %(value)s)",
+ data)
+ return cursor.rowcount, tuple({
+ "dataid": row["gid"],
+ "markerid": row["markerid"]
+ } for row in data)
def install_genotypes(dbconn: mdb.Connection,
speciesid: int,
@@ -104,6 +141,8 @@ def install_genotypes(dbconn: mdb.Connection,
insert_individuals(dbconn, speciesid, individuals)
cross_reference_individuals(
dbconn, speciesid, populationid, individuals)
+ _num_rows, data_ids = insert_genotype_data(
+ dbconn, speciesid, batch, individuals)
count = count + len(batch)
if "gmap" in cdata: