about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2024-01-10 07:04:59 +0300
committerFrederick Muriuki Muriithi2024-01-10 08:02:51 +0300
commitdd50c620e77561b4b507ed8ed73b2fd5cdab31ef (patch)
tree501d412f474b4e1593b21ccc95669d3cb899df13 /scripts
parentbd902e747670dab5e31eaf09755ce02b278061e5 (diff)
downloadgn-uploader-dd50c620e77561b4b507ed8ed73b2fd5cdab31ef.tar.gz
Insert genotype data.
Diffstat (limited to 'scripts')
-rw-r--r--scripts/rqtl2/install_genotypes.py41
1 files changed, 40 insertions, 1 deletions
diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py
index d28b3b7..63e6113 100644
--- a/scripts/rqtl2/install_genotypes.py
+++ b/scripts/rqtl2/install_genotypes.py
@@ -67,7 +67,44 @@ def cross_reference_individuals(dbconn: mdb.Connection,
             "ON DUPLICATE KEY UPDATE InbredSetId=InbredSetId",
             tuple(ids))
         return cursor.rowcount
-        return cursor.rowcount
+
+def insert_genotype_data(dbconn: mdb.Connection,
+                         speciesid: int,
+                         genotypes: tuple[dict, ...],
+                         individuals: tuple[str, ...]) -> tuple[int, tuple[int, ...]]:
+    """Insert the genotype data values into the database."""
+    with dbconn.cursor(cursorclass=DictCursor) as cursor:
+        paramstr = ", ".join(["%s"] * len(individuals))
+        cursor.execute(
+            f"SELECT Id, Name FROM Strain WHERE Name IN ({paramstr})",
+            individuals)
+        indids = {row["Name"]: row["Id"] for row in cursor.fetchall()}
+        markers = tuple(key for key in genotypes[0].keys() if key != "id")
+        paramstr = ", ".join(["%s"] * len(markers))
+        cursor.execute(
+            f"SELECT Id, Name FROM Geno WHERE SpeciesId=%s AND Name IN ({paramstr})",
+            (speciesid,)+markers)
+        markerids = {row["Name"]: row["Id"] for row in cursor.fetchall()}
+        cursor.execute("SELECT MAX(Id) AS lastid FROM GenoData")
+        lastid = cursor.fetchone()["lastid"]
+        data = tuple(
+            {**row, "gid": gid}
+            for gid, row in enumerate(
+                    ({
+                        "value": innerrow[marker],
+                        "indid": indids[innerrow["id"]],
+                        "markerid": markerids[marker]
+                    } for innerrow in genotypes for marker in
+                     (key for key in innerrow.keys() if key != "id")),
+                    start=lastid+1))
+        cursor.executemany(
+            "INSERT INTO GenoData(Id, StrainId, value) "
+            "VALUES(%(gid)s, %(indid)s, %(value)s)",
+            data)
+        return cursor.rowcount, tuple({
+            "dataid": row["gid"],
+            "markerid": row["markerid"]
+        } for row in data)
 
 def install_genotypes(dbconn: mdb.Connection,
                       speciesid: int,
@@ -104,6 +141,8 @@ def install_genotypes(dbconn: mdb.Connection,
                 insert_individuals(dbconn, speciesid, individuals)
                 cross_reference_individuals(
                     dbconn, speciesid, populationid, individuals)
+                _num_rows, data_ids = insert_genotype_data(
+                    dbconn, speciesid, batch, individuals)
                 count = count + len(batch)
 
             if "gmap" in cdata: