about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-01-10 06:54:13 +0300
committer Frederick Muriuki Muriithi 2024-01-10 07:19:38 +0300
commit bd902e747670dab5e31eaf09755ce02b278061e5 (patch)
tree 7251404431ee6635fb630bd0c3220057b9018760 /scripts
parent 4f2934d16a6dba52a7676fab48acc41e59b5bf29 (diff)
download gn-uploader-bd902e747670dab5e31eaf09755ce02b278061e5.tar.gz
Cross-reference individuals to populations.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/rqtl2/install_genotypes.py 22
1 files changed, 21 insertions, 1 deletions
diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py
index 3016f1f..d28b3b7 100644
--- a/scripts/rqtl2/install_genotypes.py
+++ b/scripts/rqtl2/install_genotypes.py
@@ -9,6 +9,7 @@ from argparse import ArgumentParser
 
 import MySQLdb as mdb
 from redis import Redis
+from MySQLdb.cursors import DictCursor
 
 from r_qtl import r_qtl2 as rqtl2
 
@@ -48,7 +49,24 @@ def insert_individuals(dbconn: mdb.Connection,
             tuple({"speciesid": speciesid, "id": individual}
                   for individual in individuals))
         return cursor.rowcount
-        # TODO: Install geno data: GenoData
+
+def cross_reference_individuals(dbconn: mdb.Connection,
+                                speciesid: int,
+                                populationid: int,
+                                individuals: tuple[str, ...]) -> int:
+    """Cross reference any inserted individuals."""
+    with dbconn.cursor(cursorclass=DictCursor) as cursor:
+        paramstr = ", ".join(["%s"] * len(individuals))
+        cursor.execute(f"SELECT Id FROM Strain WHERE Name IN ({paramstr})",
+                       individuals)
+        ids = ({"popid": populationid, "indid": row["Id"]}
+               for row in cursor.fetchall())
+        cursor.executemany(
+            "INSERT INTO StrainXRef(InbredSetId, StrainId) "
+            "VALUES(%(popid)s, %(indid)s) "
+            "ON DUPLICATE KEY UPDATE InbredSetId=InbredSetId",
+            tuple(ids))
+        return cursor.rowcount
         return cursor.rowcount
 
 def install_genotypes(dbconn: mdb.Connection,
@@ -84,6 +102,8 @@ def install_genotypes(dbconn: mdb.Connection,
                     tuple(key for key in batch[0].keys() if key != "id"))
                 individuals = tuple(row["id"] for row in batch)
                 insert_individuals(dbconn, speciesid, individuals)
+                cross_reference_individuals(
+                    dbconn, speciesid, populationid, individuals)
                 count = count + len(batch)
 
             if "gmap" in cdata: