aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2024-01-10 06:54:13 +0300
committerFrederick Muriuki Muriithi2024-01-10 07:19:38 +0300
commitbd902e747670dab5e31eaf09755ce02b278061e5 (patch)
tree7251404431ee6635fb630bd0c3220057b9018760 /scripts
parent4f2934d16a6dba52a7676fab48acc41e59b5bf29 (diff)
downloadgn-uploader-bd902e747670dab5e31eaf09755ce02b278061e5.tar.gz
Cross-reference individuals to populations.
Diffstat (limited to 'scripts')
-rw-r--r--scripts/rqtl2/install_genotypes.py22
1 files changed, 21 insertions, 1 deletions
diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py
index 3016f1f..d28b3b7 100644
--- a/scripts/rqtl2/install_genotypes.py
+++ b/scripts/rqtl2/install_genotypes.py
@@ -9,6 +9,7 @@ from argparse import ArgumentParser
import MySQLdb as mdb
from redis import Redis
+from MySQLdb.cursors import DictCursor
from r_qtl import r_qtl2 as rqtl2
@@ -48,7 +49,24 @@ def insert_individuals(dbconn: mdb.Connection,
tuple({"speciesid": speciesid, "id": individual}
for individual in individuals))
return cursor.rowcount
- # TODO: Install geno data: GenoData
+
+def cross_reference_individuals(dbconn: mdb.Connection,
+ speciesid: int,
+ populationid: int,
+ individuals: tuple[str, ...]) -> int:
+ """Cross reference any inserted individuals."""
+ with dbconn.cursor(cursorclass=DictCursor) as cursor:
+ paramstr = ", ".join(["%s"] * len(individuals))
+ cursor.execute(f"SELECT Id FROM Strain WHERE Name IN ({paramstr})",
+ individuals)
+ ids = ({"popid": populationid, "indid": row["Id"]}
+ for row in cursor.fetchall())
+ cursor.executemany(
+ "INSERT INTO StrainXRef(InbredSetId, StrainId) "
+ "VALUES(%(popid)s, %(indid)s) "
+ "ON DUPLICATE KEY UPDATE InbredSetId=InbredSetId",
+ tuple(ids))
+ return cursor.rowcount
return cursor.rowcount
def install_genotypes(dbconn: mdb.Connection,
@@ -84,6 +102,8 @@ def install_genotypes(dbconn: mdb.Connection,
tuple(key for key in batch[0].keys() if key != "id"))
individuals = tuple(row["id"] for row in batch)
insert_individuals(dbconn, speciesid, individuals)
+ cross_reference_individuals(
+ dbconn, speciesid, populationid, individuals)
count = count + len(batch)
if "gmap" in cdata: