From bd902e747670dab5e31eaf09755ce02b278061e5 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 10 Jan 2024 06:54:13 +0300 Subject: Cross-reference individuals to populations. --- scripts/rqtl2/install_genotypes.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py index 3016f1f..d28b3b7 100644 --- a/scripts/rqtl2/install_genotypes.py +++ b/scripts/rqtl2/install_genotypes.py @@ -9,6 +9,7 @@ from argparse import ArgumentParser import MySQLdb as mdb from redis import Redis +from MySQLdb.cursors import DictCursor from r_qtl import r_qtl2 as rqtl2 @@ -48,7 +49,24 @@ def insert_individuals(dbconn: mdb.Connection, tuple({"speciesid": speciesid, "id": individual} for individual in individuals)) return cursor.rowcount - # TODO: Install geno data: GenoData + +def cross_reference_individuals(dbconn: mdb.Connection, + speciesid: int, + populationid: int, + individuals: tuple[str, ...]) -> int: + """Cross reference any inserted individuals.""" + with dbconn.cursor(cursorclass=DictCursor) as cursor: + paramstr = ", ".join(["%s"] * len(individuals)) + cursor.execute(f"SELECT Id FROM Strain WHERE Name IN ({paramstr})", + individuals) + ids = ({"popid": populationid, "indid": row["Id"]} + for row in cursor.fetchall()) + cursor.executemany( + "INSERT INTO StrainXRef(InbredSetId, StrainId) " + "VALUES(%(popid)s, %(indid)s) " + "ON DUPLICATE KEY UPDATE InbredSetId=InbredSetId", + tuple(ids)) + return cursor.rowcount return cursor.rowcount def install_genotypes(dbconn: mdb.Connection, @@ -84,6 +102,8 @@ def install_genotypes(dbconn: mdb.Connection, tuple(key for key in batch[0].keys() if key != "id")) individuals = tuple(row["id"] for row in batch) insert_individuals(dbconn, speciesid, individuals) + cross_reference_individuals( + dbconn, speciesid, populationid, individuals) count = count + len(batch) if "gmap" in cdata: -- cgit v1.2.3