diff options
author | Frederick Muriuki Muriithi | 2024-01-10 06:54:13 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2024-01-10 07:19:38 +0300 |
commit | bd902e747670dab5e31eaf09755ce02b278061e5 (patch) | |
tree | 7251404431ee6635fb630bd0c3220057b9018760 /scripts/rqtl2 | |
parent | 4f2934d16a6dba52a7676fab48acc41e59b5bf29 (diff) | |
download | gn-uploader-bd902e747670dab5e31eaf09755ce02b278061e5.tar.gz |
Cross-reference individuals to populations.
Diffstat (limited to 'scripts/rqtl2')
-rw-r--r-- | scripts/rqtl2/install_genotypes.py | 22 |
1 files changed, 21 insertions, 1 deletions
```diff
diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py
index 3016f1f..d28b3b7 100644
--- a/scripts/rqtl2/install_genotypes.py
+++ b/scripts/rqtl2/install_genotypes.py
@@ -9,6 +9,7 @@ from argparse import ArgumentParser
 
 import MySQLdb as mdb
 from redis import Redis
+from MySQLdb.cursors import DictCursor
 
 from r_qtl import r_qtl2 as rqtl2
 
@@ -48,7 +49,24 @@ def insert_individuals(dbconn: mdb.Connection,
                 tuple({"speciesid": speciesid, "id": individual}
                       for individual in individuals))
         return cursor.rowcount
-    # TODO: Install geno data: GenoData
+
+def cross_reference_individuals(dbconn: mdb.Connection,
+                                speciesid: int,
+                                populationid: int,
+                                individuals: tuple[str, ...]) -> int:
+    """Cross reference any inserted individuals."""
+    with dbconn.cursor(cursorclass=DictCursor) as cursor:
+        paramstr = ", ".join(["%s"] * len(individuals))
+        cursor.execute(f"SELECT Id FROM Strain WHERE Name IN ({paramstr})",
+                       individuals)
+        ids = ({"popid": populationid, "indid": row["Id"]}
+               for row in cursor.fetchall())
+        cursor.executemany(
+            "INSERT INTO StrainXRef(InbredSetId, StrainId) "
+            "VALUES(%(popid)s, %(indid)s) "
+            "ON DUPLICATE KEY UPDATE InbredSetId=InbredSetId",
+            tuple(ids))
+        return cursor.rowcount
     return cursor.rowcount
 
 def install_genotypes(dbconn: mdb.Connection,
@@ -84,6 +102,8 @@ def install_genotypes(dbconn: mdb.Connection,
                 tuple(key for key in batch[0].keys() if key != "id"))
             individuals = tuple(row["id"] for row in batch)
             insert_individuals(dbconn, speciesid, individuals)
+            cross_reference_individuals(
+                dbconn, speciesid, populationid, individuals)
             count = count + len(batch)
 
         if "gmap" in cdata:
```