aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-01-10 07:16:55 +0300
committer Frederick Muriuki Muriithi 2024-01-10 08:02:51 +0300
commit 0d0b284a76bee863b97c46e8275cd62e02552a93 (patch)
tree 39206b15683a9a943add843dc262758cb495e1cd
parent dd50c620e77561b4b507ed8ed73b2fd5cdab31ef (diff)
download gn-uploader-0d0b284a76bee863b97c46e8275cd62e02552a93.tar.gz
Cross-reference genotype data to the dataset.
-rw-r--r-- scripts/rqtl2/install_genotypes.py 15
1 files changed, 14 insertions, 1 deletions
diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py
index 63e6113..a555d46 100644
--- a/scripts/rqtl2/install_genotypes.py
+++ b/scripts/rqtl2/install_genotypes.py
@@ -106,6 +106,18 @@ def insert_genotype_data(dbconn: mdb.Connection,
"markerid": row["markerid"]
} for row in data)
+def cross_reference_genotypes(dbconn: mdb.Connection,
+                              datasetid: int,
+                              dataids: tuple[dict, ...]) -> int:
+    """
+    Cross-reference the data to the relevant dataset.
+
+    Inserts one `GenoXRef` row per item in `dataids`, linking it to the
+    dataset identified by `datasetid`.
+
+    Parameters:
+      dbconn: Connection to the database.
+      datasetid: Value stored in `GenoXRef.GenoFreezeId` for every row.
+      dataids: The rows to cross-reference. Each item must be a mapping that
+        provides the `markerid` and `dataid` keys used by the query.
+
+    Returns:
+      The cursor's `rowcount` after the insert, or 0 if `dataids` is empty.
+    """
+    params = tuple({**row, "datasetid": datasetid} for row in dataids)
+    if not params:
+        # Nothing to insert: skip the query rather than report the
+        # rowcount of a cursor that never executed a statement.
+        return 0
+
+    with dbconn.cursor(cursorclass=DictCursor) as cursor:
+        # `params` is a sequence of parameter mappings (one per row), so it
+        # has to go through `executemany`; `execute` binds a single
+        # parameter set and cannot format named placeholders from a tuple.
+        cursor.executemany(
+            "INSERT INTO GenoXRef(GenoFreezeId, GenoId, DataId) "
+            "VALUES(%(datasetid)s, %(markerid)s, %(dataid)s) "
+            "ON DUPLICATE KEY UPDATE GenoFreezeId=GenoFreezeId",
+            params)
+        return cursor.rowcount
+
def install_genotypes(dbconn: mdb.Connection,
speciesid: int,
populationid: int,
@@ -141,8 +153,9 @@ def install_genotypes(dbconn: mdb.Connection,
insert_individuals(dbconn, speciesid, individuals)
cross_reference_individuals(
dbconn, speciesid, populationid, individuals)
- _num_rows, data_ids = insert_genotype_data(
+ _num_rows, dataids = insert_genotype_data(
dbconn, speciesid, batch, individuals)
+ cross_reference_genotypes(dbconn, datasetid, dataids)
count = count + len(batch)
if "gmap" in cdata: