From 2fcf052d9369a78e7f6cf99e73d504dcacdbbfe3 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 18 Oct 2023 12:07:34 +0300 Subject: Link to `ProbeSet.Id` in XRef Table The "ProbeSetId" value in the file is actually stored in the database in the `ProbeSet.Name` field, whereas the `ProbeSetXRef` table actually uses the `ProbeSet.Id` value to define the relationships. This commit fixes that issue. --- scripts/insert_data.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'scripts/insert_data.py') diff --git a/scripts/insert_data.py b/scripts/insert_data.py index f8b73f6..614ba4e 100644 --- a/scripts/insert_data.py +++ b/scripts/insert_data.py @@ -125,6 +125,21 @@ def __format_query__(query, params): "INSERT INTO ", "INSERT INTO\n\t") return f"{insert_str}\nVALUES\n\t{values_str};" +def __xref_params__(dbconn: mdb.Connection, means: list[dict]) -> dict: + """Process params for cross-reference table.""" + xref_names = tuple({mean["ProbeSetId"] for mean in means}) + with dbconn.cursor(cursorclass=DictCursor) as cursor: + params_str = ", ".join(["%s"] * len(xref_names)) + cursor.execute( + f"SELECT Name, Id FROM ProbeSet WHERE Name IN ({params_str})", + xref_names) + ids = {row["Name"]: row["Id"] for row in cursor.fetchall()} + return tuple({ + **mean, + "ProbeSetName": mean["ProbeSetId"], + "ProbeSetId": ids[str(mean["ProbeSetId"])] + } for mean in means) + def insert_means( filepath: str, speciesid: int, platform_id: int, datasetid: int, dbconn: mdb.Connection, rconn: Redis) -> int: # pylint: disable=[unused-argument] @@ -159,7 +174,7 @@ def insert_means( print(__format_query__(xref_query, means)) cursor.executemany(probeset_query, means) cursor.executemany(means_query, means) - cursor.executemany(xref_query, means) + cursor.executemany(xref_query, __xref_params__(dbconn, means)) return 0 def insert_se(# pylint: disable = [too-many-arguments] -- cgit v1.2.3