From 1d3f877aadcf89f82746e4dccc8d8cf767928419 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 16 Oct 2023 14:45:22 +0300
Subject: Fix bug: Insert ProbeSets if they do not exist.

---
 scripts/insert_data.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/scripts/insert_data.py b/scripts/insert_data.py
index 7e27f93..f8b73f6 100644
--- a/scripts/insert_data.py
+++ b/scripts/insert_data.py
@@ -131,27 +131,33 @@ def insert_means(
     "Insert the means/averages data into the database"
     headings = read_file_headings(filepath)
     strains = strains_info(dbconn, headings[1:], speciesid)
-    check_strains(headings[1:], strains)
+    check_strains(headings[1:], strains),
+    probeset_query = (
+        "INSERT IGNORE INTO ProbeSet(ChipId, Name) "
+        "VALUES (%(ChipId)s, %(ProbeSetId)s) ")
     means_query = (
         "INSERT INTO ProbeSetData "
         "VALUES(%(ProbeSetDataId)s, %(StrainId)s, %(DataValue)s)")
     xref_query = (
         "INSERT INTO ProbeSetXRef(ProbeSetFreezeId, ProbeSetId, DataId) "
         "VALUES(%(ProbeSetFreezeId)s, %(ProbeSetId)s, %(ProbeSetDataId)s)")
-    the_means = (
-        {"ProbeSetFreezeId": datasetid, "ProbeSetDataId": data_id, **mean}
-        for data_id, mean in
-        enumerate(
-            read_datavalues(filepath, headings, strains),
-            start=(last_data_id(dbconn)+1)))
+    the_means = ({
+        "ProbeSetFreezeId": datasetid, "ProbeSetDataId": data_id,
+        "ChipId": platform_id, **mean
+    } for data_id, mean in enumerate(
+        read_datavalues(filepath, headings, strains),
+        start=(last_data_id(dbconn)+1)))
     with dbconn.cursor(cursorclass=DictCursor) as cursor:
         while True:
             means = tuple(take(the_means, 1000))
             if not bool(means):
                 break
+            print(__format_query__(probeset_query, means))
+            print()
             print(__format_query__(means_query, means))
             print()
             print(__format_query__(xref_query, means))
+            cursor.executemany(probeset_query, means)
             cursor.executemany(means_query, means)
             cursor.executemany(xref_query, means)
     return 0
-- 
cgit v1.2.3