From 2db9c5ef511ff6dbb09655524b51750b1a964d71 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 20 Mar 2024 11:29:28 +0300 Subject: Fix bug: correctly merge standard-error values in file to data in db The `read_datavalues(…)` function returns a dict of the form: ``` { ProbeSetName01: ({…}, …), ProbeSetName02: ({…}, …), ︙ } ``` Previously, the generator iterated over that dict directly, which yields its keys; since the keys are strings, indexing into them (`item["ProbeSetId"]`) raised an error. This commit changes the generator to iterate over the values of the dict instead, flattening them into a single sequence of rows, and looks up each row's annotation by "ProbeSetName" rather than "ProbeSetId". --- scripts/insert_data.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/scripts/insert_data.py b/scripts/insert_data.py index 7d3c1c6..23ae108 100644 --- a/scripts/insert_data.py +++ b/scripts/insert_data.py @@ -107,8 +107,12 @@ def check_strains(headings_strains, db_strains): file=sys.stderr) sys.exit(1) + def annotationinfo( - dbconn: mdb.Connection, platformid: int, datasetid: int) -> dict: + dbconn: mdb.Connection, + platformid: int, + datasetid: int +) -> dict[str, dict]: "Get annotation information from the database." # This is somewhat slow. Look into optimising the behaviour def __organise_annotations__(accm, item): @@ -277,8 +281,11 @@ def insert_se(# pylint: disable = [too-many-arguments] return 1 se_values = ( - {"DataId": annotations[str(item["ProbeSetId"])]["DataId"], **item} - for item in read_datavalues(filepath, headings, strains)) + {"DataId": annotations[str(item["ProbeSetName"])]["DataId"], **item} + for item in ( + row for psrows in + read_datavalues(filepath, headings, strains).values() + for row in psrows)) with dbconn.cursor(cursorclass=DictCursor) as cursor: while True: serrors = tuple(take(se_values, 1000)) -- cgit v1.2.3