From 2db9c5ef511ff6dbb09655524b51750b1a964d71 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 20 Mar 2024 11:29:28 +0300
Subject: Fix bug: correctly merge standard-error values in file to data in db

The `read_datavalues(…)` function returns a dict of the form:
  ```
  {
        ProbeSetName01: ({…}, …),
        ProbeSetName02: ({…}, …),
        ︙
  }
  ```

Previously, the generator iterated over the datavalues dict directly,
so each item was one of its keys (a string) rather than a row of
values, and trying to index into that string led to an error.

This commit changes the generator to iterate over the values of the
datavalues dict, flattened into a single sequence of rows, and to look
up each row's annotation by `ProbeSetName` rather than `ProbeSetId`.
---
 scripts/insert_data.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'scripts')

diff --git a/scripts/insert_data.py b/scripts/insert_data.py
index 7d3c1c6..23ae108 100644
--- a/scripts/insert_data.py
+++ b/scripts/insert_data.py
@@ -107,8 +107,12 @@ def check_strains(headings_strains, db_strains):
         file=sys.stderr)
     sys.exit(1)
 
+
 def annotationinfo(
-        dbconn: mdb.Connection, platformid: int, datasetid: int) -> dict:
+        dbconn: mdb.Connection,
+        platformid: int,
+        datasetid: int
+) -> dict[str, dict]:
     "Get annotation information from the database."
     # This is somewhat slow. Look into optimising the behaviour
     def __organise_annotations__(accm, item):
@@ -277,8 +281,11 @@ def insert_se(# pylint: disable = [too-many-arguments]
         return 1
 
     se_values = (
-        {"DataId": annotations[str(item["ProbeSetId"])]["DataId"], **item}
-        for item in read_datavalues(filepath, headings, strains))
+        {"DataId": annotations[str(item["ProbeSetName"])]["DataId"], **item}
+        for item in (
+                row for psrows in
+                read_datavalues(filepath, headings, strains).values()
+                for row in psrows))
     with dbconn.cursor(cursorclass=DictCursor) as cursor:
         while True:
             serrors = tuple(take(se_values, 1000))
-- 
cgit 1.4.1