aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2024-03-20 11:29:28 +0300
committerFrederick Muriuki Muriithi2024-03-20 11:29:28 +0300
commit2db9c5ef511ff6dbb09655524b51750b1a964d71 (patch)
tree1d4cd099dfdb6b4faebac626c201b17851fa585c /scripts
parent77fc426d35cf88af768dfac281e6a4979ed451fc (diff)
downloadgn-uploader-2db9c5ef511ff6dbb09655524b51750b1a964d71.tar.gz
Fix bug: correctly merge standard-error values in file to data in db
The `read_datavalues(…)` function returns a dict of the form: ``` { ProbeSetName01: ({…}, …), ProbeSetName02: ({…}, …), ︙ } ``` Previously, the generator tried to index into the keys of the datavalues dict, which are strings, leading to an error. This commit changes the generator to return the values of the datavalues dict as a flattened list of values.
Diffstat (limited to 'scripts')
-rw-r--r--scripts/insert_data.py13
1 file changed, 10 insertions, 3 deletions
diff --git a/scripts/insert_data.py b/scripts/insert_data.py
index 7d3c1c6..23ae108 100644
--- a/scripts/insert_data.py
+++ b/scripts/insert_data.py
@@ -107,8 +107,12 @@ def check_strains(headings_strains, db_strains):
file=sys.stderr)
sys.exit(1)
+
def annotationinfo(
- dbconn: mdb.Connection, platformid: int, datasetid: int) -> dict:
+ dbconn: mdb.Connection,
+ platformid: int,
+ datasetid: int
+) -> dict[str, dict]:
"Get annotation information from the database."
# This is somewhat slow. Look into optimising the behaviour
def __organise_annotations__(accm, item):
@@ -277,8 +281,11 @@ def insert_se(# pylint: disable = [too-many-arguments]
return 1
se_values = (
- {"DataId": annotations[str(item["ProbeSetId"])]["DataId"], **item}
- for item in read_datavalues(filepath, headings, strains))
+ {"DataId": annotations[str(item["ProbeSetName"])]["DataId"], **item}
+ for item in (
+ row for psrows in
+ read_datavalues(filepath, headings, strains).values()
+ for row in psrows))
with dbconn.cursor(cursorclass=DictCursor) as cursor:
while True:
serrors = tuple(take(se_values, 1000))