diff options
author | Frederick Muriuki Muriithi | 2024-03-20 11:29:28 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2024-03-20 11:29:28 +0300 |
commit | 2db9c5ef511ff6dbb09655524b51750b1a964d71 (patch) | |
tree | 1d4cd099dfdb6b4faebac626c201b17851fa585c | |
parent | 77fc426d35cf88af768dfac281e6a4979ed451fc (diff) | |
download | gn-uploader-2db9c5ef511ff6dbb09655524b51750b1a964d71.tar.gz |
Fix bug: correctly merge standard-error values in file to data in db
The `read_datavalues(…)` function returns a dict of the form:
```
{
ProbeSetName01: ({…}, …),
ProbeSetName02: ({…}, …),
︙
}
```
Previously, the generator iterated over the datavalues dict itself, which
yields its keys (strings); indexing into those strings led to an error.
This commit changes the generator to iterate over the values of the
datavalues dict, flattened into a single sequence of rows.
-rw-r--r-- | scripts/insert_data.py | 13 |
1 files changed, 10 insertions, 3 deletions
diff --git a/scripts/insert_data.py b/scripts/insert_data.py index 7d3c1c6..23ae108 100644 --- a/scripts/insert_data.py +++ b/scripts/insert_data.py @@ -107,8 +107,12 @@ def check_strains(headings_strains, db_strains): file=sys.stderr) sys.exit(1) + def annotationinfo( - dbconn: mdb.Connection, platformid: int, datasetid: int) -> dict: + dbconn: mdb.Connection, + platformid: int, + datasetid: int +) -> dict[str, dict]: "Get annotation information from the database." # This is somewhat slow. Look into optimising the behaviour def __organise_annotations__(accm, item): @@ -277,8 +281,11 @@ def insert_se(# pylint: disable = [too-many-arguments] return 1 se_values = ( - {"DataId": annotations[str(item["ProbeSetId"])]["DataId"], **item} - for item in read_datavalues(filepath, headings, strains)) + {"DataId": annotations[str(item["ProbeSetName"])]["DataId"], **item} + for item in ( + row for psrows in + read_datavalues(filepath, headings, strains).values() + for row in psrows)) with dbconn.cursor(cursorclass=DictCursor) as cursor: while True: serrors = tuple(take(se_values, 1000)) |