diff options
author | Frederick Muriuki Muriithi | 2023-12-20 06:28:06 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2023-12-20 06:28:06 +0300 |
commit | cb7f6b866672f44d730f303639e6372d2ae2babe (patch) | |
tree | 52a868382ab81c407a738bbeef45c3f603d5293e /scripts/insert_data.py | |
parent | bf0013bceac63c852b1baf5842f9c7b560f3330d (diff) | |
download | gn-uploader-cb7f6b866672f44d730f303639e6372d2ae2babe.tar.gz |
Fix ID column key
Fix the wrong assumption that the ID key will always be
"ProbeSetID". The key for the ID column could change from study to
study or dataset to dataset.
Diffstat (limited to 'scripts/insert_data.py')
-rw-r--r-- | scripts/insert_data.py | 8 |
1 files changed, 5 insertions, 3 deletions
```diff
diff --git a/scripts/insert_data.py b/scripts/insert_data.py
index 63e85ef..0ac3799 100644
--- a/scripts/insert_data.py
+++ b/scripts/insert_data.py
@@ -67,9 +67,10 @@ def strains_info(
 
 def read_datavalues(filepath, headings, strain_info):
     """Read numerical, data values from the file."""
+    id_key = headings[0]
     return {
-        str(row["ProbeSetID"]): tuple({
-            "ProbeSetName": str(row["ProbeSetID"]),
+        str(row[id_key]): tuple({
+            "ProbeSetName": str(row[id_key]),
             "StrainId": strain_info[sname]["Id"],
             "DataValue": float(row[sname])
         } for sname in headings[1:])
@@ -79,9 +80,10 @@ def read_datavalues(filepath, headings, strain_info):
 
 def read_probesets(filepath, headings):
     """Read the ProbeSet names."""
+    id_key = headings[0]
     for row in (dict(zip(headings, line))
                 for line in read_file_contents(filepath)):
-        yield {"Name": str(row["ProbeSetID"])}
+        yield {"Name": str(row[id_key])}
 
 def last_data_id(dbconn: mdb.Connection) -> int:
     "Get the last id from the database"
```