about summary refs log tree commit diff
path: root/scripts/insert_data.py
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2023-12-20 06:28:06 +0300
committer Frederick Muriuki Muriithi 2023-12-20 06:28:06 +0300
commit cb7f6b866672f44d730f303639e6372d2ae2babe (patch)
tree 52a868382ab81c407a738bbeef45c3f603d5293e /scripts/insert_data.py
parent bf0013bceac63c852b1baf5842f9c7b560f3330d (diff)
download gn-uploader-cb7f6b866672f44d730f303639e6372d2ae2babe.tar.gz
Fix ID column key
Fix the wrong assumption that the ID key will always be "ProbeSetID". The key for the ID column could change from study to study or dataset to dataset.
Diffstat (limited to 'scripts/insert_data.py')
-rw-r--r-- scripts/insert_data.py 8
1 files changed, 5 insertions, 3 deletions
diff --git a/scripts/insert_data.py b/scripts/insert_data.py
index 63e85ef..0ac3799 100644
--- a/scripts/insert_data.py
+++ b/scripts/insert_data.py
@@ -67,9 +67,10 @@ def strains_info(
def read_datavalues(filepath, headings, strain_info):
"""Read numerical, data values from the file."""
+ id_key = headings[0]
return {
- str(row["ProbeSetID"]): tuple({
- "ProbeSetName": str(row["ProbeSetID"]),
+ str(row[id_key]): tuple({
+ "ProbeSetName": str(row[id_key]),
"StrainId": strain_info[sname]["Id"],
"DataValue": float(row[sname])
} for sname in headings[1:])
@@ -79,9 +80,10 @@ def read_datavalues(filepath, headings, strain_info):
def read_probesets(filepath, headings):
"""Read the ProbeSet names."""
+ id_key = headings[0]
for row in (dict(zip(headings, line))
for line in read_file_contents(filepath)):
- yield {"Name": str(row["ProbeSetID"])}
+ yield {"Name": str(row[id_key])}
def last_data_id(dbconn: mdb.Connection) -> int:
"Get the last id from the database"