aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2025-06-02 14:04:12 -0500
committer: Frederick Muriuki Muriithi, 2025-06-02 14:07:03 -0500
commit: 4876d8d879727de4b36b6a4f238511b788c083cf (patch)
tree: 160f653befb20cfe436441edf1f1d573eeffe71b
parent: 4c5b7b7ef118a9ad0b620ae7c7563daa9cd1c33d (diff)
download: gn-uploader-4876d8d879727de4b36b6a4f238511b788c083cf.tar.gz
Process NA-strings in the worker script.
-rw-r--r-- scripts/load_phenotypes_to_db.py | 22
1 files changed, 15 insertions, 7 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 9ad3a93..e044b8f 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -30,6 +30,11 @@ logging.basicConfig(
logger = logging.getLogger(__name__)
+
+def __replace_na_strings__(line, na_strings):
+ return ((None if value in na_strings else value) for value in line)
+
+
def save_phenotypes(
cursor: mysqldb.Connection,
control_data: dict[str, Any],
@@ -61,7 +66,9 @@ def save_phenotypes(
control_data["comment.char"])
return create_new_phenotypes(
cursor,
- (dict(zip(_headers, line)) for filecontent
+ (dict(zip(_headers,
+ __replace_na_strings__(line, control_data["na.strings"])))
+ for filecontent
in (rqtl2.read_csv_file(path) for path in phenofiles)
for idx, line in enumerate(filecontent)
if idx != 0))
@@ -125,14 +132,15 @@ def save_numeric_data(
_filescontents = tuple(
rqtl2.read_csv_file(path,
separator=control_data["sep"],
- comment_char=control_data["comment.char"],
- na_strings=control_data["na.strings"])
+ comment_char=control_data["comment.char"])
for path in phenofiles)
_dataitems = (
- __row_to_dataitems__(dict(zip(_headers, line)),
- dataidmap,
- pheno_name2id,
- samples)
+ __row_to_dataitems__(
+ dict(zip(_headers,
+ __replace_na_strings__(line, control_data["na.strings"]))),
+ dataidmap,
+ pheno_name2id,
+ samples)
for linenum, line in (enumline for filecontent in _filescontents
for enumline in enumerate(filecontent))
if linenum > 0)