diff options
author | Frederick Muriuki Muriithi | 2025-06-02 14:04:12 -0500 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2025-06-02 14:07:03 -0500 |
commit | 4876d8d879727de4b36b6a4f238511b788c083cf (patch) | |
tree | 160f653befb20cfe436441edf1f1d573eeffe71b | |
parent | 4c5b7b7ef118a9ad0b620ae7c7563daa9cd1c33d (diff) | |
download | gn-uploader-4876d8d879727de4b36b6a4f238511b788c083cf.tar.gz |
Process NA-strings in the worker script.
-rw-r--r-- | scripts/load_phenotypes_to_db.py | 22 |
1 file changed, 15 insertions, 7 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py index 9ad3a93..e044b8f 100644 --- a/scripts/load_phenotypes_to_db.py +++ b/scripts/load_phenotypes_to_db.py @@ -30,6 +30,11 @@ logging.basicConfig( logger = logging.getLogger(__name__) + +def __replace_na_strings__(line, na_strings): + return ((None if value in na_strings else value) for value in line) + + def save_phenotypes( cursor: mysqldb.Connection, control_data: dict[str, Any], @@ -61,7 +66,9 @@ def save_phenotypes( control_data["comment.char"]) return create_new_phenotypes( cursor, - (dict(zip(_headers, line)) for filecontent + (dict(zip(_headers, + __replace_na_strings__(line, control_data["na.strings"]))) + for filecontent in (rqtl2.read_csv_file(path) for path in phenofiles) for idx, line in enumerate(filecontent) if idx != 0)) @@ -125,14 +132,15 @@ def save_numeric_data( _filescontents = tuple( rqtl2.read_csv_file(path, separator=control_data["sep"], - comment_char=control_data["comment.char"], - na_strings=control_data["na.strings"]) + comment_char=control_data["comment.char"]) for path in phenofiles) _dataitems = ( - __row_to_dataitems__(dict(zip(_headers, line)), - dataidmap, - pheno_name2id, - samples) + __row_to_dataitems__( + dict(zip(_headers, + __replace_na_strings__(line, control_data["na.strings"]))), + dataidmap, + pheno_name2id, + samples) for linenum, line in (enumline for filecontent in _filescontents for enumline in enumerate(filecontent)) if linenum > 0) |