diff options
author | Frederick Muriuki Muriithi | 2025-06-03 07:42:08 -0500 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2025-06-03 07:42:08 -0500 |
commit | 59b07a41ca5181a19e631f640b38e5fd33d1d550 (patch) | |
tree | 6691a06386ca792569f6b77bb37aa0b9e50c220e | |
parent | 6da353bc7dea262e3330eba270e52ed3f7248582 (diff) | |
download | gn-uploader-59b07a41ca5181a19e631f640b38e5fd33d1d550.tar.gz |
Extract building of data items into helper function.
-rw-r--r-- | scripts/load_phenotypes_to_db.py | 64 |
1 file changed, 40 insertions, 24 deletions
```diff
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index d3d9f5a..de06d70 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -99,6 +99,39 @@ def __row_to_dataitems__(
     } for phenoname, phenovalue in sample_row.items() if phenoname != "id")
 
 
+def __build_dataitems__(
+        filetype,
+        phenofiles,
+        control_data,
+        samples,
+        dataidmap,
+        pheno_name2id
+):
+    _headers = rqtl2.read_csv_file_headers(
+        phenofiles[0],
+        control_data[f"{filetype}_transposed"],
+        control_data["sep"],
+        control_data["comment.char"])
+    _filescontents = (
+        rqtl2.read_csv_file(path,
+                            separator=control_data["sep"],
+                            comment_char=control_data["comment.char"])
+        for path in phenofiles)
+    _linescontents = (
+        __row_to_dataitems__(
+            dict(zip(_headers,
+                     __replace_na_strings__(line, control_data["na.strings"]))),
+            dataidmap,
+            pheno_name2id,
+            samples)
+        for linenum, line in (enumline for filecontent in _filescontents
+                              for enumline in enumerate(filecontent))
+        if linenum > 0)
+    return (item for items in _linescontents
+            for item in items
+            if item["value"] is not None)
+
+
 def save_numeric_data(
         conn: mysqldb.Connection,
         dataidmap: dict,
@@ -124,33 +157,16 @@ def save_numeric_data(
                          build_line_joiner(control_data))
         for _file in phenofiles)
 
-    _headers = rqtl2.read_csv_file_headers(phenofiles[0],
-                                           control_data[f"{filetype}_transposed"],
-                                           control_data["sep"],
-                                           control_data["comment.char"])
-
-    _filescontents = (
-        rqtl2.read_csv_file(path,
-                            separator=control_data["sep"],
-                            comment_char=control_data["comment.char"])
-        for path in phenofiles)
-    _dataitems = (
-        __row_to_dataitems__(
-            dict(zip(_headers,
-                     __replace_na_strings__(line, control_data["na.strings"]))),
-            dataidmap,
-            pheno_name2id,
-            samples)
-        for linenum, line in (enumline for filecontent in _filescontents
-                              for enumline in enumerate(filecontent))
-        if linenum > 0)
-
     return save_phenotypes_data(
         conn,
         table,
-        (item for items in _dataitems
-         for item in items
-         if item["value"] is not None))
+        __build_dataitems__(
+            filetype,
+            phenofiles,
+            control_data,
+            samples,
+            dataidmap,
+            pheno_name2id))
 
 
 save_pheno_data = partial(save_numeric_data,
```