about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2025-06-03 07:42:08 -0500
committer: Frederick Muriuki Muriithi, 2025-06-03 07:42:08 -0500
commit: 59b07a41ca5181a19e631f640b38e5fd33d1d550 (patch)
tree: 6691a06386ca792569f6b77bb37aa0b9e50c220e
parent: 6da353bc7dea262e3330eba270e52ed3f7248582 (diff)
download: gn-uploader-59b07a41ca5181a19e631f640b38e5fd33d1d550.tar.gz
Extract building of data items into helper function.
-rw-r--r-- scripts/load_phenotypes_to_db.py | 64
1 files changed, 40 insertions, 24 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index d3d9f5a..de06d70 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -99,6 +99,39 @@ def __row_to_dataitems__(
} for phenoname, phenovalue in sample_row.items() if phenoname != "id")
+def __build_dataitems__(
+ filetype,
+ phenofiles,
+ control_data,
+ samples,
+ dataidmap,
+ pheno_name2id
+):
+ _headers = rqtl2.read_csv_file_headers(
+ phenofiles[0],
+ control_data[f"{filetype}_transposed"],
+ control_data["sep"],
+ control_data["comment.char"])
+ _filescontents = (
+ rqtl2.read_csv_file(path,
+ separator=control_data["sep"],
+ comment_char=control_data["comment.char"])
+ for path in phenofiles)
+ _linescontents = (
+ __row_to_dataitems__(
+ dict(zip(_headers,
+ __replace_na_strings__(line, control_data["na.strings"]))),
+ dataidmap,
+ pheno_name2id,
+ samples)
+ for linenum, line in (enumline for filecontent in _filescontents
+ for enumline in enumerate(filecontent))
+ if linenum > 0)
+ return (item for items in _linescontents
+ for item in items
+ if item["value"] is not None)
+
+
def save_numeric_data(
conn: mysqldb.Connection,
dataidmap: dict,
@@ -124,33 +157,16 @@ def save_numeric_data(
build_line_joiner(control_data))
for _file in phenofiles)
- _headers = rqtl2.read_csv_file_headers(phenofiles[0],
- control_data[f"{filetype}_transposed"],
- control_data["sep"],
- control_data["comment.char"])
-
- _filescontents = (
- rqtl2.read_csv_file(path,
- separator=control_data["sep"],
- comment_char=control_data["comment.char"])
- for path in phenofiles)
- _dataitems = (
- __row_to_dataitems__(
- dict(zip(_headers,
- __replace_na_strings__(line, control_data["na.strings"]))),
- dataidmap,
- pheno_name2id,
- samples)
- for linenum, line in (enumline for filecontent in _filescontents
- for enumline in enumerate(filecontent))
- if linenum > 0)
-
return save_phenotypes_data(
conn,
table,
- (item for items in _dataitems
- for item in items
- if item["value"] is not None))
+ __build_dataitems__(
+ filetype,
+ phenofiles,
+ control_data,
+ samples,
+ dataidmap,
+ pheno_name2id))
save_pheno_data = partial(save_numeric_data,