diff options
author | Frederick Muriuki Muriithi | 2025-05-30 13:29:22 -0500 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2025-05-30 13:29:22 -0500 |
commit | f8602d8e656072a280bfb1b5d4839de44a682045 (patch) | |
tree | 3e746b6569e5147939ba64f9908f726b1fcce6af | |
parent | 0f8772f572ad86e41d1dccda99e4bb1d4551b51a (diff) | |
download | gn-uploader-f8602d8e656072a280bfb1b5d4839de44a682045.tar.gz |
Refactor out common `__row_to_dataitems__` function.
-rw-r--r-- | scripts/load_phenotypes_to_db.py | 59 |
1 file changed, 19 insertions, 40 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py index 3a0df77..5027a9b 100644 --- a/scripts/load_phenotypes_to_db.py +++ b/scripts/load_phenotypes_to_db.py @@ -72,6 +72,19 @@ def __fetch_next_dataid__(conn: mysqldb.Connection) -> int: return int(cursor.fetchone()["CurrentMaxDataId"]) + 1 +def __row_to_dataitems__(row: dict, samples: dict) -> tuple[dict, ...]: + return tuple( + { + "phenotype_id": row["phenotype_id"], + "data_id": dataidmap[row["phenotype_id"]]["data_id"], + "sample_name": samplename, + "sample_id": samples[samplename]["Id"], + "value": value + } + for samplename, value in row.items() + if samplename in samples.keys()) + + def save_pheno_data( conn: mysqldb.Connection, dataidmap: dict, @@ -99,23 +112,13 @@ def save_pheno_data( control_data["sep"], control_data["comment.char"]) - def __row_to_data_items__(row): - return tuple( - { - "phenotype_id": row["phenotype_id"], - "data_id": dataidmap[row["phenotype_id"]]["data_id"], - "sample_name": samplename, - "sample_id": samples[samplename]["Id"], - "value": value - } - for samplename, value in row.items() - if samplename in samples.keys()) - return save_phenotypes_data( conn, "PublishData", (item for item in - (row_to_dataitems(dict(zip(_headers, line))) for filecontent + (item for items in + (__row_to_dataitems__(dict(zip(_headers, line)), samples) + for filecontent in (rqtl2.read_csv_file(path) for path in phenofiles) for idx, line in enumerate(filecontent) if idx != 0))) @@ -146,23 +149,13 @@ def save_phenotype_se( control_data["sep"], control_data["comment.char"]) - def __row_to_data_items__(row): - return tuple( - { - "phenotype_id": row["phenotype_id"], - "data_id": dataidmap[row["phenotype_id"]]["data_id"], - "sample_name": samplename, - "sample_id": samples[samplename]["Id"], - "error": value - } - for samplename, value in row.items() - if samplename in samples.keys()) - return save_phenotypes_data( conn, "PublishSE", (item for item in - 
(row_to_dataitems(dict(zip(_headers, line))) for filecontent + (item for items in + (__row_to_dataitems__(dict(zip(_headers, line)), samples) + for filecontent in (rqtl2.read_csv_file(path) for path in sefiles) for idx, line in enumerate(filecontent) if idx != 0))) @@ -193,24 +186,10 @@ def save_phenotype_n( control_data["sep"], control_data["comment.char"]) - def __row_to_data_items__(row): - return tuple( - { - "phenotype_id": row["phenotype_id"], - "data_id": dataidmap[row["phenotype_id"]]["data_id"], - "sample_name": samplename, - "sample_id": samples[samplename]["Id"], - "count": value - } - for samplename, value in row.items() - if samplename in samples.keys()) - return save_phenotypes_data( conn, "NStrain", (item for item in - (row_to_dataitems(dict(zip(_headers, line))) for filecontent - in (rqtl2.read_csv_file(path) for path in sefiles) for idx, line in enumerate(filecontent) if idx != 0))) |