author | Frederick Muriuki Muriithi | 2025-05-30 14:29:56 -0500
committer | Frederick Muriuki Muriithi | 2025-05-30 14:29:56 -0500
commit | 32f85988da8a054ed2ee9249fcd26930a88a9db4 (patch)
tree | 5d4b105e0beca4414a865d9edaf40465099a121c
parent | 2f80e1246b78841a9cde301ada104ae9370b36f1 (diff)
download | gn-uploader-32f85988da8a054ed2ee9249fcd26930a88a9db4.tar.gz
Refactor: Extract common pattern into separate function
Extract the common pattern into a single, more general function and pass
that function the data it needs to handle each of the different file
types.
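
The shape of the change, reduced to a minimal sketch: one general function takes the pieces that vary (the control-data key and the target table) as parameters, and `functools.partial` builds the per-file-type entry points. The loader below is hypothetical and only illustrates the pattern; it yields strings instead of writing to MySQL through the script's real `save_phenotypes_data` helper.

```python
# Minimal sketch of the "generalise, then specialise with partial" pattern.
# The file-reading logic here is illustrative only, not the project's code.
from functools import partial
from pathlib import Path
from typing import Iterator


def save_numeric_data(filesdir: Path,
                      control_data: dict,
                      filetype: str,
                      table: str) -> Iterator[str]:
    """Read the files listed under `filetype` in the control data and
    yield rows destined for `table`."""
    files = tuple(filesdir.joinpath(name)
                  for name in control_data.get(filetype, ()))
    for path in files:
        with path.open(encoding="utf-8") as infile:
            next(infile, None)  # skip the header row
            for line in infile:
                yield f"{table}: {line.strip()}"


# Each file type differs only in the control-data key it reads and the
# database table it targets.
save_pheno_data = partial(save_numeric_data,
                          filetype="pheno", table="PublishData")
save_phenotypes_se = partial(save_numeric_data,
                             filetype="phenose", table="PublishSE")
save_phenotypes_n = partial(save_numeric_data,
                            filetype="phenonum", table="NStrain")
```

In the diff below, `partial` fixes only the keyword arguments `filetype` and `table`, so the specialised callables keep the original positional parameters (`conn`, `dataidmap`, `samples`, `control_data`, `filesdir`) and existing call sites can continue to call them unchanged.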
-rw-r--r-- | scripts/load_phenotypes_to_db.py | 99
1 file changed, 20 insertions(+), 79 deletions(-)
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 99b56d7..1e240a7 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -4,6 +4,7 @@ import logging
 import argparse
 from pathlib import Path
 from zipfile import ZipFile
+from functools import partial
 from typing import Any, Union
 
 from MySQLdb.cursors import Cursor, DictCursor
@@ -88,20 +89,22 @@ def __row_to_dataitems__(row: dict, samples: dict) -> tuple[dict, ...]:
         if samplename in samples.keys())
 
 
-def save_pheno_data(
+def save_numeric_data(
         conn: mysqldb.Connection,
         dataidmap: dict,
         samples: tuple[dict, ...],
         control_data: dict,
-        filesdir: Path
+        filesdir: Path,
+        filetype: str,
+        table: str
 ):
-    """Read the `datafiles` and save the data in the database."""
+    """Read data from files and save to the database."""
     phenofiles = tuple(
-        filesdir.joinpath(_file) for _file in control_data["pheno"])
+        filesdir.joinpath(_file) for _file in control_data[filetype])
     if len(phenofiles) <= 0:
         return tuple()
 
-    if control_data["pheno_transposed"]:
+    if control_data[f"{filetype}_transposed"]:
         logger.info("Undoing transposition of the files rows and columns.")
         phenofiles = (
             rqtl2.transpose_csv_with_rename(
@@ -111,13 +114,14 @@ def save_pheno_data(
             for _file in phenofiles)
 
     _headers = rqtl2.read_csv_file_headers(phenofiles[0],
-                                           control_data["pheno_transposed"],
+                                           control_data[f"{filetype}_transposed"],
                                            control_data["sep"],
                                            control_data["comment.char"])
 
     return save_phenotypes_data(
         conn,
-        "PublishData",
+        table,
+        # BUG: This seems to always be empty for some reason
        (item for items in
         (__row_to_dataitems__(dict(zip(_headers, line)), samples)
          for filecontent
@@ -127,82 +131,19 @@ def save_pheno_data(
         for item in items))
 
 
-def save_phenotypes_se(
-        conn: mysqldb.Connection,
-        dataidmap: dict,
-        samples: tuple[dict, ...],
-        control_data: dict,
-        filesdir: Path
-):
-    """Read the `sefiles` and save the data in the database."""
-    sefiles = tuple(
-        filesdir.joinpath(_file) for _file in control_data["phenose"])
-    if len(sefiles) <= 0:
-        return tuple()
+save_pheno_data = partial(save_numeric_data,
+                          filetype="pheno",
+                          table="PublishData")
 
-    if control_data["phenose_transposed"]:
-        logger.info("Undoing transposition of the files rows and columns.")
-        sefiles = (
-            rqtl2.transpose_csv_with_rename(
-                _file,
-                build_line_splitter(control_data),
-                build_line_joiner(control_data))
-            for _file in sefiles)
 
-    _headers = rqtl2.read_csv_file_headers(sefiles[0],
-                                           control_data["phenose_transposed"],
-                                           control_data["sep"],
-                                           control_data["comment.char"])
-
-    return save_phenotypes_data(
-        conn,
-        "PublishSE",
-        (item for items in
-         (__row_to_dataitems__(dict(zip(_headers, line)), samples)
-          for filecontent
-          in (rqtl2.read_csv_file(path) for path in sefiles)
-          for idx, line in enumerate(filecontent)
-          if idx != 0)
-         for item in items))
+save_phenotypes_se = partial(save_numeric_data,
+                             filetype="phenose",
+                             table="PublishSE")
 
 
-def save_phenotypes_n(
-        conn: mysqldb.Connection,
-        dataidmap: dict,
-        samples: tuple[dict, ...],
-        control_data: dict,
-        filesdir: Path
-):
-    """Read the `nfiles` and save the data in the database."""
-    nfiles = tuple(
-        filesdir.joinpath(_file) for _file in control_data["phenonum"])
-    if len(nfiles) <= 0:
-        return tuple()
-
-    if control_data["phenonum_transposed"]:
-        logger.info("Undoing transposition of the files rows and columns.")
-        nfiles = (
-            rqtl2.transpose_csv_with_rename(
-                _file,
-                build_line_splitter(control_data),
-                build_line_joiner(control_data))
-            for _file in nfiles)
-
-    _headers = rqtl2.read_csv_file_headers(nfiles[0],
-                                           control_data["phenonum_transposed"],
-                                           control_data["sep"],
-                                           control_data["comment.char"])
-
-    return save_phenotypes_data(
-        conn,
-        "NStrain",
-        (item for items in
-         (__row_to_dataitems__(dict(zip(_headers, line)), samples)
-          for filecontent
-          in (rqtl2.read_csv_file(path) for path in nfiles)
-          for idx, line in enumerate(filecontent)
-          if idx != 0)
-         for item in items))
+save_phenotypes_n = partial(save_numeric_data,
+                            filetype="phenonum",
+                            table="NStrain")
 
 
 def cross_reference_phenotypes_publications_and_data(