diff options
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/load_phenotypes_to_db.py | 59 |
1 files changed, 19 insertions, 40 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py index 3a0df77..5027a9b 100644 --- a/scripts/load_phenotypes_to_db.py +++ b/scripts/load_phenotypes_to_db.py @@ -72,6 +72,19 @@ def __fetch_next_dataid__(conn: mysqldb.Connection) -> int: return int(cursor.fetchone()["CurrentMaxDataId"]) + 1 +def __row_to_dataitems__(row: dict, samples: dict) -> tuple[dict, ...]: + return tuple( + { + "phenotype_id": row["phenotype_id"], + "data_id": dataidmap[row["phenotype_id"]]["data_id"], + "sample_name": samplename, + "sample_id": samples[samplename]["Id"], + "value": value + } + for samplename, value in row.items() + if samplename in samples.keys()) + + def save_pheno_data( conn: mysqldb.Connection, dataidmap: dict, @@ -99,23 +112,13 @@ def save_pheno_data( control_data["sep"], control_data["comment.char"]) - def __row_to_data_items__(row): - return tuple( - { - "phenotype_id": row["phenotype_id"], - "data_id": dataidmap[row["phenotype_id"]]["data_id"], - "sample_name": samplename, - "sample_id": samples[samplename]["Id"], - "value": value - } - for samplename, value in row.items() - if samplename in samples.keys()) - return save_phenotypes_data( conn, "PublishData", (item for item in - (row_to_dataitems(dict(zip(_headers, line))) for filecontent + (item for items in + (__row_to_dataitems__(dict(zip(_headers, line)), samples) + for filecontent in (rqtl2.read_csv_file(path) for path in phenofiles) for idx, line in enumerate(filecontent) if idx != 0))) @@ -146,23 +149,13 @@ def save_phenotype_se( control_data["sep"], control_data["comment.char"]) - def __row_to_data_items__(row): - return tuple( - { - "phenotype_id": row["phenotype_id"], - "data_id": dataidmap[row["phenotype_id"]]["data_id"], - "sample_name": samplename, - "sample_id": samples[samplename]["Id"], - "error": value - } - for samplename, value in row.items() - if samplename in samples.keys()) - return save_phenotypes_data( conn, "PublishSE", (item for item in - (row_to_dataitems(dict(zip(_headers, line))) for filecontent + (item for items in + (__row_to_dataitems__(dict(zip(_headers, line)), samples) + for filecontent in (rqtl2.read_csv_file(path) for path in sefiles) for idx, line in enumerate(filecontent) if idx != 0))) @@ -193,24 +186,10 @@ def save_phenotype_n( control_data["sep"], control_data["comment.char"]) - def __row_to_data_items__(row): - return tuple( - { - "phenotype_id": row["phenotype_id"], - "data_id": dataidmap[row["phenotype_id"]]["data_id"], - "sample_name": samplename, - "sample_id": samples[samplename]["Id"], - "count": value - } - for samplename, value in row.items() - if samplename in samples.keys()) - return save_phenotypes_data( conn, "NStrain", (item for item in - (row_to_dataitems(dict(zip(_headers, line))) for filecontent - in (rqtl2.read_csv_file(path) for path in sefiles) for idx, line in enumerate(filecontent) if idx != 0))) |