author      Frederick Muriuki Muriithi  2025-05-30 14:29:56 -0500
committer   Frederick Muriuki Muriithi  2025-05-30 14:29:56 -0500
commit      32f85988da8a054ed2ee9249fcd26930a88a9db4 (patch)
tree        5d4b105e0beca4414a865d9edaf40465099a121c
parent      2f80e1246b78841a9cde301ada104ae9370b36f1 (diff)
download    gn-uploader-32f85988da8a054ed2ee9249fcd26930a88a9db4.tar.gz
Refactor: Extract common pattern into separate function
Extract the common pattern into a single, more generalized function, and pass that function the data it needs to handle each of the different file types (pheno, phenose and phenonum).
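
The refactor amounts to one generic loader plus three functools.partial specialisations, so each per-type name keeps its old call signature. A minimal, self-contained sketch of the pattern (signatures simplified for illustration; the real function in the diff also takes dataidmap, samples and filesdir):

    from functools import partial

    def save_numeric_data(conn, control_data: dict, filetype: str, table: str):
        """Generic loader: read the files listed under `filetype` in the
        control data and save their contents to `table` (illustrative only)."""
        files = control_data.get(filetype, [])
        if not files:
            return tuple()
        # Real code would parse the CSV files and write rows via `conn`.
        print(f"loading {files} into {table}")
        return tuple(files)

    # Specialise the generic loader once per file type; callers continue to
    # use the original per-type names and only pass the leading arguments.
    save_pheno_data = partial(save_numeric_data, filetype="pheno", table="PublishData")
    save_phenotypes_se = partial(save_numeric_data, filetype="phenose", table="PublishSE")
    save_phenotypes_n = partial(save_numeric_data, filetype="phenonum", table="NStrain")

    if __name__ == "__main__":
        ctrl = {"pheno": ["pheno.csv"], "phenose": [], "phenonum": ["counts.csv"]}
        save_pheno_data(None, ctrl)     # loads pheno.csv into PublishData
        save_phenotypes_se(None, ctrl)  # nothing to do
        save_phenotypes_n(None, ctrl)   # loads counts.csv into NStrain

Because filetype and table are bound as keyword arguments, the positional parameters (here conn and control_data) stay free, so existing call sites need no changes.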
-rw-r--r--  scripts/load_phenotypes_to_db.py  99
1 file changed, 20 insertions(+), 79 deletions(-)
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 99b56d7..1e240a7 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -4,6 +4,7 @@ import logging
import argparse
from pathlib import Path
from zipfile import ZipFile
+from functools import partial
from typing import Any, Union
from MySQLdb.cursors import Cursor, DictCursor
@@ -88,20 +89,22 @@ def __row_to_dataitems__(row: dict, samples: dict) -> tuple[dict, ...]:
if samplename in samples.keys())
-def save_pheno_data(
+def save_numeric_data(
conn: mysqldb.Connection,
dataidmap: dict,
samples: tuple[dict, ...],
control_data: dict,
- filesdir: Path
+ filesdir: Path,
+ filetype: str,
+ table: str
):
- """Read the `datafiles` and save the data in the database."""
+ """Read data from files and save to the database."""
phenofiles = tuple(
- filesdir.joinpath(_file) for _file in control_data["pheno"])
+ filesdir.joinpath(_file) for _file in control_data[filetype])
if len(phenofiles) <= 0:
return tuple()
- if control_data["pheno_transposed"]:
+ if control_data[f"{filetype}_transposed"]:
logger.info("Undoing transposition of the files rows and columns.")
phenofiles = (
rqtl2.transpose_csv_with_rename(
@@ -111,13 +114,14 @@ def save_pheno_data(
for _file in phenofiles)
_headers = rqtl2.read_csv_file_headers(phenofiles[0],
- control_data["pheno_transposed"],
+ control_data[f"{filetype}_transposed"],
control_data["sep"],
control_data["comment.char"])
return save_phenotypes_data(
conn,
- "PublishData",
+ table,
+ # BUG: This seems to always be empty for some reason
(item for items in
(__row_to_dataitems__(dict(zip(_headers, line)), samples)
for filecontent
@@ -127,82 +131,19 @@ def save_pheno_data(
for item in items))
-def save_phenotypes_se(
- conn: mysqldb.Connection,
- dataidmap: dict,
- samples: tuple[dict, ...],
- control_data: dict,
- filesdir: Path
-):
- """Read the `sefiles` and save the data in the database."""
- sefiles = tuple(
- filesdir.joinpath(_file) for _file in control_data["phenose"])
- if len(sefiles) <= 0:
- return tuple()
+save_pheno_data = partial(save_numeric_data,
+ filetype="pheno",
+ table="PublishData")
- if control_data["phenose_transposed"]:
- logger.info("Undoing transposition of the files rows and columns.")
- sefiles = (
- rqtl2.transpose_csv_with_rename(
- _file,
- build_line_splitter(control_data),
- build_line_joiner(control_data))
- for _file in sefiles)
- _headers = rqtl2.read_csv_file_headers(sefiles[0],
- control_data["phenose_transposed"],
- control_data["sep"],
- control_data["comment.char"])
-
- return save_phenotypes_data(
- conn,
- "PublishSE",
- (item for items in
- (__row_to_dataitems__(dict(zip(_headers, line)), samples)
- for filecontent
- in (rqtl2.read_csv_file(path) for path in sefiles)
- for idx, line in enumerate(filecontent)
- if idx != 0)
- for item in items))
+save_phenotypes_se = partial(save_numeric_data,
+ filetype="phenose",
+ table="PublishSE")
-def save_phenotypes_n(
- conn: mysqldb.Connection,
- dataidmap: dict,
- samples: tuple[dict, ...],
- control_data: dict,
- filesdir: Path
-):
- """Read the `nfiles` and save the data in the database."""
- nfiles = tuple(
- filesdir.joinpath(_file) for _file in control_data["phenonum"])
- if len(nfiles) <= 0:
- return tuple()
-
- if control_data["phenonum_transposed"]:
- logger.info("Undoing transposition of the files rows and columns.")
- nfiles = (
- rqtl2.transpose_csv_with_rename(
- _file,
- build_line_splitter(control_data),
- build_line_joiner(control_data))
- for _file in nfiles)
-
- _headers = rqtl2.read_csv_file_headers(nfiles[0],
- control_data["phenonum_transposed"],
- control_data["sep"],
- control_data["comment.char"])
-
- return save_phenotypes_data(
- conn,
- "NStrain",
- (item for items in
- (__row_to_dataitems__(dict(zip(_headers, line)), samples)
- for filecontent
- in (rqtl2.read_csv_file(path) for path in nfiles)
- for idx, line in enumerate(filecontent)
- if idx != 0)
- for item in items))
+save_phenotypes_n = partial(save_numeric_data,
+ filetype="phenonum",
+ table="NStrain")
def cross_reference_phenotypes_publications_and_data(