about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2025-05-30 13:29:22 -0500
committer Frederick Muriuki Muriithi 2025-05-30 13:29:22 -0500
commit f8602d8e656072a280bfb1b5d4839de44a682045 (patch)
tree 3e746b6569e5147939ba64f9908f726b1fcce6af
parent 0f8772f572ad86e41d1dccda99e4bb1d4551b51a (diff)
download gn-uploader-f8602d8e656072a280bfb1b5d4839de44a682045.tar.gz
Refactor out common `__row_to_dataitems__` function.
-rw-r--r-- scripts/load_phenotypes_to_db.py 59
1 files changed, 19 insertions, 40 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 3a0df77..5027a9b 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -72,6 +72,19 @@ def __fetch_next_dataid__(conn: mysqldb.Connection) -> int:
return int(cursor.fetchone()["CurrentMaxDataId"]) + 1
+def __row_to_dataitems__(row: dict, samples: dict) -> tuple[dict, ...]:
+ return tuple(
+ {
+ "phenotype_id": row["phenotype_id"],
+ "data_id": dataidmap[row["phenotype_id"]]["data_id"],
+ "sample_name": samplename,
+ "sample_id": samples[samplename]["Id"],
+ "value": value
+ }
+ for samplename, value in row.items()
+ if samplename in samples.keys())
+
+
def save_pheno_data(
conn: mysqldb.Connection,
dataidmap: dict,
@@ -99,23 +112,13 @@ def save_pheno_data(
control_data["sep"],
control_data["comment.char"])
- def __row_to_data_items__(row):
- return tuple(
- {
- "phenotype_id": row["phenotype_id"],
- "data_id": dataidmap[row["phenotype_id"]]["data_id"],
- "sample_name": samplename,
- "sample_id": samples[samplename]["Id"],
- "value": value
- }
- for samplename, value in row.items()
- if samplename in samples.keys())
-
return save_phenotypes_data(
conn,
"PublishData",
(item for item in
- (row_to_dataitems(dict(zip(_headers, line))) for filecontent
+ (item for items in
+ (__row_to_dataitems__(dict(zip(_headers, line)), samples)
+ for filecontent
in (rqtl2.read_csv_file(path) for path in phenofiles)
for idx, line in enumerate(filecontent)
if idx != 0)))
@@ -146,23 +149,13 @@ def save_phenotype_se(
control_data["sep"],
control_data["comment.char"])
- def __row_to_data_items__(row):
- return tuple(
- {
- "phenotype_id": row["phenotype_id"],
- "data_id": dataidmap[row["phenotype_id"]]["data_id"],
- "sample_name": samplename,
- "sample_id": samples[samplename]["Id"],
- "error": value
- }
- for samplename, value in row.items()
- if samplename in samples.keys())
-
return save_phenotypes_data(
conn,
"PublishSE",
(item for item in
- (row_to_dataitems(dict(zip(_headers, line))) for filecontent
+ (item for items in
+ (__row_to_dataitems__(dict(zip(_headers, line)), samples)
+ for filecontent
in (rqtl2.read_csv_file(path) for path in sefiles)
for idx, line in enumerate(filecontent)
if idx != 0)))
@@ -193,24 +186,10 @@ def save_phenotype_n(
control_data["sep"],
control_data["comment.char"])
- def __row_to_data_items__(row):
- return tuple(
- {
- "phenotype_id": row["phenotype_id"],
- "data_id": dataidmap[row["phenotype_id"]]["data_id"],
- "sample_name": samplename,
- "sample_id": samples[samplename]["Id"],
- "count": value
- }
- for samplename, value in row.items()
- if samplename in samples.keys())
-
return save_phenotypes_data(
conn,
"NStrain",
(item for item in
- (row_to_dataitems(dict(zip(_headers, line))) for filecontent
- in (rqtl2.read_csv_file(path) for path in sefiles)
for idx, line in enumerate(filecontent)
if idx != 0)))