about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2025-05-30 14:29:56 -0500
committer Frederick Muriuki Muriithi 2025-05-30 14:29:56 -0500
commit 32f85988da8a054ed2ee9249fcd26930a88a9db4 (patch)
tree 5d4b105e0beca4414a865d9edaf40465099a121c /scripts
parent 2f80e1246b78841a9cde301ada104ae9370b36f1 (diff)
download gn-uploader-32f85988da8a054ed2ee9249fcd26930a88a9db4.tar.gz
Refactor: Extract common pattern into separate function
Extract the common pattern into a separate, more generalized function
and pass the new function the data it requires to perform its tasks
for the different file types.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/load_phenotypes_to_db.py 99
1 file changed, 20 insertions, 79 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 99b56d7..1e240a7 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -4,6 +4,7 @@ import logging
 import argparse
 from pathlib import Path
 from zipfile import ZipFile
+from functools import partial
 from typing import Any, Union
 
 from MySQLdb.cursors import Cursor, DictCursor
@@ -88,20 +89,22 @@ def __row_to_dataitems__(row: dict, samples: dict) -> tuple[dict, ...]:
         if samplename in samples.keys())
 
 
-def save_pheno_data(
+def save_numeric_data(
         conn: mysqldb.Connection,
         dataidmap: dict,
         samples: tuple[dict, ...],
         control_data: dict,
-        filesdir: Path
+        filesdir: Path,
+        filetype: str,
+        table: str
 ):
-    """Read the `datafiles` and save the data in the database."""
+    """Read data from files and save to the database."""
     phenofiles = tuple(
-        filesdir.joinpath(_file) for _file in control_data["pheno"])
+        filesdir.joinpath(_file) for _file in control_data[filetype])
     if len(phenofiles) <= 0:
         return tuple()
 
-    if control_data["pheno_transposed"]:
+    if control_data[f"{filetype}_transposed"]:
         logger.info("Undoing transposition of the files rows and columns.")
         phenofiles = (
             rqtl2.transpose_csv_with_rename(
@@ -111,13 +114,14 @@ def save_pheno_data(
             for _file in phenofiles)
 
     _headers = rqtl2.read_csv_file_headers(phenofiles[0],
-                                           control_data["pheno_transposed"],
+                                           control_data[f"{filetype}_transposed"],
                                            control_data["sep"],
                                            control_data["comment.char"])
 
     return save_phenotypes_data(
         conn,
-        "PublishData",
+        table,
+        # BUG: This seems to always be empty for some reason
         (item for items in
          (__row_to_dataitems__(dict(zip(_headers, line)), samples)
           for filecontent
@@ -127,82 +131,19 @@ def save_pheno_data(
          for item in items))
 
 
-def save_phenotypes_se(
-        conn: mysqldb.Connection,
-        dataidmap: dict,
-        samples: tuple[dict, ...],
-        control_data: dict,
-        filesdir: Path
-):
-    """Read the `sefiles` and save the data in the database."""
-    sefiles = tuple(
-        filesdir.joinpath(_file) for _file in control_data["phenose"])
-    if len(sefiles) <= 0:
-        return tuple()
+save_pheno_data = partial(save_numeric_data,
+                          filetype="pheno",
+                          table="PublishData")
 
-    if control_data["phenose_transposed"]:
-        logger.info("Undoing transposition of the files rows and columns.")
-        sefiles = (
-            rqtl2.transpose_csv_with_rename(
-                _file,
-                build_line_splitter(control_data),
-                build_line_joiner(control_data))
-            for _file in sefiles)
 
-    _headers = rqtl2.read_csv_file_headers(sefiles[0],
-                                           control_data["phenose_transposed"],
-                                           control_data["sep"],
-                                           control_data["comment.char"])
-
-    return save_phenotypes_data(
-        conn,
-        "PublishSE",
-        (item for items in
-         (__row_to_dataitems__(dict(zip(_headers, line)), samples)
-          for filecontent
-          in (rqtl2.read_csv_file(path) for path in sefiles)
-         for idx, line in enumerate(filecontent)
-         if idx != 0)
-         for item in items))
+save_phenotypes_se = partial(save_numeric_data,
+                             filetype="phenose",
+                             table="PublishSE")
 
 
-def save_phenotypes_n(
-        conn: mysqldb.Connection,
-        dataidmap: dict,
-        samples: tuple[dict, ...],
-        control_data: dict,
-        filesdir: Path
-):
-    """Read the `nfiles` and save the data in the database."""
-    nfiles = tuple(
-        filesdir.joinpath(_file) for _file in control_data["phenonum"])
-    if len(nfiles) <= 0:
-        return tuple()
-
-    if control_data["phenonum_transposed"]:
-        logger.info("Undoing transposition of the files rows and columns.")
-        nfiles = (
-            rqtl2.transpose_csv_with_rename(
-                _file,
-                build_line_splitter(control_data),
-                build_line_joiner(control_data))
-            for _file in nfiles)
-
-    _headers = rqtl2.read_csv_file_headers(nfiles[0],
-                                           control_data["phenonum_transposed"],
-                                           control_data["sep"],
-                                           control_data["comment.char"])
-
-    return save_phenotypes_data(
-        conn,
-        "NStrain",
-        (item for items in
-         (__row_to_dataitems__(dict(zip(_headers, line)), samples)
-          for filecontent
-          in (rqtl2.read_csv_file(path) for path in nfiles)
-         for idx, line in enumerate(filecontent)
-         if idx != 0)
-         for item in items))
+save_phenotypes_n = partial(save_numeric_data,
+                             filetype="phenonum",
+                             table="NStrain")
 
 
 def cross_reference_phenotypes_publications_and_data(