about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2025-06-03 07:42:08 -0500
committer: Frederick Muriuki Muriithi, 2025-06-03 07:42:08 -0500
commit: 59b07a41ca5181a19e631f640b38e5fd33d1d550 (patch)
tree: 6691a06386ca792569f6b77bb37aa0b9e50c220e /scripts
parent: 6da353bc7dea262e3330eba270e52ed3f7248582 (diff)
download: gn-uploader-59b07a41ca5181a19e631f640b38e5fd33d1d550.tar.gz
Extract building of data items into helper function.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/load_phenotypes_to_db.py 64
1 files changed, 40 insertions, 24 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index d3d9f5a..de06d70 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -99,6 +99,39 @@ def __row_to_dataitems__(
     } for phenoname, phenovalue in sample_row.items() if phenoname != "id")
 
 
+def __build_dataitems__(
+        filetype,
+        phenofiles,
+        control_data,
+        samples,
+        dataidmap,
+        pheno_name2id
+):
+    _headers = rqtl2.read_csv_file_headers(
+        phenofiles[0],
+        control_data[f"{filetype}_transposed"],
+        control_data["sep"],
+        control_data["comment.char"])
+    _filescontents = (
+        rqtl2.read_csv_file(path,
+                            separator=control_data["sep"],
+                            comment_char=control_data["comment.char"])
+        for path in phenofiles)
+    _linescontents = (
+        __row_to_dataitems__(
+            dict(zip(_headers,
+                     __replace_na_strings__(line, control_data["na.strings"]))),
+            dataidmap,
+            pheno_name2id,
+            samples)
+        for linenum, line in (enumline for filecontent in _filescontents
+                              for enumline in enumerate(filecontent))
+        if linenum > 0)
+    return (item for items in _linescontents
+            for item in items
+            if item["value"] is not None)
+
+
 def save_numeric_data(
         conn: mysqldb.Connection,
         dataidmap: dict,
@@ -124,33 +157,16 @@ def save_numeric_data(
                 build_line_joiner(control_data))
             for _file in phenofiles)
 
-    _headers = rqtl2.read_csv_file_headers(phenofiles[0],
-                                           control_data[f"{filetype}_transposed"],
-                                           control_data["sep"],
-                                           control_data["comment.char"])
-
-    _filescontents = (
-        rqtl2.read_csv_file(path,
-                            separator=control_data["sep"],
-                            comment_char=control_data["comment.char"])
-        for path in phenofiles)
-    _dataitems = (
-        __row_to_dataitems__(
-            dict(zip(_headers,
-                     __replace_na_strings__(line, control_data["na.strings"]))),
-            dataidmap,
-            pheno_name2id,
-            samples)
-        for linenum, line in (enumline for filecontent in _filescontents
-                              for enumline in enumerate(filecontent))
-        if linenum > 0)
-
     return save_phenotypes_data(
         conn,
         table,
-        (item for items in _dataitems
-         for item in items
-         if item["value"] is not None))
+        __build_dataitems__(
+            filetype,
+            phenofiles,
+            control_data,
+            samples,
+            dataidmap,
+            pheno_name2id))
 
 
 save_pheno_data = partial(save_numeric_data,