about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2025-05-30 13:29:22 -0500
committer Frederick Muriuki Muriithi 2025-05-30 13:29:22 -0500
commit f8602d8e656072a280bfb1b5d4839de44a682045 (patch)
tree 3e746b6569e5147939ba64f9908f726b1fcce6af /scripts
parent 0f8772f572ad86e41d1dccda99e4bb1d4551b51a (diff)
download gn-uploader-f8602d8e656072a280bfb1b5d4839de44a682045.tar.gz
Refactor out common `__row_to_dataitems__` function.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/load_phenotypes_to_db.py 59
1 file changed, 19 insertions, 40 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 3a0df77..5027a9b 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -72,6 +72,19 @@ def __fetch_next_dataid__(conn: mysqldb.Connection) -> int:
         return int(cursor.fetchone()["CurrentMaxDataId"]) + 1
 
 
+def __row_to_dataitems__(row: dict, samples: dict) -> tuple[dict, ...]:
+    return tuple(
+        {
+            "phenotype_id": row["phenotype_id"],
+            "data_id": dataidmap[row["phenotype_id"]]["data_id"],
+            "sample_name": samplename,
+            "sample_id": samples[samplename]["Id"],
+            "value": value
+        }
+        for samplename, value in row.items()
+        if samplename in samples.keys())
+
+
 def save_pheno_data(
         conn: mysqldb.Connection,
         dataidmap: dict,
@@ -99,23 +112,13 @@ def save_pheno_data(
                                            control_data["sep"],
                                            control_data["comment.char"])
 
-    def __row_to_data_items__(row):
-        return tuple(
-            {
-                "phenotype_id": row["phenotype_id"],
-                "data_id": dataidmap[row["phenotype_id"]]["data_id"],
-                "sample_name": samplename,
-                "sample_id": samples[samplename]["Id"],
-                "value": value
-            }
-            for samplename, value in row.items()
-            if samplename in samples.keys())
-
     return save_phenotypes_data(
         conn,
         "PublishData",
         (item for item in
-         (row_to_dataitems(dict(zip(_headers, line))) for filecontent
+        (item for items in
+         (__row_to_dataitems__(dict(zip(_headers, line)), samples)
+          for filecontent
           in (rqtl2.read_csv_file(path) for path in phenofiles)
          for idx, line in enumerate(filecontent)
          if idx != 0)))
@@ -146,23 +149,13 @@ def save_phenotype_se(
                                            control_data["sep"],
                                            control_data["comment.char"])
 
-    def __row_to_data_items__(row):
-        return tuple(
-            {
-                "phenotype_id": row["phenotype_id"],
-                "data_id": dataidmap[row["phenotype_id"]]["data_id"],
-                "sample_name": samplename,
-                "sample_id": samples[samplename]["Id"],
-                "error": value
-            }
-            for samplename, value in row.items()
-            if samplename in samples.keys())
-
     return save_phenotypes_data(
         conn,
         "PublishSE",
         (item for item in
-         (row_to_dataitems(dict(zip(_headers, line))) for filecontent
+        (item for items in
+         (__row_to_dataitems__(dict(zip(_headers, line)), samples)
+          for filecontent
           in (rqtl2.read_csv_file(path) for path in sefiles)
          for idx, line in enumerate(filecontent)
          if idx != 0)))
@@ -193,24 +186,10 @@ def save_phenotype_n(
                                            control_data["sep"],
                                            control_data["comment.char"])
 
-    def __row_to_data_items__(row):
-        return tuple(
-            {
-                "phenotype_id": row["phenotype_id"],
-                "data_id": dataidmap[row["phenotype_id"]]["data_id"],
-                "sample_name": samplename,
-                "sample_id": samples[samplename]["Id"],
-                "count": value
-            }
-            for samplename, value in row.items()
-            if samplename in samples.keys())
-
     return save_phenotypes_data(
         conn,
         "NStrain",
         (item for item in
-         (row_to_dataitems(dict(zip(_headers, line))) for filecontent
-          in (rqtl2.read_csv_file(path) for path in sefiles)
          for idx, line in enumerate(filecontent)
          if idx != 0)))