about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2025-06-02 12:15:00 -0500
committer: Frederick Muriuki Muriithi, 2025-06-02 12:15:00 -0500
commit: 44de01f047010f868b20f3b1fa01b1c66288325f (patch)
tree: 4504faa3d146a8572a361d41271cb31356e28b8e /scripts
parent: 32f85988da8a054ed2ee9249fcd26930a88a9db4 (diff)
download: gn-uploader-44de01f047010f868b20f3b1fa01b1c66288325f.tar.gz
Compute the map from a phenotype's name to its ID.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/load_phenotypes_to_db.py | 27
1 files changed, 18 insertions, 9 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 1e240a7..de0bfbb 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -195,15 +195,24 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
         _files = rqtl2.extract(zfile, _outdir)
     logger.info("Saving basic phenotype data.")
     _phenos = save_phenotypes(conn, _control_data, _outdir)
-    dataidmap = {
-        row["phenotype_id"]: {
-            "population_id": _population["Id"],
-            "phenotype_id": row["phenotype_id"],
-            "data_id": dataid,
-            "publication_id": _publication["Id"],
-        }
-        for dataid, row in enumerate(_phenos, start=__fetch_next_dataid__(conn))
-    }
+    def __build_phenos_maps__(accumulator, current):
+        dataid, row = current
+        return ({
+            **accumulator[0],
+            row["phenotype_id"]: {
+                "population_id": _population["Id"],
+                "phenotype_id": row["phenotype_id"],
+                "data_id": dataid,
+                "publication_id": _publication["Id"],
+            }
+        }, {
+            **accumulator[1],
+            row["id"]: row["phenotype_id"]
+        })
+    dataidmap, pheno_name2id = reduce(
+        __build_phenos_maps__,
+        enumerate(_phenos, start=__fetch_next_dataid__(conn)),
+        ({},{}))
     # 3. a. Fetch the strain names and IDS: create name->ID map
     samples = {
         row["Name"]: row