aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2025-06-02 12:15:00 -0500
committer Frederick Muriuki Muriithi 2025-06-02 12:15:00 -0500
commit 44de01f047010f868b20f3b1fa01b1c66288325f (patch)
tree 4504faa3d146a8572a361d41271cb31356e28b8e
parent 32f85988da8a054ed2ee9249fcd26930a88a9db4 (diff)
download gn-uploader-44de01f047010f868b20f3b1fa01b1c66288325f.tar.gz
Compute the map from a phenotype's name to its ID.
-rw-r--r-- scripts/load_phenotypes_to_db.py 27
1 files changed, 18 insertions, 9 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 1e240a7..de0bfbb 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -195,15 +195,24 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
_files = rqtl2.extract(zfile, _outdir)
logger.info("Saving basic phenotype data.")
_phenos = save_phenotypes(conn, _control_data, _outdir)
- dataidmap = {
- row["phenotype_id"]: {
- "population_id": _population["Id"],
- "phenotype_id": row["phenotype_id"],
- "data_id": dataid,
- "publication_id": _publication["Id"],
- }
- for dataid, row in enumerate(_phenos, start=__fetch_next_dataid__(conn))
- }
+ def __build_phenos_maps__(accumulator, current):
+ dataid, row = current
+ return ({
+ **accumulator[0],
+ row["phenotype_id"]: {
+ "population_id": _population["Id"],
+ "phenotype_id": row["phenotype_id"],
+ "data_id": dataid,
+ "publication_id": _publication["Id"],
+ }
+ }, {
+ **accumulator[1],
+ row["id"]: row["phenotype_id"]
+ })
+ dataidmap, pheno_name2id = reduce(
+ __build_phenos_maps__,
+ enumerate(_phenos, start=__fetch_next_dataid__(conn)),
+ ({},{}))
# 3. a. Fetch the strain names and IDS: create name->ID map
samples = {
row["Name"]: row