diff options
author | Frederick Muriuki Muriithi | 2025-06-02 12:15:00 -0500 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2025-06-02 12:15:00 -0500 |
commit | 44de01f047010f868b20f3b1fa01b1c66288325f (patch) | |
tree | 4504faa3d146a8572a361d41271cb31356e28b8e | |
parent | 32f85988da8a054ed2ee9249fcd26930a88a9db4 (diff) | |
download | gn-uploader-44de01f047010f868b20f3b1fa01b1c66288325f.tar.gz |
Compute the map from a phenotype's name to its ID.
-rw-r--r-- | scripts/load_phenotypes_to_db.py | 27 |
1 file changed, 18 insertions, 9 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py index 1e240a7..de0bfbb 100644 --- a/scripts/load_phenotypes_to_db.py +++ b/scripts/load_phenotypes_to_db.py @@ -195,15 +195,24 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int: _files = rqtl2.extract(zfile, _outdir) logger.info("Saving basic phenotype data.") _phenos = save_phenotypes(conn, _control_data, _outdir) - dataidmap = { - row["phenotype_id"]: { - "population_id": _population["Id"], - "phenotype_id": row["phenotype_id"], - "data_id": dataid, - "publication_id": _publication["Id"], - } - for dataid, row in enumerate(_phenos, start=__fetch_next_dataid__(conn)) - } + def __build_phenos_maps__(accumulator, current): + dataid, row = current + return ({ + **accumulator[0], + row["phenotype_id"]: { + "population_id": _population["Id"], + "phenotype_id": row["phenotype_id"], + "data_id": dataid, + "publication_id": _publication["Id"], + } + }, { + **accumulator[1], + row["id"]: row["phenotype_id"] + }) + dataidmap, pheno_name2id = reduce( + __build_phenos_maps__, + enumerate(_phenos, start=__fetch_next_dataid__(conn)), + ({},{})) # 3. a. Fetch the strain names and IDS: create name->ID map samples = { row["Name"]: row |