diff options
author | Frederick Muriuki Muriithi | 2025-03-25 15:30:10 -0500 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2025-03-25 15:30:10 -0500 |
commit | 060b04bb7457fbdb9f4a23dfab79b98ead4b0cc0 (patch) | |
tree | 0be9d9605e2c7d4263793f8470ecb7cd0bf50509 | |
parent | fa86b2d93918e6e7ed857e32bf0da0d08b927869 (diff) | |
download | gn-uploader-060b04bb7457fbdb9f4a23dfab79b98ead4b0cc0.tar.gz |
Add function to read the file and do basic data processing.
-rw-r--r-- | scripts/phenotypes_bulk_edit.py | 24 |
1 files changed, 24 insertions, 0 deletions
from pathlib import Path
from typing import Iterator


def read_file(filepath: Path) -> Iterator[dict]:
    """Read a tab-separated file and yield one dict per data row.

    Lines that start with ``#`` are treated as comments and skipped, as are
    blank lines. The first remaining line is taken as the header row; every
    later line is paired with the headers, column by column.

    The ``UniqueIdentifier`` column is removed from each row and split on
    ``"::"`` into two parts (i.e. a value shaped like
    ``<label>:<id>::<label>:<id>``). The text after the first ``":"`` of each
    part is stored under ``phenotype_id`` and ``xref_id`` respectively.

    Args:
        filepath: Path to the UTF-8 encoded, tab-separated file.

    Yields:
        A dict mapping header names to field values (surrounding whitespace
        trimmed), minus ``UniqueIdentifier`` and plus ``phenotype_id`` and
        ``xref_id``.

    Raises:
        KeyError: If a row has no ``UniqueIdentifier`` column.
        ValueError: If ``UniqueIdentifier`` does not split into exactly two
            ``"::"``-separated parts.
        IndexError: If either part contains no ``":"``.
    """
    with filepath.open(mode="r", encoding="utf-8") as infile:
        headers = None
        for line in infile:
            # Skip comments and blank lines; a blank line must never be
            # mistaken for the header row or for a data row.
            if line.startswith("#") or not line.strip():
                continue

            # Only drop the line terminator before splitting: stripping the
            # whole line would also eat leading/trailing tabs and thereby
            # lose empty first/last columns, misaligning fields and headers.
            fields = [
                field.strip() for field in line.rstrip("\n").split("\t")
            ]
            if headers is None:
                headers = fields
                continue

            row = dict(zip(headers, fields))
            pheno, xref = row.pop("UniqueIdentifier").split("::")
            row["phenotype_id"] = pheno.split(":")[1]
            row["xref_id"] = xref.split(":")[1]
            yield row