Add function to read the file and do basic data processing.

author: Frederick Muriuki Muriithi 2025-03-25 15:30:10 -0500
committer: Frederick Muriuki Muriithi 2025-03-25 15:30:10 -0500
commit: 060b04bb7457fbdb9f4a23dfab79b98ead4b0cc0 (patch)
tree: 0be9d9605e2c7d4263793f8470ecb7cd0bf50509
parent: fa86b2d93918e6e7ed857e32bf0da0d08b927869 (diff)
download: gn-uploader-060b04bb7457fbdb9f4a23dfab79b98ead4b0cc0.tar.gz
1 files changed, 24 insertions, 0 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index 67bf65a..cc58b02 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -3,6 +3,7 @@ import uuid
 import logging
 import argparse
 from pathlib import Path
+from typing import Iterator
 
 from gn_libs import jobs, mysqldb, sqlite3
 
@@ -68,6 +69,29 @@ def parse_args():
     return parser.parse_args()
 
 
+def read_file(filepath: Path) -> Iterator[dict]:
+    """Yield a dict for every data row of the tab-separated `filepath`.
+
+    The first non-blank, non-"#" line gives the headers. "UniqueIdentifier"
+    ("<k>:<pheno>::<k>:<xref>") becomes "phenotype_id" and "xref_id".
+    A missing/malformed identifier raises KeyError/ValueError/IndexError.
+    """
+    with filepath.open(mode="r", encoding="utf-8") as infile:
+        headers = None
+        for line in infile:
+            if line.startswith("#") or not line.strip():  # comment or blank
+                continue
+
+            fields = line.rstrip("\r\n").split("\t")  # keep empty edge columns
+            if headers is None:
+                headers = fields
+                continue
+            _dict = dict(zip(headers, fields))
+            _pheno, _xref = _dict.pop("UniqueIdentifier").split("::")
+            _dict["phenotype_id"] = _pheno.split(":")[1]
+            _dict["xref_id"] = _xref.split(":")[1]
+            yield _dict
+
+
 def run(conn, job):
     """Process the data and update it."""
     check_ids()
author	Frederick Muriuki Muriithi	2025-03-25 15:30:10 -0500
committer	Frederick Muriuki Muriithi	2025-03-25 15:30:10 -0500
commit	060b04bb7457fbdb9f4a23dfab79b98ead4b0cc0 (patch)
tree	0be9d9605e2c7d4263793f8470ecb7cd0bf50509
parent	fa86b2d93918e6e7ed857e32bf0da0d08b927869 (diff)
download	gn-uploader-060b04bb7457fbdb9f4a23dfab79b98ead4b0cc0.tar.gz