aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--scripts/phenotypes_bulk_edit.py24
1 files changed, 24 insertions, 0 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index 67bf65a..cc58b02 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -3,6 +3,7 @@ import uuid
import logging
import argparse
from pathlib import Path
+from typing import Iterator
from gn_libs import jobs, mysqldb, sqlite3
@@ -68,6 +69,29 @@ def parse_args():
return parser.parse_args()
def read_file(filepath: Path) -> Iterator[dict]:
    """Read a tab-separated file and yield one dict per data row.

    Lines beginning with "#" are treated as comments and skipped, as are
    blank (whitespace-only) lines. The first remaining line supplies the
    column headers; every later line is paired with those headers to build
    a dict.

    The "UniqueIdentifier" column is removed from each row and split on
    "::" into exactly two parts, each of the form "<label>:<value>". The
    two values are stored in the row under the keys "phenotype_id" and
    "xref_id" respectively.

    Args:
        filepath: Path to the UTF-8 encoded, tab-separated input file.

    Yields:
        One dict per data row, mapping header names to field values, with
        "UniqueIdentifier" replaced by "phenotype_id" and "xref_id".

    Raises:
        KeyError: If a row has no "UniqueIdentifier" column.
        ValueError: If the identifier does not split into exactly two
            parts on "::".
        IndexError: If either identifier part contains no ":".
    """
    with filepath.open(mode="r", encoding="utf-8") as infile:
        headers = None
        for line in infile:
            if line.startswith("#"):  # ignore comments
                continue
            if not line.strip():  # ignore blank lines (e.g. trailing newline)
                continue

            # Strip only the line terminator: a bare `strip()` would also eat
            # leading/trailing tabs, dropping empty first/last columns and
            # misaligning the values with the headers.
            fields = line.rstrip("\r\n").split("\t")
            if headers is None:
                headers = fields
                continue

            row = dict(zip(headers, fields))
            pheno, xref = row.pop("UniqueIdentifier").split("::")
            row["phenotype_id"] = pheno.split(":")[1]
            row["xref_id"] = xref.split(":")[1]
            yield row
+
+
def run(conn, job):
"""Process the data and update it."""
check_ids()