diff options
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/phenotypes_bulk_edit.py | 29 |
1 files changed, 26 insertions, 3 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index cc58b02..1d6689e 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -5,6 +5,8 @@ import argparse
 from pathlib import Path
 from typing import Iterator
 
+from MySQLdb.cursors import DictCursor
+
 from gn_libs import jobs, mysqldb, sqlite3
 
 logging.basicConfig(
@@ -12,10 +14,30 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 
 
-def check_ids():
+def check_ids(conn, contents):
     """Verify that all the `UniqueIdentifier` values are valid."""
     logger.info("Checking the 'UniqueIdentifier' values.")
-    pass
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        ids = tuple((row["phenotype_id"], row["xref_id"]) for row in contents)
+        paramstr = ",".join(["(%s, %s)"] * len(ids))
+        cursor.execute(
+            "SELECT PhenotypeId AS phenotype_id, Id AS xref_id "
+            "FROM PublishXRef "
+            f"WHERE (PhenotypeId, Id) IN ({paramstr})",
+            tuple(item for row in ids for item in row))
+        mysqldb.debug_query(cursor, logger)
+        found = tuple((str(row["phenotype_id"]), str(row["xref_id"]))
+                      for row in cursor.fetchall())
+
+    not_found = tuple(item for item in ids if item not in found)
+    if len(not_found) == 0:
+        logger.info("All 'UniqueIdentifier' are valid.")
+        return True
+
+    for item in not_found:
+        logger.error(f"Invalid 'UniqueIdentifier' value: phId:%s::xrId:%s", item[0], item[1])
+
+    return False
 
 
 def check_for_mandatory_fields():
@@ -94,7 +116,8 @@ def read_file(filepath: Path) -> Iterator[str]:
 
 def run(conn, job):
     """Process the data and update it."""
-    check_ids()
+    file_contents = tuple(read_file(Path(job["metadata"]["edit-file"])))
+    check_ids(conn, file_contents)
     check_for_mandatory_fields()
     # stop running here if any errors are found.
     compute_differences()