about | summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/phenotypes_bulk_edit.py 29
1 files changed, 26 insertions, 3 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index cc58b02..1d6689e 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -5,6 +5,8 @@ import argparse
from pathlib import Path
from typing import Iterator
+from MySQLdb.cursors import DictCursor
+
from gn_libs import jobs, mysqldb, sqlite3
logging.basicConfig(
@@ -12,10 +14,30 @@ logging.basicConfig(
logger = logging.getLogger(__name__)
-def check_ids():
-    """Verify that all the `UniqueIdentifier` values are valid."""
+def check_ids(conn, contents):
+    """Verify that all the `UniqueIdentifier` values are valid.
+
+    Each row in `contents` must provide the keys "phenotype_id" and
+    "xref_id".  A pair is valid when a row with the same
+    (PhenotypeId, Id) exists in the `PublishXRef` table.
+
+    Parameters:
+        conn: database connection whose `cursor()` accepts `cursorclass`.
+        contents: iterable of mappings read from the bulk-edit file.
+
+    Returns:
+        True if every pair was found (or there was nothing to check),
+        False otherwise.  Each missing pair is logged as an error.
+    """
     logger.info("Checking the 'UniqueIdentifier' values.")
-    pass
+    ids = tuple((row["phenotype_id"], row["xref_id"]) for row in contents)
+    if not ids:
+        # An empty `IN ()` list is a SQL syntax error, so skip the query.
+        logger.info("No 'UniqueIdentifier' values to check.")
+        return True
+
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        paramstr = ",".join(["(%s, %s)"] * len(ids))
+        cursor.execute(
+            "SELECT PhenotypeId AS phenotype_id, Id AS xref_id "
+            "FROM PublishXRef "
+            f"WHERE (PhenotypeId, Id) IN ({paramstr})",
+            tuple(item for row in ids for item in row))
+        mysqldb.debug_query(cursor, logger)
+        # Database values are stringified before comparison; presumably the
+        # ids read from the file are strings -- TODO confirm.  A set gives
+        # O(1) membership tests below.
+        found = {(str(row["phenotype_id"]), str(row["xref_id"]))
+                 for row in cursor.fetchall()}
+
+    # Normalise the file-side values the same way as the database values so
+    # that non-string ids are not reported as missing by mistake.
+    not_found = tuple(
+        item for item in ids
+        if (str(item[0]), str(item[1])) not in found)
+    if not not_found:
+        logger.info("All 'UniqueIdentifier' are valid.")
+        return True
+
+    for phenotype_id, xref_id in not_found:
+        logger.error(
+            "Invalid 'UniqueIdentifier' value: phId:%s::xrId:%s",
+            phenotype_id, xref_id)
+
+    return False
def check_for_mandatory_fields():
@@ -94,7 +116,8 @@ def read_file(filepath: Path) -> Iterator[str]:
def run(conn, job):
"""Process the data and update it."""
- check_ids()
+ file_contents = tuple(read_file(Path(job["metadata"]["edit-file"])))
+ check_ids(conn, file_contents)
check_for_mandatory_fields()
# stop running here if any errors are found.
compute_differences()