about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2025-03-26 10:01:17 -0500
committer Frederick Muriuki Muriithi 2025-03-26 10:01:17 -0500
commit 7265c208d71e9eeba3e7146b11b2c890377aedfb (patch)
tree 38e5cf0584d2f9813d1e00af642d9290365896d2 /scripts
parent 060b04bb7457fbdb9f4a23dfab79b98ead4b0cc0 (diff)
download gn-uploader-7265c208d71e9eeba3e7146b11b2c890377aedfb.tar.gz
Check whether the IDs are valid.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/phenotypes_bulk_edit.py 29
1 files changed, 26 insertions, 3 deletions
diff --git a/scripts/phenotypes_bulk_edit.py b/scripts/phenotypes_bulk_edit.py
index cc58b02..1d6689e 100644
--- a/scripts/phenotypes_bulk_edit.py
+++ b/scripts/phenotypes_bulk_edit.py
@@ -5,6 +5,8 @@ import argparse
 from pathlib import Path
 from typing import Iterator
 
+from MySQLdb.cursors import DictCursor
+
 from gn_libs import jobs, mysqldb, sqlite3
 
 logging.basicConfig(
@@ -12,10 +14,30 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 
 
-def check_ids():
+def check_ids(conn, contents):
     """Verify that all the `UniqueIdentifier` values are valid."""
     logger.info("Checking the 'UniqueIdentifier' values.")
-    pass
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        ids = tuple((row["phenotype_id"], row["xref_id"]) for row in contents)
+        paramstr = ",".join(["(%s, %s)"] * len(ids))
+        cursor.execute(
+            "SELECT PhenotypeId AS phenotype_id, Id AS xref_id "
+            "FROM PublishXRef "
+            f"WHERE (PhenotypeId, Id) IN ({paramstr})",
+            tuple(item for row in ids for item in row))
+        mysqldb.debug_query(cursor, logger)
+        found = tuple((str(row["phenotype_id"]), str(row["xref_id"]))
+                 for row in cursor.fetchall())
+
+    not_found = tuple(item for item in ids if item not in found)
+    if len(not_found) == 0:
+        logger.info("All 'UniqueIdentifier' are valid.")
+        return True
+
+    for item in not_found:
+        logger.error(f"Invalid 'UniqueIdentifier' value: phId:%s::xrId:%s", item[0], item[1])
+
+    return False
 
 
 def check_for_mandatory_fields():
@@ -94,7 +116,8 @@ def read_file(filepath: Path) -> Iterator[str]:
 
 def run(conn, job):
     """Process the data and update it."""
-    check_ids()
+    file_contents = tuple(read_file(Path(job["metadata"]["edit-file"])))
+    check_ids(conn, file_contents)
     check_for_mandatory_fields()
     # stop running here if any errors are found.
     compute_differences()