author      Frederick Muriuki Muriithi  2025-06-05 13:29:12 -0500
committer   Frederick Muriuki Muriithi  2025-06-05 15:09:16 -0500
commit      650dbc306101a4c2bd0e910e3a5c744f2f09929c (patch)
tree        a1f32729ad92deefb7efcc21ad5e395308ca696a
parent      fd997d55b745a192f9946de1a7652fd28c700280 (diff)
download    gn-uploader-650dbc306101a4c2bd0e910e3a5c744f2f09929c.tar.gz
Only update authorisation after all data is entered.
Pull the auth update function out of the database connection context manager, so that a problem with the auth update cannot cause a rollback of all the data. We can always update the authorisation manually later; we therefore do not want a failure in that step to roll back the (potentially) time-consuming data entry.
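In outline, the commit moves from "update auth inside the transaction" to "load data inside, update auth outside". A minimal sketch of the new shape, with assumed/illustrative names (the real helpers are load_data and update_auth in scripts/load_phenotypes_to_db.py):

    def run_job(db_uri, job):
        # All inserts happen inside the connection context manager, so an
        # exception here rolls the whole load back.
        with database_connection(db_uri) as conn:  # assumed helper
            db_results = load_data(conn, job)
        # The auth update runs after the context manager has exited (and
        # committed), so a failure here cannot roll back the loaded data;
        # authorisation can still be fixed manually later.
        metadata = job["metadata"]
        return update_auth(metadata["authserver"], metadata["token"], *db_results)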
-rw-r--r--  scripts/load_phenotypes_to_db.py  29
1 file changed, 20 insertions(+), 9 deletions(-)
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 0b1f738..d5d2d16 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -1,3 +1,4 @@
+import sys
 import uuid
 import json
 import logging
@@ -224,7 +225,7 @@ def cross_reference_phenotypes_publications_and_data(
     return tuple()
 
 
-def update_auth(token, species, population, dataset, phenos):
+def update_auth(authserver, token, species, population, dataset, xrefdata):
     """Grant the user access to their data."""
     raise NotImplemented("Please implement this!")
@@ -285,7 +286,7 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
     logger.info("Saved %s new phenotype data rows.", _num_data_rows)
     # 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef
     logger.info("Cross-referencing new phenotypes to their data and publications.")
-    xrefs = cross_reference_phenotypes_publications_and_data(
+    _xrefs = cross_reference_phenotypes_publications_and_data(
         conn, tuple(dataidmap.values()))
     # 5. If standard errors and N exist, save them too
     # (use IDs returned in `3. b.` above).
@@ -306,10 +307,7 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
         control_data=_control_data,
         filesdir=_outdir)
     logger.info("Saved %s new phenotype sample counts rows.", num_n_rows)
-    # 6. If entirely new data, update authorisations (break this down)
-    logger.info("Updating authorisation.")
-    update_auth(_token, _species, _population, _dataset, _phenos)
-    return 0
+    return (_species, _population, _dataset, _xrefs)
 
 
 if __name__ == "__main__":
@@ -344,6 +342,8 @@ if __name__ == "__main__":
         with (mysqldb.database_connection(args.db_uri) as conn,
               conn.cursor(cursorclass=DictCursor) as cursor,
               sqlite3.connection(args.jobs_db_path) as jobs_conn):
+            job = jobs.job(jobs_conn, args.job_id)
+
             # Lock the PublishXRef/PublishData/PublishSE/NStrain here: Why?
             # The `DataId` values are sequential, but not auto-increment
             # Can't convert `PublishXRef`.`DataId` to AUTO_INCREMENT.
@@ -371,12 +371,23 @@ if __name__ == "__main__":
            cursor.execute(# Lock the tables to avoid race conditions
                "LOCK TABLES " + ", ".join(
                    f"{_table} WRITE" for _table in _db_tables_))
-            return load_data(conn, jobs.job(jobs_conn, args.job_id))
+
+            db_results = load_data(conn, job)
            logger.debug("Unlocking all database tables.")
            cursor.execute("UNLOCK TABLES")
-        return 1
+        # Update authorisations (break this down) - maybe loop until it works?
+        logger.info("Updating authorisation.")
+        _job_metadata = job["metadata"]
+        return update_auth(_job_metadata["authserver"],
+                           _job_metadata["token"],
+                           *db_results)
 
 
-    main()
+    try:
+        sys.exit(main())
+    except:
+        logger.debug("Data loading failed… Halting!",
+                     exc_info=True)
+        sys.exit(1)
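The "(break this down) - maybe loop until it works?" comment hints at retrying the auth update. A rough sketch of such a loop, purely illustrative and not part of this commit (update_auth_with_retries, the attempt count, and the backoff policy are all assumptions):

    import time

    def update_auth_with_retries(authserver, token, *db_results, attempts=3):
        # The data is already committed at this point, so retrying is safe;
        # on final failure, authorisation can still be applied manually.
        for attempt in range(1, attempts + 1):
            try:
                return update_auth(authserver, token, *db_results)
            except Exception:  # real code should catch narrower errors
                logger.debug("Auth update attempt %s failed.",
                             attempt, exc_info=True)
                time.sleep(2 ** attempt)  # simple exponential backoff
        return 1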