author     Frederick Muriuki Muriithi   2025-06-05 13:29:12 -0500
committer  Frederick Muriuki Muriithi   2025-06-05 15:09:16 -0500
commit     650dbc306101a4c2bd0e910e3a5c744f2f09929c (patch)
tree       a1f32729ad92deefb7efcc21ad5e395308ca696a
parent     fd997d55b745a192f9946de1a7652fd28c700280 (diff)
download   gn-uploader-650dbc306101a4c2bd0e910e3a5c744f2f09929c.tar.gz
Only update authorisation after all data is entered.
Pull the auth update function out of the database connection
contextmanager to prevent any problems with an auth update from
causing a rollback of all the data.
We can always update the authorisation manually later; therefore, we
do not want a failure in that step to cause the (potentially)
time-consuming data entry process to be rolled back.
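
The pattern is easiest to see in isolation. Below is a minimal,
self-contained Python sketch of the idea; `database_connection`,
`load_data`, and `update_auth` mirror the names used in the script, but
their bodies here are stand-ins, not the real implementations:

    import contextlib

    @contextlib.contextmanager
    def database_connection():
        """Stand-in for the real database connection contextmanager: it
        commits when the block exits cleanly and rolls back when an
        exception escapes the block."""
        connection = {"rows": []}
        try:
            yield connection
            print("COMMIT: %d rows saved" % len(connection["rows"]))
        except Exception:
            print("ROLLBACK: all data entry work lost")
            raise

    def load_data(conn):
        """Stand-in for the (potentially time-consuming) data entry."""
        conn["rows"].extend(range(10_000))
        return ("species", "population", "dataset", "xrefs")

    def update_auth(*db_results):
        """Stand-in for the auth update; simulate a failure."""
        raise RuntimeError("auth server unreachable")

    # Before this commit: update_auth ran inside the contextmanager, so
    # any failure in it rolled back everything. After: it runs outside.
    with database_connection() as conn:
        db_results = load_data(conn)  # committed once the block exits

    try:
        update_auth(*db_results)
    except RuntimeError:
        print("auth update failed; the data is safe and the "
              "authorisation can be fixed manually later")

Once the block exits, the commit is final; the auth update becomes a
best-effort step whose failure leaves the loaded data intact.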
-rw-r--r--  scripts/load_phenotypes_to_db.py | 29
1 file changed, 20 insertions(+), 9 deletions(-)
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index 0b1f738..d5d2d16 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -1,3 +1,4 @@
+import sys
 import uuid
 import json
 import logging
@@ -224,7 +225,7 @@ def cross_reference_phenotypes_publications_and_data(
     return tuple()
 
 
-def update_auth(token, species, population, dataset, phenos):
+def update_auth(authserver, token, species, population, dataset, xrefdata):
     """Grant the user access to their data."""
     raise NotImplemented("Please implement this!")
 
@@ -285,7 +286,7 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
     logger.info("Saved %s new phenotype data rows.", _num_data_rows)
     # 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef
     logger.info("Cross-referencing new phenotypes to their data and publications.")
-    xrefs = cross_reference_phenotypes_publications_and_data(
+    _xrefs = cross_reference_phenotypes_publications_and_data(
         conn, tuple(dataidmap.values()))
     # 5. If standard errors and N exist, save them too
     #    (use IDs returned in `3. b.` above).
@@ -306,10 +307,7 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:
             control_data=_control_data,
             filesdir=_outdir)
         logger.info("Saved %s new phenotype sample counts rows.", num_n_rows)
-    # 6. If entirely new data, update authorisations (break this down)
-    logger.info("Updating authorisation.")
-    update_auth(_token, _species, _population, _dataset, _phenos)
-    return 0
+    return (_species, _population, _dataset, _xrefs)
 
 
 if __name__ == "__main__":
@@ -344,6 +342,8 @@ if __name__ == "__main__":
         with (mysqldb.database_connection(args.db_uri) as conn,
               conn.cursor(cursorclass=DictCursor) as cursor,
               sqlite3.connection(args.jobs_db_path) as jobs_conn):
+            job = jobs.job(jobs_conn, args.job_id)
+
             # Lock the PublishXRef/PublishData/PublishSE/NStrain here: Why?
             # The `DataId` values are sequential, but not auto-increment
             #   Can't convert `PublishXRef`.`DataId` to AUTO_INCREMENT.
@@ -371,12 +371,23 @@ if __name__ == "__main__":
             cursor.execute(# Lock the tables to avoid race conditions
                 "LOCK TABLES " + ", ".join(
                     f"{_table} WRITE" for _table in _db_tables_))
-            return load_data(conn, jobs.job(jobs_conn, args.job_id))
+
+            db_results = load_data(conn, job)
             logger.debug("Unlocking all database tables.")
             cursor.execute("UNLOCK TABLES")
-        return 1
+
+        # Update authorisations (break this down) — maybe loop until it works?
+        logger.info("Updating authorisation.")
+        _job_metadata = job["metadata"]
+        return update_auth(_job_metadata["authserver"],
+                           _job_metadata["token"],
+                           *db_results)
 
 
-    main()
+    try:
+        sys.exit(main())
+    except:
+        logger.debug("Data loading failed… Halting!",
+                     exc_info=True)
+        sys.exit(1)
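
The new in-diff comment leaves "maybe loop until it works?" open. One
possible shape for that is a bounded retry wrapper; this is a hedged
sketch, not part of the commit, and `call_with_retries`, the attempt
count, and the delay are all assumptions:

    import time
    import logging

    logger = logging.getLogger(__name__)

    def call_with_retries(func, /, *args, attempts=5, delay_seconds=30):
        """Retry `func(*args)` a bounded number of times, logging each
        failure. Attempt count and delay are illustrative choices."""
        for attempt in range(1, attempts + 1):
            try:
                return func(*args)
            except Exception:
                logger.warning("Attempt %s of %s failed.",
                               attempt, attempts, exc_info=True)
                if attempt < attempts:
                    time.sleep(delay_seconds)
        # By this point the data is already committed, so giving up is
        # non-fatal: the authorisation can be updated manually later.
        return 1

    # Hypothetical usage at the end of main():
    #     return call_with_retries(update_auth,
    #                              _job_metadata["authserver"],
    #                              _job_metadata["token"],
    #                              *db_results)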