diff options
-rw-r--r-- | scripts/load_phenotypes_to_db.py | 75 | ||||
-rw-r--r-- | uploader/static/js/debug.js | 40 |
2 files changed, 78 insertions, 37 deletions
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py index b78c648..3a0df77 100644 --- a/scripts/load_phenotypes_to_db.py +++ b/scripts/load_phenotypes_to_db.py @@ -3,6 +3,7 @@ import json import logging import argparse from pathlib import Path +from zipfile import ZipFile from typing import Any, Union from MySQLdb.cursors import Cursor, DictCursor @@ -10,8 +11,10 @@ from MySQLdb.cursors import Cursor, DictCursor from gn_libs import jobs, mysqldb, sqlite3 from r_qtl import r_qtl2 as rqtl2 -from uploader.phenotypes.models import save_phenotypes_data from uploader.samples.models import samples_by_species_and_population +from uploader.phenotypes.models import ( + save_phenotypes_data, + create_new_phenotypes) from uploader.publications.models import ( create_new_publications, fetch_publication_by_id) @@ -25,18 +28,18 @@ logger = logging.getLogger(__name__) def save_phenotypes( cursor: mysqldb.Connection, - control_data: dict[str, Any] + control_data: dict[str, Any], + filesdir: Path ) -> tuple[dict, ...]: """Read `phenofiles` and save the phenotypes therein.""" logger.info("Saving new phenotypes.") - logger.debug("Processing %s 'pheno' files.", len(phenofiles)) ## TODO: Replace with something like this: ## # phenofiles = control_data["phenocovar"] + control_data.get( # "gn-metadata", {}).get("pheno", []) # # This is meant to load (and merge) data from the "phenocovar" and # "gn-metadata -> pheno" files into a single collection of phenotypes. 
- phenofiles = control_data["phenocovar"] + phenofiles = tuple(filesdir.joinpath(_file) for _file in control_data["phenocovar"]) if len(phenofiles) <= 0: return tuple() @@ -47,9 +50,9 @@ def save_phenotypes( _file, build_line_splitter(control_data), build_line_joiner(control_data)) - for _file in control_data["phenocovar"]) + for _file in phenofiles) - _headers = rqtl2.read_csv_file_headers(control_data["phenocovar"][0], + _headers = rqtl2.read_csv_file_headers(phenofiles[0], control_data["phenocovar_transposed"], control_data["sep"], control_data["comment.char"]) @@ -66,17 +69,19 @@ def __fetch_next_dataid__(conn: mysqldb.Connection) -> int: with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute( "SELECT MAX(DataId) AS CurrentMaxDataId FROM PublishXRef") - return int(cursor.fetchone()) + 1 + return int(cursor.fetchone()["CurrentMaxDataId"]) + 1 def save_pheno_data( conn: mysqldb.Connection, dataidmap: dict, samples: tuple[dict, ...], - control_data: dict + control_data: dict, + filesdir: Path ): """Read the `datafiles` and save the data in the database.""" - phenofiles = control_data["pheno"] + phenofiles = tuple( filesdir.joinpath(_file) for _file in control_data["pheno"]) if len(phenofiles) <= 0: return tuple() @@ -87,9 +92,9 @@ def save_pheno_data( _file, build_line_splitter(control_data), build_line_joiner(control_data)) - for _file in control_data["pheno"]) + for _file in phenofiles) - _headers = rqtl2.read_csv_file_headers(control_data["pheno"][0], + _headers = rqtl2.read_csv_file_headers(phenofiles[0], control_data["pheno_transposed"], control_data["sep"], control_data["comment.char"]) @@ -108,7 +113,7 @@ def save_pheno_data( return save_phenotypes_data( conn, - "PublishData" + "PublishData", (item for item in (row_to_dataitems(dict(zip(_headers, line))) for filecontent in (rqtl2.read_csv_file(path) for path in phenofiles) @@ -155,7 +160,7 @@ def save_phenotype_se( return save_phenotypes_data( conn, - "PublishSE" + "PublishSE", (item for item
in (row_to_dataitems(dict(zip(_headers, line))) for filecontent in (rqtl2.read_csv_file(path) for path in sefiles) @@ -202,7 +207,7 @@ def save_phenotype_n( return save_phenotypes_data( conn, - "NStrain" + "NStrain", (item for item in (row_to_dataitems(dict(zip(_headers, line))) for filecontent in (rqtl2.read_csv_file(path) for path in sefiles) @@ -243,10 +248,14 @@ def load_data(conn, job): conn, int(_job_metadata.get("publicationid", "0"))) or {"Id": 0} # 2. Save all new phenotypes: # -> return phenotype IDs - _control_data = rqtl.control_data(job["job-metadata"]["bundle-file"]) + bundle = Path(_job_metadata["bundle_file"]) + _control_data = rqtl2.control_data(bundle) + logger.info("Extracting the zipped bundle of files.") + _outdir = Path(bundle.parent, f"bundle_{bundle.stem}") + with ZipFile(str(bundle), "r") as zfile: + _files = rqtl2.extract(zfile, _outdir) logger.info("Saving basic phenotype data.") - - _phenos = save_phenotypes(cursor, _control_data) + _phenos = save_phenotypes(conn, _control_data, _outdir) dataidmap = { row["phenotype_id"]: { "population_id": population["Id"], @@ -262,14 +271,14 @@ def load_data(conn, job): for row in samples_by_species_and_population( conn, species["SpeciesId"], population["PopulationId"])} # b. Save all the data items (DataIds are vibes), return new IDs - data = save_pheno_data(conn, dataidmap, samples, _control_data) + data = save_pheno_data(conn, dataidmap, samples, _control_data, _outdir) # 4. Cross-reference Phenotype, Publication, and PublishData in PublishXRef xrefs = cross_reference_phenotypes_publications_and_data( conn, tuple(dataidmap.values())) # 5. If standard errors and N exist, save them too # (use IDs returned in `3. b.` above). 
- data_se = save_phenotypes_se(conn, dataidmap, samples, _control_data) - data_n = save_phenotypes_n(conn, dataidmap, samples, _control_data) + data_se = save_phenotypes_se(conn, dataidmap, samples, _control_data, _outdir) + data_n = save_phenotypes_n(conn, dataidmap, samples, _control_data, _outdir) # 6. If entirely new data, update authorisations (break this down) update_auth(_user, _species, _population, _dataset, _phenos) return 0 @@ -314,29 +323,21 @@ if __name__ == "__main__": # How do you check for a table lock? # https://oracle-base.com/articles/mysql/mysql-identify-locked-tables # `SHOW OPEN TABLES LIKE 'Publish%';` - logger.debug( - ("Locking database tables for the connection:" - "\n\t- %s\n\t- %s\n\t- %s\n\t- %s\n\t- %s\n"), + _db_tables_ = ( "Publication", + "Phenotype", "PublishXRef", "PublishData", "PublishSE", "NStrain") + + logger.debug( + ("Locking database tables for the connection:" + + "".join("\n\t- %s" for _ in _db_tables_) + "\n"), + *_db_tables_) cursor.execute(# Lock the tables to avoid race conditions - "LOCK TABLES " - "Publication WRITE, " - "PublishXRef WRITE, " - "PublishData WRITE, " - "PublishSE WRITE, " - "NStrain WRITE") - try: - return load_data(conn, jobs.job(jobs_conn, args.job_id)) - except jobs.jobs.JobNotFound as _jne: - logger.error("Could not find job with ID: %s", args.job_id) - except Exception as _exc: - logger.error("Loading failed with general exception!", - exc_info=True, - stack_info=True) + "LOCK TABLES " + ", ".join( + f"{_table} WRITE" for _table in _db_tables_)) logger.debug("Unlocking all database tables.") cursor.execute("UNLOCK TABLES") diff --git a/uploader/static/js/debug.js b/uploader/static/js/debug.js new file mode 100644 index 0000000..eb01209 --- /dev/null +++ b/uploader/static/js/debug.js @@ -0,0 +1,40 @@ +/** + * The entire purpose of this function is for use to debug values inline + * without changing the flow of the code too much. 
+ * + * This **MUST** be a non-arrow function to allow access to the `arguments` + * object. + * + * This function expects at least one argument. + * + * If more than one argument is provided, then: + * a) the last argument is considered the value, and will be returned + * b) all other arguments will be converted to string and output + * + * If only one argument is provided, it is considered the value, and will be + * returned. + * + * Zero arguments is an error condition. + **/ +function __pk__(val) { + /* Handle zero arguments */ + if (arguments.length < 1) { + throw new Error("Invalid arguments: Expected at least one argument."); + } + + msg = "/********** DEBUG **********/"; + if (arguments.length > 1) { + msg = Array.from( + arguments + ).slice( + 0, + arguments.length - 1 + ).map((val) => { + return String(val); + }).join("; ") + } + + value = arguments[arguments.length - 1]; + console.debug("/********** " + msg + " **********/", value); + return value; +} |