"""Module to handle uploading of R/qtl2 bundles."""#pylint: disable=[too-many-lines] import sys import json import traceback from pathlib import Path from uuid import UUID, uuid4 from functools import partial from zipfile import ZipFile, is_zipfile from typing import Union, Callable, Optional import MySQLdb as mdb from redis import Redis from MySQLdb.cursors import DictCursor from werkzeug.utils import secure_filename from gn_libs.mysqldb import database_connection from flask import ( flash, escape, request, jsonify, url_for, redirect, Response, Blueprint, render_template, current_app as app) from r_qtl import r_qtl2 from uploader import jobs from uploader.files import save_file, fullpath from uploader.species.models import all_species from uploader.db_utils import with_db_connection from uploader.authorisation import require_login from uploader.platforms.models import platform_by_id, platforms_by_species from uploader.db.averaging import averaging_methods, averaging_method_by_id from uploader.db.tissues import all_tissues, tissue_by_id, create_new_tissue from uploader.population.models import (populations_by_species, population_by_species_and_id) from uploader.species.models import species_by_id from uploader.db.datasets import ( geno_dataset_by_id, geno_datasets_by_species_and_population, probeset_study_by_id, probeset_create_study, probeset_dataset_by_id, probeset_create_dataset, probeset_datasets_by_study, probeset_studies_by_species_and_population) rqtl2 = Blueprint("rqtl2", __name__) @rqtl2.route("/", methods=["GET", "POST"]) @rqtl2.route("/select-species", methods=["GET", "POST"]) @require_login def select_species(): """Select the species.""" if request.method == "GET": return render_template("expression-data/rqtl2/index.html", species=with_db_connection(all_species)) species_id = request.form.get("species_id") species = with_db_connection( lambda conn: species_by_id(conn, species_id)) if bool(species): return redirect(url_for( "species.populations.expression-data.rqtl2.select_population", species_id=species_id)) flash("Invalid species or no species selected!", "alert-error error-rqtl2") return redirect(url_for("expression-data.rqtl2.select_species")) @rqtl2.route("/expression-data/rqtl2/select-population", methods=["GET", "POST"]) @require_login def select_population(species_id: int): """Select/Create the population to organise data under.""" with database_connection(app.config["SQL_URI"]) as conn: species = species_by_id(conn, species_id) if not bool(species): flash("Invalid species selected!", "alert-error error-rqtl2") return redirect(url_for("expression-data.rqtl2.select_species")) if request.method == "GET": return render_template( "expression-data/rqtl2/select-population.html", species=species, populations=populations_by_species(conn, species_id)) population = population_by_species_and_id( conn, species["SpeciesId"], request.form.get("inbredset_id")) if not bool(population): flash("Invalid Population!", "alert-error error-rqtl2") return redirect( url_for("expression-data.rqtl2.select_population", pgsrc="error"), code=307) return redirect(url_for("expression-data.rqtl2.upload_rqtl2_bundle", species_id=species["SpeciesId"], population_id=population["InbredSetId"])) class __RequestError__(Exception): #pylint: disable=[invalid-name] """Internal class to avoid pylint's `too-many-return-statements` error.""" @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle"), methods=["GET", "POST"]) @require_login def upload_rqtl2_bundle(species_id: int, population_id: int): """Allow upload of R/qtl2 bundle.""" with database_connection(app.config["SQL_URI"]) as conn: species = species_by_id(conn, species_id) population = population_by_species_and_id( conn, species["SpeciesId"], population_id) if not bool(species): flash("Invalid species!", "alert-error error-rqtl2") return redirect(url_for("expression-data.rqtl2.select_species")) if not bool(population): flash("Invalid Population!", "alert-error error-rqtl2") return redirect( url_for("expression-data.rqtl2.select_population", pgsrc="error"), code=307) if request.method == "GET" or ( request.method == "POST" and bool(request.args.get("pgsrc"))): return render_template( "expression-data/rqtl2/upload-rqtl2-bundle-step-01.html", species=species, population=population) try: app.logger.debug("Files in the form: %s", request.files) the_file = save_file(request.files["rqtl2_bundle_file"], Path(app.config["UPLOAD_FOLDER"])) except AssertionError: app.logger.debug(traceback.format_exc()) flash("Please provide a valid R/qtl2 zip bundle.", "alert-error error-rqtl2") return redirect(url_for("expression-data.rqtl2.upload_rqtl2_bundle", species_id=species_id, population_id=population_id)) if not is_zipfile(str(the_file)): app.logger.debug("The file is not a zip file.") raise __RequestError__("Invalid file! Expected a zip file.") jobid = trigger_rqtl2_bundle_qc( species_id, population_id, the_file, request.files["rqtl2_bundle_file"].filename)#type: ignore[arg-type] return redirect(url_for( "expression-data.rqtl2.rqtl2_bundle_qc_status", jobid=jobid)) def trigger_rqtl2_bundle_qc( species_id: int, population_id: int, rqtl2bundle: Path, originalfilename: str ) -> UUID: """Trigger QC on the R/qtl2 bundle.""" redisuri = app.config["REDIS_URL"] with Redis.from_url(redisuri, decode_responses=True) as rconn: jobid = uuid4() redis_ttl_seconds = app.config["JOBS_TTL_SECONDS"] jobs.launch_job( jobs.initialise_job( rconn, jobs.jobsnamespace(), str(jobid), [sys.executable, "-m", "scripts.qc_on_rqtl2_bundle", app.config["SQL_URI"], app.config["REDIS_URL"], jobs.jobsnamespace(), str(jobid), str(species_id), str(population_id), "--redisexpiry", str(redis_ttl_seconds)], "rqtl2-bundle-qc-job", redis_ttl_seconds, {"job-metadata": json.dumps({ "speciesid": species_id, "populationid": population_id, "rqtl2-bundle-file": str(rqtl2bundle.absolute()), "original-filename": originalfilename})}), redisuri, f"{app.config['UPLOAD_FOLDER']}/job_errors") return jobid def chunk_name(uploadfilename: str, chunkno: int) -> str: """Generate chunk name from original filename and chunk number""" if uploadfilename == "": raise ValueError("Name cannot be empty!") if chunkno < 1: raise ValueError("Chunk number must be greater than zero") return f"{secure_filename(uploadfilename)}_part_{chunkno:05d}" def chunks_directory(uniqueidentifier: str) -> Path: """Compute the directory where chunks are temporarily stored.""" if uniqueidentifier == "": raise ValueError("Unique identifier cannot be empty!") return Path(app.config["UPLOAD_FOLDER"], f"tempdir_{uniqueidentifier}") @rqtl2.route(("/populations//rqtl2/" "/rqtl2-bundle-chunked"), methods=["GET"]) @require_login def upload_rqtl2_bundle_chunked_get(# pylint: disable=["unused-argument"] species_id: int, population_id: int ): """ Extension to the `upload_rqtl2_bundle` endpoint above that provides a way for testing whether all the chunks have been uploaded and to assist with resuming a failed expression-data. """ fileid = request.args.get("resumableIdentifier", type=str) or "" filename = request.args.get("resumableFilename", type=str) or "" chunk = request.args.get("resumableChunkNumber", type=int) or 0 if not(fileid or filename or chunk): return jsonify({ "message": "At least one required query parameter is missing.", "error": "BadRequest", "statuscode": 400 }), 400 if Path(chunks_directory(fileid), chunk_name(filename, chunk)).exists(): return "OK" return jsonify({ "message": f"Chunk {chunk} was not found.", "error": "NotFound", "statuscode": 404 }), 404 def __merge_chunks__(targetfile: Path, chunkpaths: tuple[Path, ...]) -> Path: """Merge the chunks into a single file.""" with open(targetfile, "ab") as _target: for chunkfile in chunkpaths: with open(chunkfile, "rb") as _chunkdata: _target.write(_chunkdata.read()) chunkfile.unlink() return targetfile @rqtl2.route(("/population//rqtl2/upload/" "/rqtl2-bundle-chunked"), methods=["POST"]) @require_login def upload_rqtl2_bundle_chunked_post(species_id: int, population_id: int): """ Extension to the `upload_rqtl2_bundle` endpoint above that allows large files to be uploaded in chunks. This should hopefully speed up uploads, and if done right, even enable resumable uploads """ _totalchunks = request.form.get("resumableTotalChunks", type=int) or 0 _chunk = request.form.get("resumableChunkNumber", default=1, type=int) _uploadfilename = request.form.get( "resumableFilename", default="", type=str) or "" _fileid = request.form.get( "resumableIdentifier", default="", type=str) or "" _targetfile = Path(app.config["UPLOAD_FOLDER"], _fileid) if _targetfile.exists(): return jsonify({ "message": ( "A file with a similar unique identifier has previously been " "uploaded and possibly is/has being/been processed."), "error": "BadRequest", "statuscode": 400 }), 400 try: # save chunk data chunks_directory(_fileid).mkdir(exist_ok=True, parents=True) request.files["file"].save(Path(chunks_directory(_fileid), chunk_name(_uploadfilename, _chunk))) # Check whether upload is complete chunkpaths = tuple( Path(chunks_directory(_fileid), chunk_name(_uploadfilename, _achunk)) for _achunk in range(1, _totalchunks+1)) if all(_file.exists() for _file in chunkpaths): # merge_files and clean up chunks __merge_chunks__(_targetfile, chunkpaths) chunks_directory(_fileid).rmdir() jobid = trigger_rqtl2_bundle_qc( species_id, population_id, _targetfile, _uploadfilename) return url_for( "expression-data.rqtl2.rqtl2_bundle_qc_status", jobid=jobid) except Exception as exc:# pylint: disable=[broad-except] msg = "Error processing uploaded file chunks." app.logger.error(msg, exc_info=True, stack_info=True) return jsonify({ "message": msg, "error": type(exc).__name__, "error-description": " ".join(str(arg) for arg in exc.args), "error-trace": traceback.format_exception(exc) }), 500 return "OK" @rqtl2.route("/upload/species/rqtl2-bundle/qc-status/", methods=["GET", "POST"]) @require_login def rqtl2_bundle_qc_status(jobid: UUID): """Check the status of the QC jobs.""" with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn, database_connection(app.config["SQL_URI"]) as dbconn): try: thejob = jobs.job(rconn, jobs.jobsnamespace(), jobid) messagelistname = thejob.get("log-messagelist") logmessages = (rconn.lrange(messagelistname, 0, -1) if bool(messagelistname) else []) jobstatus = thejob["status"] if jobstatus == "error": return render_template( "expression-data/rqtl2/rqtl2-qc-job-error.html", job=thejob, errorsgeneric=json.loads( thejob.get("errors-generic", "[]")), errorsgeno=json.loads( thejob.get("errors-geno", "[]")), errorspheno=json.loads( thejob.get("errors-pheno", "[]")), errorsphenose=json.loads( thejob.get("errors-phenose", "[]")), errorsphenocovar=json.loads( thejob.get("errors-phenocovar", "[]")), messages=logmessages) if jobstatus == "success": jobmeta = json.loads(thejob["job-metadata"]) species = species_by_id(dbconn, jobmeta["speciesid"]) return render_template( "expression-data/rqtl2/rqtl2-qc-job-results.html", species=species, population=population_by_species_and_id( dbconn, species["SpeciesId"], jobmeta["populationid"]), rqtl2bundle=Path(jobmeta["rqtl2-bundle-file"]).name, rqtl2bundleorig=jobmeta["original-filename"]) def compute_percentage(thejob, filetype) -> Union[str, None]: if f"{filetype}-linecount" in thejob: return "100" if f"{filetype}-filesize" in thejob: percent = ((int(thejob.get(f"{filetype}-checked", 0)) / int(thejob.get(f"{filetype}-filesize", 1))) * 100) return f"{percent:.2f}" return None return render_template( "expression-data/rqtl2/rqtl2-qc-job-status.html", job=thejob, geno_percent=compute_percentage(thejob, "geno"), pheno_percent=compute_percentage(thejob, "pheno"), phenose_percent=compute_percentage(thejob, "phenose"), messages=logmessages) except jobs.JobNotFound: return render_template("expression-data/rqtl2/no-such-job.html", jobid=jobid) def redirect_on_error(flaskroute, **kwargs): """Utility to redirect on error""" return redirect(url_for(flaskroute, **kwargs, pgsrc="error"), code=(307 if request.method == "POST" else 302)) def check_species(conn: mdb.Connection, formargs: dict) -> Optional[ tuple[str, Response]]: """ Check whether the 'species_id' value is provided, and whether a corresponding species exists in the database. Maybe give the function a better name...""" speciespage = redirect_on_error("expression-data.rqtl2.select_species") if "species_id" not in formargs: return "You MUST provide the Species identifier.", speciespage if not bool(species_by_id(conn, formargs["species_id"])): return "No species with the provided identifier exists.", speciespage return None def check_population(conn: mdb.Connection, formargs: dict, species_id) -> Optional[tuple[str, Response]]: """ Check whether the 'population_id' value is provided, and whether a corresponding population exists in the database. Maybe give the function a better name...""" poppage = redirect_on_error( "expression-data.rqtl2.select_species", species_id=species_id) if "population_id" not in formargs: return "You MUST provide the Population identifier.", poppage if not bool(population_by_species_and_id( conn, species_id, formargs["population_id"])): return "No population with the provided identifier exists.", poppage return None def check_r_qtl2_bundle(formargs: dict, species_id, population_id) -> Optional[tuple[str, Response]]: """Check for the existence of the R/qtl2 bundle.""" fileuploadpage = redirect_on_error("expression-data.rqtl2.upload_rqtl2_bundle", species_id=species_id, population_id=population_id) if not "rqtl2_bundle_file" in formargs: return ( "You MUST provide a R/qtl2 zip bundle for expression-data.", fileuploadpage) if not Path(fullpath(formargs["rqtl2_bundle_file"])).exists(): return "No R/qtl2 bundle with the given name exists.", fileuploadpage return None def check_geno_dataset(conn: mdb.Connection, formargs: dict, species_id, population_id) -> Optional[tuple[str, Response]]: """Check for the Genotype dataset.""" genodsetpg = redirect_on_error("expression-data.rqtl2.select_dataset_info", species_id=species_id, population_id=population_id) if not bool(formargs.get("geno-dataset-id")): return ( "You MUST provide a valid Genotype dataset identifier", genodsetpg) with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute("SELECT * FROM GenoFreeze WHERE Id=%s", (formargs["geno-dataset-id"],)) results = cursor.fetchall() if not bool(results): return ("No genotype dataset with the provided identifier exists.", genodsetpg) if len(results) > 1: return ( "Data corruption: More than one genotype dataset with the same " "identifier.", genodsetpg) return None def check_tissue( conn: mdb.Connection,formargs: dict) -> Optional[tuple[str, Response]]: """Check for tissue/organ/biological material.""" selectdsetpg = redirect_on_error("expression-data.rqtl2.select_dataset_info", species_id=formargs["species_id"], population_id=formargs["population_id"]) if not bool(formargs.get("tissueid", "").strip()): return ("No tissue/organ/biological material provided.", selectdsetpg) with conn.cursor(cursorclass=DictCursor) as cursor: cursor.execute("SELECT * FROM Tissue WHERE Id=%s", (formargs["tissueid"],)) results = cursor.fetchall() if not bool(results): return ("No tissue/organ with the provided identifier exists.", selectdsetpg) if len(results) > 1: return ( "Data corruption: More than one tissue/organ with the same " "identifier.", selectdsetpg) return None def check_probe_study(conn: mdb.Connection, formargs: dict, species_id, population_id) -> Optional[tuple[str, Response]]: """Check for the ProbeSet study.""" dsetinfopg = redirect_on_error("expression-data.rqtl2.select_dataset_info", species_id=species_id, population_id=population_id) if not bool(formargs.get("probe-study-id")): return "No probeset study was selected!", dsetinfopg if not bool(probeset_study_by_id(conn, formargs["probe-study-id"])): return ("No probeset study with the provided identifier exists", dsetinfopg) return None def check_probe_dataset(conn: mdb.Connection, formargs: dict, species_id, population_id) -> Optional[tuple[str, Response]]: """Check for the ProbeSet dataset.""" dsetinfopg = redirect_on_error("expression-data.rqtl2.select_dataset_info", species_id=species_id, population_id=population_id) if not bool(formargs.get("probe-dataset-id")): return "No probeset dataset was selected!", dsetinfopg if not bool(probeset_dataset_by_id(conn, formargs["probe-dataset-id"])): return ("No probeset dataset with the provided identifier exists", dsetinfopg) return None def with_errors(endpointthunk: Callable, *checkfns): """Run 'endpointthunk' with error checking.""" formargs = {**dict(request.args), **dict(request.form)} errors = tuple(item for item in (_fn(formargs=formargs) for _fn in checkfns) if item is not None) if len(errors) > 0: flash(errors[0][0], "alert-error error-rqtl2") return errors[0][1] return endpointthunk() @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/select-geno-dataset"), methods=["POST"]) @require_login def select_geno_dataset(species_id: int, population_id: int): """Select from existing geno datasets.""" with database_connection(app.config["SQL_URI"]) as conn: def __thunk__(): geno_dset = geno_datasets_by_species_and_population( conn, species_id, population_id) if not bool(geno_dset): flash("No genotype dataset was provided!", "alert-error error-rqtl2") return redirect(url_for("expression-data.rqtl2.select_geno_dataset", species_id=species_id, population_id=population_id, pgsrc="error"), code=307) flash("Genotype accepted", "alert-success error-rqtl2") return redirect(url_for("expression-data.rqtl2.select_dataset_info", species_id=species_id, population_id=population_id, pgsrc="expression-data.rqtl2.select_geno_dataset"), code=307) return with_errors(__thunk__, partial(check_species, conn=conn), partial(check_population, conn=conn, species_id=species_id), partial(check_r_qtl2_bundle, species_id=species_id, population_id=population_id), partial(check_geno_dataset, conn=conn, species_id=species_id, population_id=population_id)) @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/select-tissue"), methods=["POST"]) @require_login def select_tissue(species_id: int, population_id: int): """Select from existing tissues.""" with database_connection(app.config["SQL_URI"]) as conn: def __thunk__(): if not bool(request.form.get("tissueid", "").strip()): flash("Invalid tissue selection!", "alert-error error-select-tissue error-rqtl2") return redirect(url_for("expression-data.rqtl2.select_dataset_info", species_id=species_id, population_id=population_id, pgsrc="expression-data.rqtl2.select_geno_dataset"), code=307) return with_errors(__thunk__, partial(check_species, conn=conn), partial(check_population, conn=conn, species_id=species_id), partial(check_r_qtl2_bundle, species_id=species_id, population_id=population_id), partial(check_geno_dataset, conn=conn, species_id=species_id, population_id=population_id)) @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/create-tissue"), methods=["POST"]) @require_login def create_tissue(species_id: int, population_id: int): """Add new tissue, organ or biological material to the system.""" form = request.form datasetinfopage = redirect( url_for("expression-data.rqtl2.select_dataset_info", species_id=species_id, population_id=population_id, pgsrc="expression-data.rqtl2.select_geno_dataset"), code=307) with database_connection(app.config["SQL_URI"]) as conn: tissuename = form.get("tissuename", "").strip() tissueshortname = form.get("tissueshortname", "").strip() if not bool(tissuename): flash("Organ/Tissue name MUST be provided.", "alert-error error-create-tissue error-rqtl2") return datasetinfopage if not bool(tissueshortname): flash("Organ/Tissue short name MUST be provided.", "alert-error error-create-tissue error-rqtl2") return datasetinfopage try: tissue = create_new_tissue(conn, tissuename, tissueshortname) flash("Tissue created successfully!", "alert-success") return render_template( "expression-data/rqtl2/create-tissue-success.html", species=species_by_id(conn, species_id), population=population_by_species_and_id( conn, species_id, population_id), rqtl2_bundle_file=request.form["rqtl2_bundle_file"], geno_dataset=geno_dataset_by_id( conn, int(request.form["geno-dataset-id"])), tissue=tissue) except mdb.IntegrityError as _ierr: flash("Tissue/Organ with that short name already exists!", "alert-error error-create-tissue error-rqtl2") return datasetinfopage @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/select-probeset-study"), methods=["POST"]) @require_login def select_probeset_study(species_id: int, population_id: int): """Select or create a probeset study.""" with database_connection(app.config["SQL_URI"]) as conn: def __thunk__(): summary_page = redirect(url_for("expression-data.rqtl2.select_dataset_info", species_id=species_id, population_id=population_id), code=307) if not bool(probeset_study_by_id(conn, int(request.form["probe-study-id"]))): flash("Invalid study selected!", "alert-error error-rqtl2") return summary_page return summary_page return with_errors(__thunk__, partial(check_species, conn=conn), partial(check_population, conn=conn, species_id=species_id), partial(check_r_qtl2_bundle, species_id=species_id, population_id=population_id), partial(check_geno_dataset, conn=conn, species_id=species_id, population_id=population_id), partial(check_tissue, conn=conn), partial(check_probe_study, conn=conn, species_id=species_id, population_id=population_id)) @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/select-probeset-dataset"), methods=["POST"]) @require_login def select_probeset_dataset(species_id: int, population_id: int): """Select or create a probeset dataset.""" with database_connection(app.config["SQL_URI"]) as conn: def __thunk__(): summary_page = redirect(url_for("expression-data.rqtl2.select_dataset_info", species_id=species_id, population_id=population_id), code=307) if not bool(probeset_study_by_id(conn, int(request.form["probe-study-id"]))): flash("Invalid study selected!", "alert-error error-rqtl2") return summary_page return summary_page return with_errors(__thunk__, partial(check_species, conn=conn), partial(check_population, conn=conn, species_id=species_id), partial(check_r_qtl2_bundle, species_id=species_id, population_id=population_id), partial(check_geno_dataset, conn=conn, species_id=species_id, population_id=population_id), partial(check_tissue, conn=conn), partial(check_probe_study, conn=conn, species_id=species_id, population_id=population_id), partial(check_probe_dataset, conn=conn, species_id=species_id, population_id=population_id)) @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/create-probeset-study"), methods=["POST"]) @require_login def create_probeset_study(species_id: int, population_id: int): """Create a new probeset study.""" errorclasses = "alert-error error-rqtl2 error-rqtl2-create-probeset-study" with database_connection(app.config["SQL_URI"]) as conn: def __thunk__(): form = request.form dataset_info_page = redirect( url_for("expression-data.rqtl2.select_dataset_info", species_id=species_id, population_id=population_id), code=307) if not (bool(form.get("platformid")) and bool(platform_by_id(conn, int(form["platformid"])))): flash("Invalid platform selected.", errorclasses) return dataset_info_page if not (bool(form.get("tissueid")) and bool(tissue_by_id(conn, int(form["tissueid"])))): flash("Invalid tissue selected.", errorclasses) return dataset_info_page studyname = form["studyname"] try: study = probeset_create_study( conn, population_id, int(form["platformid"]), int(form["tissueid"]), studyname, form.get("studyfullname") or "", form.get("studyshortname") or "") except mdb.IntegrityError as _ierr: flash(f"ProbeSet study with name '{escape(studyname)}' already " "exists.", errorclasses) return dataset_info_page return render_template( "expression-data/rqtl2/create-probe-study-success.html", species=species_by_id(conn, species_id), population=population_by_species_and_id( conn, species_id, population_id), rqtl2_bundle_file=request.form["rqtl2_bundle_file"], geno_dataset=geno_dataset_by_id( conn, int(request.form["geno-dataset-id"])), study=study) return with_errors(__thunk__, partial(check_species, conn=conn), partial(check_population, conn=conn, species_id=species_id), partial(check_r_qtl2_bundle, species_id=species_id, population_id=population_id), partial(check_geno_dataset, conn=conn, species_id=species_id, population_id=population_id), partial(check_tissue, conn=conn)) @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/create-probeset-dataset"), methods=["POST"]) @require_login def create_probeset_dataset(species_id: int, population_id: int):#pylint: disable=[too-many-return-statements] """Create a new probeset dataset.""" errorclasses = "alert-error error-rqtl2 error-rqtl2-create-probeset-dataset" with database_connection(app.config["SQL_URI"]) as conn: def __thunk__():#pylint: disable=[too-many-return-statements] form = request.form summary_page = redirect(url_for("expression-data.rqtl2.select_dataset_info", species_id=species_id, population_id=population_id), code=307) if not bool(form.get("averageid")): flash("Averaging method not selected!", errorclasses) return summary_page if not bool(form.get("datasetname")): flash("Dataset name not provided!", errorclasses) return summary_page if not bool(form.get("datasetfullname")): flash("Dataset full name not provided!", errorclasses) return summary_page tissue = tissue_by_id(conn, form.get("tissueid", "").strip()) study = probeset_study_by_id(conn, int(form["probe-study-id"])) if not bool(study): flash("Invalid ProbeSet study provided!", errorclasses) return summary_page avgmethod = averaging_method_by_id(conn, int(form["averageid"])) if not bool(avgmethod): flash("Invalid averaging method provided!", errorclasses) return summary_page try: dset = probeset_create_dataset(conn, int(form["probe-study-id"]), int(form["averageid"]), form["datasetname"], form["datasetfullname"], form["datasetshortname"], form["datasetpublic"] == "on", form.get( "datasetdatascale", "log2")) except mdb.IntegrityError as _ierr: app.logger.debug("Possible integrity error: %s", traceback.format_exc()) flash(("IntegrityError: The data you provided has some errors: " f"{_ierr.args}"), errorclasses) return summary_page except Exception as _exc:# pylint: disable=[broad-except] app.logger.debug("Error creating ProbeSet dataset: %s", traceback.format_exc()) flash(("There was a problem creating your dataset. Please try " "again."), errorclasses) return summary_page return render_template( "expression-data/rqtl2/create-probe-dataset-success.html", species=species_by_id(conn, species_id), population=population_by_species_and_id( conn, species_id, population_id), rqtl2_bundle_file=request.form["rqtl2_bundle_file"], geno_dataset=geno_dataset_by_id( conn, int(request.form["geno-dataset-id"])), tissue=tissue, study=study, avgmethod=avgmethod, dataset=dset) return with_errors(__thunk__, partial(check_species, conn=conn), partial(check_population, conn=conn, species_id=species_id), partial(check_r_qtl2_bundle, species_id=species_id, population_id=population_id), partial(check_geno_dataset, conn=conn, species_id=species_id, population_id=population_id), partial(check_tissue, conn=conn), partial(check_probe_study, conn=conn, species_id=species_id, population_id=population_id)) @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/dataset-info"), methods=["POST"]) @require_login def select_dataset_info(species_id: int, population_id: int): """ If `geno` files exist in the R/qtl2 bundle, prompt user to provide the dataset the genotypes belong to. """ form = request.form with database_connection(app.config["SQL_URI"]) as conn: def __thunk__(): species = species_by_id(conn, species_id) population = population_by_species_and_id( conn, species_id, population_id) thefile = fullpath(form["rqtl2_bundle_file"]) with ZipFile(str(thefile), "r") as zfile: cdata = r_qtl2.control_data(zfile) geno_dataset = geno_dataset_by_id( conn,form.get("geno-dataset-id", "").strip()) if "geno" in cdata and not bool(form.get("geno-dataset-id")): return render_template( "expression-data/rqtl2/select-geno-dataset.html", species=species, population=population, rqtl2_bundle_file=thefile.name, datasets=geno_datasets_by_species_and_population( conn, species_id, population_id)) tissue = tissue_by_id(conn, form.get("tissueid", "").strip()) if "pheno" in cdata and not bool(tissue): return render_template( "expression-data/rqtl2/select-tissue.html", species=species, population=population, rqtl2_bundle_file=thefile.name, geno_dataset=geno_dataset, studies=probeset_studies_by_species_and_population( conn, species_id, population_id), platforms=platforms_by_species(conn, species_id), tissues=all_tissues(conn)) probeset_study = probeset_study_by_id( conn, form.get("probe-study-id", "").strip()) if "pheno" in cdata and not bool(probeset_study): return render_template( "expression-data/rqtl2/select-probeset-study-id.html", species=species, population=population, rqtl2_bundle_file=thefile.name, geno_dataset=geno_dataset, studies=probeset_studies_by_species_and_population( conn, species_id, population_id), platforms=platforms_by_species(conn, species_id), tissue=tissue) probeset_study = probeset_study_by_id( conn, int(form["probe-study-id"])) probeset_dataset = probeset_dataset_by_id( conn, form.get("probe-dataset-id", "").strip()) if "pheno" in cdata and not bool(probeset_dataset): return render_template( "expression-data/rqtl2/select-probeset-dataset.html", species=species, population=population, rqtl2_bundle_file=thefile.name, geno_dataset=geno_dataset, probe_study=probeset_study, tissue=tissue, datasets=probeset_datasets_by_study( conn, int(form["probe-study-id"])), avgmethods=averaging_methods(conn)) return render_template("expression-data/rqtl2/summary-info.html", species=species, population=population, rqtl2_bundle_file=thefile.name, geno_dataset=geno_dataset, tissue=tissue, probe_study=probeset_study, probe_dataset=probeset_dataset) return with_errors(__thunk__, partial(check_species, conn=conn), partial(check_population, conn=conn, species_id=species_id), partial(check_r_qtl2_bundle, species_id=species_id, population_id=population_id)) @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/confirm-bundle-details"), methods=["POST"]) @require_login def confirm_bundle_details(species_id: int, population_id: int): """Confirm the details and trigger R/qtl2 bundle processing...""" redisuri = app.config["REDIS_URL"] with (database_connection(app.config["SQL_URI"]) as conn, Redis.from_url(redisuri, decode_responses=True) as rconn): def __thunk__(): redis_ttl_seconds = app.config["JOBS_TTL_SECONDS"] jobid = str(uuid4()) _job = jobs.launch_job( jobs.initialise_job( rconn, jobs.jobsnamespace(), jobid, [ sys.executable, "-m", "scripts.process_rqtl2_bundle", app.config["SQL_URI"], app.config["REDIS_URL"], jobs.jobsnamespace(), jobid, "--redisexpiry", str(redis_ttl_seconds)], "R/qtl2 Bundle Upload", redis_ttl_seconds, { "bundle-metadata": json.dumps({ "speciesid": species_id, "populationid": population_id, "rqtl2-bundle-file": str(fullpath( request.form["rqtl2_bundle_file"])), "geno-dataset-id": request.form.get( "geno-dataset-id", ""), "probe-study-id": request.form.get( "probe-study-id", ""), "probe-dataset-id": request.form.get( "probe-dataset-id", ""), **({ "platformid": probeset_study_by_id( conn, int(request.form["probe-study-id"]))["ChipId"] } if bool(request.form.get("probe-study-id")) else {}) }) }), redisuri, f"{app.config['UPLOAD_FOLDER']}/job_errors") return redirect(url_for("expression-data.rqtl2.rqtl2_processing_status", jobid=jobid)) return with_errors(__thunk__, partial(check_species, conn=conn), partial(check_population, conn=conn, species_id=species_id), partial(check_r_qtl2_bundle, species_id=species_id, population_id=population_id), partial(check_geno_dataset, conn=conn, species_id=species_id, population_id=population_id), partial(check_probe_study, conn=conn, species_id=species_id, population_id=population_id), partial(check_probe_dataset, conn=conn, species_id=species_id, population_id=population_id)) @rqtl2.route("/status/") def rqtl2_processing_status(jobid: UUID): """Retrieve the status of the job processing the uploaded R/qtl2 bundle.""" with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: try: thejob = jobs.job(rconn, jobs.jobsnamespace(), jobid) messagelistname = thejob.get("log-messagelist") logmessages = (rconn.lrange(messagelistname, 0, -1) if bool(messagelistname) else []) if thejob["status"] == "error": return render_template( "expression-data/rqtl2/rqtl2-job-error.html", job=thejob, messages=logmessages) if thejob["status"] == "success": return render_template( "expression-data/rqtl2/rqtl2-job-results.html", job=thejob, messages=logmessages) return render_template( "expression-data/rqtl2/rqtl2-job-status.html", job=thejob, messages=logmessages) except jobs.JobNotFound as _exc: return render_template("expression-data/rqtl2/no-such-job.html", jobid=jobid)