diff options
Diffstat (limited to 'uploader')
-rw-r--r-- | uploader/population/rqtl2.py | 1075 |
1 files changed, 1075 insertions, 0 deletions
diff --git a/uploader/population/rqtl2.py b/uploader/population/rqtl2.py new file mode 100644 index 0000000..9968bd6 --- /dev/null +++ b/uploader/population/rqtl2.py @@ -0,0 +1,1075 @@ +"""Module to handle uploading of R/qtl2 bundles."""#pylint: disable=[too-many-lines] +import sys +import json +import traceback +from pathlib import Path +from uuid import UUID, uuid4 +from functools import partial +from zipfile import ZipFile, is_zipfile +from typing import Union, Callable, Optional + +import MySQLdb as mdb +from redis import Redis +from MySQLdb.cursors import DictCursor +from werkzeug.utils import secure_filename +from flask import ( + flash, + escape, + request, + jsonify, + url_for, + redirect, + Response, + Blueprint, + render_template, + current_app as app) + +from r_qtl import r_qtl2 + +from uploader import jobs +from uploader.files import save_file, fullpath +from uploader.species.models import all_species +from uploader.db_utils import with_db_connection, database_connection + +from uploader.authorisation import require_login +from uploader.platforms.models import platform_by_id, platforms_by_species +from uploader.db.averaging import averaging_methods, averaging_method_by_id +from uploader.db.tissues import all_tissues, tissue_by_id, create_new_tissue +from uploader.population.models import (populations_by_species, + population_by_species_and_id) +from uploader.species.models import species_by_id +from uploader.db.datasets import ( + geno_dataset_by_id, + geno_datasets_by_species_and_population, + + probeset_study_by_id, + probeset_create_study, + probeset_dataset_by_id, + probeset_create_dataset, + probeset_datasets_by_study, + probeset_studies_by_species_and_population) + +rqtl2 = Blueprint("rqtl2", __name__) + + +@rqtl2.route("/", methods=["GET", "POST"]) +@rqtl2.route("/select-species", methods=["GET", "POST"]) +@require_login +def select_species(): + """Select the species.""" + if request.method == "GET": + return render_template("expression-data/rqtl2/index.html", + species=with_db_connection(all_species)) + + species_id = request.form.get("species_id") + species = with_db_connection( + lambda conn: species_by_id(conn, species_id)) + if bool(species): + return redirect(url_for( + "species.populations.expression-data.rqtl2.select_population", + species_id=species_id)) + flash("Invalid species or no species selected!", "alert-error error-rqtl2") + return redirect(url_for("expression-data.rqtl2.select_species")) + + +@rqtl2.route("<int:species_id>/expression-data/rqtl2/select-population", + methods=["GET", "POST"]) +@require_login +def select_population(species_id: int): + """Select/Create the population to organise data under.""" + with database_connection(app.config["SQL_URI"]) as conn: + species = species_by_id(conn, species_id) + if not bool(species): + flash("Invalid species selected!", "alert-error error-rqtl2") + return redirect(url_for("expression-data.rqtl2.select_species")) + + if request.method == "GET": + return render_template( + "expression-data/rqtl2/select-population.html", + species=species, + populations=populations_by_species(conn, species_id)) + + population = population_by_species_and_id( + conn, species["SpeciesId"], request.form.get("inbredset_id")) + if not bool(population): + flash("Invalid Population!", "alert-error error-rqtl2") + return redirect( + url_for("expression-data.rqtl2.select_population", pgsrc="error"), + code=307) + + return redirect(url_for("expression-data.rqtl2.upload_rqtl2_bundle", + species_id=species["SpeciesId"], + population_id=population["InbredSetId"])) + + +class __RequestError__(Exception): #pylint: disable=[invalid-name] + """Internal class to avoid pylint's `too-many-return-statements` error.""" + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle"), + methods=["GET", "POST"]) +@require_login +def upload_rqtl2_bundle(species_id: int, population_id: int): + """Allow upload of R/qtl2 bundle.""" + with database_connection(app.config["SQL_URI"]) as conn: + species = species_by_id(conn, species_id) + population = population_by_species_and_id( + conn, species["SpeciesId"], population_id) + if not bool(species): + flash("Invalid species!", "alert-error error-rqtl2") + return redirect(url_for("expression-data.rqtl2.select_species")) + if not bool(population): + flash("Invalid Population!", "alert-error error-rqtl2") + return redirect( + url_for("expression-data.rqtl2.select_population", pgsrc="error"), + code=307) + if request.method == "GET" or ( + request.method == "POST" + and bool(request.args.get("pgsrc"))): + return render_template( + "expression-data/rqtl2/upload-rqtl2-bundle-step-01.html", + species=species, + population=population) + + try: + app.logger.debug("Files in the form: %s", request.files) + the_file = save_file(request.files["rqtl2_bundle_file"], + Path(app.config["UPLOAD_FOLDER"])) + except AssertionError: + app.logger.debug(traceback.format_exc()) + flash("Please provide a valid R/qtl2 zip bundle.", + "alert-error error-rqtl2") + return redirect(url_for("expression-data.rqtl2.upload_rqtl2_bundle", + species_id=species_id, + population_id=population_id)) + + if not is_zipfile(str(the_file)): + app.logger.debug("The file is not a zip file.") + raise __RequestError__("Invalid file! Expected a zip file.") + + jobid = trigger_rqtl2_bundle_qc( + species_id, + population_id, + the_file, + request.files["rqtl2_bundle_file"].filename)#type: ignore[arg-type] + return redirect(url_for( + "expression-data.rqtl2.rqtl2_bundle_qc_status", jobid=jobid)) + + +def trigger_rqtl2_bundle_qc( + species_id: int, + population_id: int, + rqtl2bundle: Path, + originalfilename: str +) -> UUID: + """Trigger QC on the R/qtl2 bundle.""" + redisuri = app.config["REDIS_URL"] + with Redis.from_url(redisuri, decode_responses=True) as rconn: + jobid = uuid4() + redis_ttl_seconds = app.config["JOBS_TTL_SECONDS"] + jobs.launch_job( + jobs.initialise_job( + rconn, + jobs.jobsnamespace(), + str(jobid), + [sys.executable, "-m", "scripts.qc_on_rqtl2_bundle", + app.config["SQL_URI"], app.config["REDIS_URL"], + jobs.jobsnamespace(), str(jobid), str(species_id), + str(population_id), "--redisexpiry", + str(redis_ttl_seconds)], + "rqtl2-bundle-qc-job", + redis_ttl_seconds, + {"job-metadata": json.dumps({ + "speciesid": species_id, + "populationid": population_id, + "rqtl2-bundle-file": str(rqtl2bundle.absolute()), + "original-filename": originalfilename})}), + redisuri, + f"{app.config['UPLOAD_FOLDER']}/job_errors") + return jobid + + +def chunk_name(uploadfilename: str, chunkno: int) -> str: + """Generate chunk name from original filename and chunk number""" + if uploadfilename == "": + raise ValueError("Name cannot be empty!") + if chunkno < 1: + raise ValueError("Chunk number must be greater than zero") + return f"{secure_filename(uploadfilename)}_part_{chunkno:05d}" + + +def chunks_directory(uniqueidentifier: str) -> Path: + """Compute the directory where chunks are temporarily stored.""" + if uniqueidentifier == "": + raise ValueError("Unique identifier cannot be empty!") + return Path(app.config["UPLOAD_FOLDER"], f"tempdir_{uniqueidentifier}") + + +@rqtl2.route(("<int:species_id>/populations/<int:population_id>/rqtl2/" + "/rqtl2-bundle-chunked"), + methods=["GET"]) +@require_login +def upload_rqtl2_bundle_chunked_get(# pylint: disable=["unused-argument"] + species_id: int, + population_id: int +): + """ + Extension to the `upload_rqtl2_bundle` endpoint above that provides a way + for testing whether all the chunks have been uploaded and to assist with + resuming a failed expression-data. + """ + fileid = request.args.get("resumableIdentifier", type=str) or "" + filename = request.args.get("resumableFilename", type=str) or "" + chunk = request.args.get("resumableChunkNumber", type=int) or 0 + if not(fileid or filename or chunk): + return jsonify({ + "message": "At least one required query parameter is missing.", + "error": "BadRequest", + "statuscode": 400 + }), 400 + + if Path(chunks_directory(fileid), + chunk_name(filename, chunk)).exists(): + return "OK" + + return jsonify({ + "message": f"Chunk {chunk} was not found.", + "error": "NotFound", + "statuscode": 404 + }), 404 + + +def __merge_chunks__(targetfile: Path, chunkpaths: tuple[Path, ...]) -> Path: + """Merge the chunks into a single file.""" + with open(targetfile, "ab") as _target: + for chunkfile in chunkpaths: + with open(chunkfile, "rb") as _chunkdata: + _target.write(_chunkdata.read()) + + chunkfile.unlink() + return targetfile + + +@rqtl2.route(("<int:species_id>/population/<int:population_id>/rqtl2/upload/" + "/rqtl2-bundle-chunked"), + methods=["POST"]) +@require_login +def upload_rqtl2_bundle_chunked_post(species_id: int, population_id: int): + """ + Extension to the `upload_rqtl2_bundle` endpoint above that allows large + files to be uploaded in chunks. + + This should hopefully speed up uploads, and if done right, even enable + resumable uploads + """ + _totalchunks = request.form.get("resumableTotalChunks", type=int) or 0 + _chunk = request.form.get("resumableChunkNumber", default=1, type=int) + _uploadfilename = request.form.get( + "resumableFilename", default="", type=str) or "" + _fileid = request.form.get( + "resumableIdentifier", default="", type=str) or "" + _targetfile = Path(app.config["UPLOAD_FOLDER"], _fileid) + + if _targetfile.exists(): + return jsonify({ + "message": ( + "A file with a similar unique identifier has previously been " + "uploaded and possibly is/has being/been processed."), + "error": "BadRequest", + "statuscode": 400 + }), 400 + + try: + # save chunk data + chunks_directory(_fileid).mkdir(exist_ok=True, parents=True) + request.files["file"].save(Path(chunks_directory(_fileid), + chunk_name(_uploadfilename, _chunk))) + + # Check whether upload is complete + chunkpaths = tuple( + Path(chunks_directory(_fileid), chunk_name(_uploadfilename, _achunk)) + for _achunk in range(1, _totalchunks+1)) + if all(_file.exists() for _file in chunkpaths): + # merge_files and clean up chunks + __merge_chunks__(_targetfile, chunkpaths) + chunks_directory(_fileid).rmdir() + jobid = trigger_rqtl2_bundle_qc( + species_id, population_id, _targetfile, _uploadfilename) + return url_for( + "expression-data.rqtl2.rqtl2_bundle_qc_status", jobid=jobid) + except Exception as exc:# pylint: disable=[broad-except] + msg = "Error processing uploaded file chunks." + app.logger.error(msg, exc_info=True, stack_info=True) + return jsonify({ + "message": msg, + "error": type(exc).__name__, + "error-description": " ".join(str(arg) for arg in exc.args), + "error-trace": traceback.format_exception(exc) + }), 500 + + return "OK" + + +@rqtl2.route("/upload/species/rqtl2-bundle/qc-status/<uuid:jobid>", + methods=["GET", "POST"]) +@require_login +def rqtl2_bundle_qc_status(jobid: UUID): + """Check the status of the QC jobs.""" + with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn, + database_connection(app.config["SQL_URI"]) as dbconn): + try: + thejob = jobs.job(rconn, jobs.jobsnamespace(), jobid) + messagelistname = thejob.get("log-messagelist") + logmessages = (rconn.lrange(messagelistname, 0, -1) + if bool(messagelistname) else []) + jobstatus = thejob["status"] + if jobstatus == "error": + return render_template( + "expression-data/rqtl2/rqtl2-qc-job-error.html", + job=thejob, + errorsgeneric=json.loads( + thejob.get("errors-generic", "[]")), + errorsgeno=json.loads( + thejob.get("errors-geno", "[]")), + errorspheno=json.loads( + thejob.get("errors-pheno", "[]")), + errorsphenose=json.loads( + thejob.get("errors-phenose", "[]")), + errorsphenocovar=json.loads( + thejob.get("errors-phenocovar", "[]")), + messages=logmessages) + if jobstatus == "success": + jobmeta = json.loads(thejob["job-metadata"]) + species = species_by_id(dbconn, jobmeta["speciesid"]) + return render_template( + "expression-data/rqtl2/rqtl2-qc-job-results.html", + species=species, + population=population_by_species_and_id( + dbconn, species["SpeciesId"], jobmeta["populationid"]), + rqtl2bundle=Path(jobmeta["rqtl2-bundle-file"]).name, + rqtl2bundleorig=jobmeta["original-filename"]) + + def compute_percentage(thejob, filetype) -> Union[str, None]: + if f"{filetype}-linecount" in thejob: + return "100" + if f"{filetype}-filesize" in thejob: + percent = ((int(thejob.get(f"{filetype}-checked", 0)) + / + int(thejob.get(f"{filetype}-filesize", 1))) + * 100) + return f"{percent:.2f}" + return None + + return render_template( + "expression-data/rqtl2/rqtl2-qc-job-status.html", + job=thejob, + geno_percent=compute_percentage(thejob, "geno"), + pheno_percent=compute_percentage(thejob, "pheno"), + phenose_percent=compute_percentage(thejob, "phenose"), + messages=logmessages) + except jobs.JobNotFound: + return render_template("expression-data/rqtl2/no-such-job.html", jobid=jobid) + + +def redirect_on_error(flaskroute, **kwargs): + """Utility to redirect on error""" + return redirect(url_for(flaskroute, **kwargs, pgsrc="error"), + code=(307 if request.method == "POST" else 302)) + + +def check_species(conn: mdb.Connection, formargs: dict) -> Optional[ + tuple[str, Response]]: + """ + Check whether the 'species_id' value is provided, and whether a + corresponding species exists in the database. + + Maybe give the function a better name...""" + speciespage = redirect_on_error("expression-data.rqtl2.select_species") + if "species_id" not in formargs: + return "You MUST provide the Species identifier.", speciespage + + if not bool(species_by_id(conn, formargs["species_id"])): + return "No species with the provided identifier exists.", speciespage + + return None + + +def check_population(conn: mdb.Connection, + formargs: dict, + species_id) -> Optional[tuple[str, Response]]: + """ + Check whether the 'population_id' value is provided, and whether a + corresponding population exists in the database. + + Maybe give the function a better name...""" + poppage = redirect_on_error( + "expression-data.rqtl2.select_species", species_id=species_id) + if "population_id" not in formargs: + return "You MUST provide the Population identifier.", poppage + + if not bool(population_by_species_and_id( + conn, species_id, formargs["population_id"])): + return "No population with the provided identifier exists.", poppage + + return None + + +def check_r_qtl2_bundle(formargs: dict, + species_id, + population_id) -> Optional[tuple[str, Response]]: + """Check for the existence of the R/qtl2 bundle.""" + fileuploadpage = redirect_on_error("expression-data.rqtl2.upload_rqtl2_bundle", + species_id=species_id, + population_id=population_id) + if not "rqtl2_bundle_file" in formargs: + return ( + "You MUST provide a R/qtl2 zip bundle for expression-data.", fileuploadpage) + + if not Path(fullpath(formargs["rqtl2_bundle_file"])).exists(): + return "No R/qtl2 bundle with the given name exists.", fileuploadpage + + return None + + +def check_geno_dataset(conn: mdb.Connection, + formargs: dict, + species_id, + population_id) -> Optional[tuple[str, Response]]: + """Check for the Genotype dataset.""" + genodsetpg = redirect_on_error("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id) + if not bool(formargs.get("geno-dataset-id")): + return ( + "You MUST provide a valid Genotype dataset identifier", genodsetpg) + + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute("SELECT * FROM GenoFreeze WHERE Id=%s", + (formargs["geno-dataset-id"],)) + results = cursor.fetchall() + if not bool(results): + return ("No genotype dataset with the provided identifier exists.", + genodsetpg) + if len(results) > 1: + return ( + "Data corruption: More than one genotype dataset with the same " + "identifier.", + genodsetpg) + + return None + +def check_tissue( + conn: mdb.Connection,formargs: dict) -> Optional[tuple[str, Response]]: + """Check for tissue/organ/biological material.""" + selectdsetpg = redirect_on_error("expression-data.rqtl2.select_dataset_info", + species_id=formargs["species_id"], + population_id=formargs["population_id"]) + if not bool(formargs.get("tissueid", "").strip()): + return ("No tissue/organ/biological material provided.", selectdsetpg) + + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute("SELECT * FROM Tissue WHERE Id=%s", + (formargs["tissueid"],)) + results = cursor.fetchall() + if not bool(results): + return ("No tissue/organ with the provided identifier exists.", + selectdsetpg) + + if len(results) > 1: + return ( + "Data corruption: More than one tissue/organ with the same " + "identifier.", + selectdsetpg) + + return None + + +def check_probe_study(conn: mdb.Connection, + formargs: dict, + species_id, + population_id) -> Optional[tuple[str, Response]]: + """Check for the ProbeSet study.""" + dsetinfopg = redirect_on_error("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id) + if not bool(formargs.get("probe-study-id")): + return "No probeset study was selected!", dsetinfopg + + if not bool(probeset_study_by_id(conn, formargs["probe-study-id"])): + return ("No probeset study with the provided identifier exists", + dsetinfopg) + + return None + + +def check_probe_dataset(conn: mdb.Connection, + formargs: dict, + species_id, + population_id) -> Optional[tuple[str, Response]]: + """Check for the ProbeSet dataset.""" + dsetinfopg = redirect_on_error("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id) + if not bool(formargs.get("probe-dataset-id")): + return "No probeset dataset was selected!", dsetinfopg + + if not bool(probeset_dataset_by_id(conn, formargs["probe-dataset-id"])): + return ("No probeset dataset with the provided identifier exists", + dsetinfopg) + + return None + + +def with_errors(endpointthunk: Callable, *checkfns): + """Run 'endpointthunk' with error checking.""" + formargs = {**dict(request.args), **dict(request.form)} + errors = tuple(item for item in (_fn(formargs=formargs) for _fn in checkfns) + if item is not None) + if len(errors) > 0: + flash(errors[0][0], "alert-error error-rqtl2") + return errors[0][1] + + return endpointthunk() + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/select-geno-dataset"), + methods=["POST"]) +@require_login +def select_geno_dataset(species_id: int, population_id: int): + """Select from existing geno datasets.""" + with database_connection(app.config["SQL_URI"]) as conn: + def __thunk__(): + geno_dset = geno_datasets_by_species_and_population( + conn, species_id, population_id) + if not bool(geno_dset): + flash("No genotype dataset was provided!", + "alert-error error-rqtl2") + return redirect(url_for("expression-data.rqtl2.select_geno_dataset", + species_id=species_id, + population_id=population_id, + pgsrc="error"), + code=307) + + flash("Genotype accepted", "alert-success error-rqtl2") + return redirect(url_for("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id, + pgsrc="expression-data.rqtl2.select_geno_dataset"), + code=307) + + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id)) + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/select-tissue"), + methods=["POST"]) +@require_login +def select_tissue(species_id: int, population_id: int): + """Select from existing tissues.""" + with database_connection(app.config["SQL_URI"]) as conn: + def __thunk__(): + if not bool(request.form.get("tissueid", "").strip()): + flash("Invalid tissue selection!", + "alert-error error-select-tissue error-rqtl2") + + return redirect(url_for("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id, + pgsrc="expression-data.rqtl2.select_geno_dataset"), + code=307) + + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id)) + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/create-tissue"), + methods=["POST"]) +@require_login +def create_tissue(species_id: int, population_id: int): + """Add new tissue, organ or biological material to the system.""" + form = request.form + datasetinfopage = redirect( + url_for("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id, + pgsrc="expression-data.rqtl2.select_geno_dataset"), + code=307) + with database_connection(app.config["SQL_URI"]) as conn: + tissuename = form.get("tissuename", "").strip() + tissueshortname = form.get("tissueshortname", "").strip() + if not bool(tissuename): + flash("Organ/Tissue name MUST be provided.", + "alert-error error-create-tissue error-rqtl2") + return datasetinfopage + + if not bool(tissueshortname): + flash("Organ/Tissue short name MUST be provided.", + "alert-error error-create-tissue error-rqtl2") + return datasetinfopage + + try: + tissue = create_new_tissue(conn, tissuename, tissueshortname) + flash("Tissue created successfully!", "alert-success") + return render_template( + "expression-data/rqtl2/create-tissue-success.html", + species=species_by_id(conn, species_id), + population=population_by_species_and_id( + conn, species_id, population_id), + rqtl2_bundle_file=request.form["rqtl2_bundle_file"], + geno_dataset=geno_dataset_by_id( + conn, + int(request.form["geno-dataset-id"])), + tissue=tissue) + except mdb.IntegrityError as _ierr: + flash("Tissue/Organ with that short name already exists!", + "alert-error error-create-tissue error-rqtl2") + return datasetinfopage + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/select-probeset-study"), + methods=["POST"]) +@require_login +def select_probeset_study(species_id: int, population_id: int): + """Select or create a probeset study.""" + with database_connection(app.config["SQL_URI"]) as conn: + def __thunk__(): + summary_page = redirect(url_for("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id), + code=307) + if not bool(probeset_study_by_id(conn, int(request.form["probe-study-id"]))): + flash("Invalid study selected!", "alert-error error-rqtl2") + return summary_page + + return summary_page + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_tissue, conn=conn), + partial(check_probe_study, + conn=conn, + species_id=species_id, + population_id=population_id)) + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/select-probeset-dataset"), + methods=["POST"]) +@require_login +def select_probeset_dataset(species_id: int, population_id: int): + """Select or create a probeset dataset.""" + with database_connection(app.config["SQL_URI"]) as conn: + def __thunk__(): + summary_page = redirect(url_for("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id), + code=307) + if not bool(probeset_study_by_id(conn, int(request.form["probe-study-id"]))): + flash("Invalid study selected!", "alert-error error-rqtl2") + return summary_page + + return summary_page + + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_tissue, conn=conn), + partial(check_probe_study, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_probe_dataset, + conn=conn, + species_id=species_id, + population_id=population_id)) + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/create-probeset-study"), + methods=["POST"]) +@require_login +def create_probeset_study(species_id: int, population_id: int): + """Create a new probeset study.""" + errorclasses = "alert-error error-rqtl2 error-rqtl2-create-probeset-study" + with database_connection(app.config["SQL_URI"]) as conn: + def __thunk__(): + form = request.form + dataset_info_page = redirect( + url_for("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id), + code=307) + + if not (bool(form.get("platformid")) and + bool(platform_by_id(conn, int(form["platformid"])))): + flash("Invalid platform selected.", errorclasses) + return dataset_info_page + + if not (bool(form.get("tissueid")) and + bool(tissue_by_id(conn, int(form["tissueid"])))): + flash("Invalid tissue selected.", errorclasses) + return dataset_info_page + + studyname = form["studyname"] + try: + study = probeset_create_study( + conn, population_id, int(form["platformid"]), int(form["tissueid"]), + studyname, form.get("studyfullname") or "", + form.get("studyshortname") or "") + except mdb.IntegrityError as _ierr: + flash(f"ProbeSet study with name '{escape(studyname)}' already " + "exists.", + errorclasses) + return dataset_info_page + return render_template( + "expression-data/rqtl2/create-probe-study-success.html", + species=species_by_id(conn, species_id), + population=population_by_species_and_id( + conn, species_id, population_id), + rqtl2_bundle_file=request.form["rqtl2_bundle_file"], + geno_dataset=geno_dataset_by_id( + conn, + int(request.form["geno-dataset-id"])), + study=study) + + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_tissue, conn=conn)) + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/create-probeset-dataset"), + methods=["POST"]) +@require_login +def create_probeset_dataset(species_id: int, population_id: int):#pylint: disable=[too-many-return-statements] + """Create a new probeset dataset.""" + errorclasses = "alert-error error-rqtl2 error-rqtl2-create-probeset-dataset" + with database_connection(app.config["SQL_URI"]) as conn: + def __thunk__():#pylint: disable=[too-many-return-statements] + form = request.form + summary_page = redirect(url_for("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id), + code=307) + if not bool(form.get("averageid")): + flash("Averaging method not selected!", errorclasses) + return summary_page + if not bool(form.get("datasetname")): + flash("Dataset name not provided!", errorclasses) + return summary_page + if not bool(form.get("datasetfullname")): + flash("Dataset full name not provided!", errorclasses) + return summary_page + + tissue = tissue_by_id(conn, form.get("tissueid", "").strip()) + + study = probeset_study_by_id(conn, int(form["probe-study-id"])) + if not bool(study): + flash("Invalid ProbeSet study provided!", errorclasses) + return summary_page + + avgmethod = averaging_method_by_id(conn, int(form["averageid"])) + if not bool(avgmethod): + flash("Invalid averaging method provided!", errorclasses) + return summary_page + + try: + dset = probeset_create_dataset(conn, + int(form["probe-study-id"]), + int(form["averageid"]), + form["datasetname"], + form["datasetfullname"], + form["datasetshortname"], + form["datasetpublic"] == "on", + form.get( + "datasetdatascale", "log2")) + except mdb.IntegrityError as _ierr: + app.logger.debug("Possible integrity error: %s", traceback.format_exc()) + flash(("IntegrityError: The data you provided has some errors: " + f"{_ierr.args}"), + errorclasses) + return summary_page + except Exception as _exc:# pylint: disable=[broad-except] + app.logger.debug("Error creating ProbeSet dataset: %s", + traceback.format_exc()) + flash(("There was a problem creating your dataset. Please try " + "again."), + errorclasses) + return summary_page + return render_template( + "expression-data/rqtl2/create-probe-dataset-success.html", + species=species_by_id(conn, species_id), + population=population_by_species_and_id( + conn, species_id, population_id), + rqtl2_bundle_file=request.form["rqtl2_bundle_file"], + geno_dataset=geno_dataset_by_id( + conn, + int(request.form["geno-dataset-id"])), + tissue=tissue, + study=study, + avgmethod=avgmethod, + dataset=dset) + + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_tissue, conn=conn), + partial(check_probe_study, + conn=conn, + species_id=species_id, + population_id=population_id)) + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/dataset-info"), + methods=["POST"]) +@require_login +def select_dataset_info(species_id: int, population_id: int): + """ + If `geno` files exist in the R/qtl2 bundle, prompt user to provide the + dataset the genotypes belong to. + """ + form = request.form + with database_connection(app.config["SQL_URI"]) as conn: + def __thunk__(): + species = species_by_id(conn, species_id) + population = population_by_species_and_id( + conn, species_id, population_id) + thefile = fullpath(form["rqtl2_bundle_file"]) + with ZipFile(str(thefile), "r") as zfile: + cdata = r_qtl2.control_data(zfile) + + geno_dataset = geno_dataset_by_id( + conn,form.get("geno-dataset-id", "").strip()) + if "geno" in cdata and not bool(form.get("geno-dataset-id")): + return render_template( + "expression-data/rqtl2/select-geno-dataset.html", + species=species, + population=population, + rqtl2_bundle_file=thefile.name, + datasets=geno_datasets_by_species_and_population( + conn, species_id, population_id)) + + tissue = tissue_by_id(conn, form.get("tissueid", "").strip()) + if "pheno" in cdata and not bool(tissue): + return render_template( + "expression-data/rqtl2/select-tissue.html", + species=species, + population=population, + rqtl2_bundle_file=thefile.name, + geno_dataset=geno_dataset, + studies=probeset_studies_by_species_and_population( + conn, species_id, population_id), + platforms=platforms_by_species(conn, species_id), + tissues=all_tissues(conn)) + + probeset_study = probeset_study_by_id( + conn, form.get("probe-study-id", "").strip()) + if "pheno" in cdata and not bool(probeset_study): + return render_template( + "expression-data/rqtl2/select-probeset-study-id.html", + species=species, + population=population, + rqtl2_bundle_file=thefile.name, + geno_dataset=geno_dataset, + studies=probeset_studies_by_species_and_population( + conn, species_id, population_id), + platforms=platforms_by_species(conn, species_id), + tissue=tissue) + probeset_study = probeset_study_by_id( + conn, int(form["probe-study-id"])) + + probeset_dataset = probeset_dataset_by_id( + conn, form.get("probe-dataset-id", "").strip()) + if "pheno" in cdata and not bool(probeset_dataset): + return render_template( + "expression-data/rqtl2/select-probeset-dataset.html", + species=species, + population=population, + rqtl2_bundle_file=thefile.name, + geno_dataset=geno_dataset, + probe_study=probeset_study, + tissue=tissue, + datasets=probeset_datasets_by_study( + conn, int(form["probe-study-id"])), + avgmethods=averaging_methods(conn)) + + return render_template("expression-data/rqtl2/summary-info.html", + species=species, + population=population, + rqtl2_bundle_file=thefile.name, + geno_dataset=geno_dataset, + tissue=tissue, + probe_study=probeset_study, + probe_dataset=probeset_dataset) + + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id)) + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/confirm-bundle-details"), + methods=["POST"]) +@require_login +def confirm_bundle_details(species_id: int, population_id: int): + """Confirm the details and trigger R/qtl2 bundle processing...""" + redisuri = app.config["REDIS_URL"] + with (database_connection(app.config["SQL_URI"]) as conn, + Redis.from_url(redisuri, decode_responses=True) as rconn): + def __thunk__(): + redis_ttl_seconds = app.config["JOBS_TTL_SECONDS"] + jobid = str(uuid4()) + _job = jobs.launch_job( + jobs.initialise_job( + rconn, + jobs.jobsnamespace(), + jobid, + [ + sys.executable, "-m", "scripts.process_rqtl2_bundle", + app.config["SQL_URI"], app.config["REDIS_URL"], + jobs.jobsnamespace(), jobid, "--redisexpiry", + str(redis_ttl_seconds)], + "R/qtl2 Bundle Upload", + redis_ttl_seconds, + { + "bundle-metadata": json.dumps({ + "speciesid": species_id, + "populationid": population_id, + "rqtl2-bundle-file": str(fullpath( + request.form["rqtl2_bundle_file"])), + "geno-dataset-id": request.form.get( + "geno-dataset-id", ""), + "probe-study-id": request.form.get( + "probe-study-id", ""), + "probe-dataset-id": request.form.get( + "probe-dataset-id", ""), + **({ + "platformid": probeset_study_by_id( + conn, + int(request.form["probe-study-id"]))["ChipId"] + } if bool(request.form.get("probe-study-id")) else {}) + }) + }), + redisuri, + f"{app.config['UPLOAD_FOLDER']}/job_errors") + + return redirect(url_for("expression-data.rqtl2.rqtl2_processing_status", + jobid=jobid)) + + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_probe_study, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_probe_dataset, + conn=conn, + species_id=species_id, + population_id=population_id)) + + +@rqtl2.route("/status/<uuid:jobid>") +def rqtl2_processing_status(jobid: UUID): + """Retrieve the status of the job processing the uploaded R/qtl2 bundle.""" + with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: + try: + thejob = jobs.job(rconn, jobs.jobsnamespace(), jobid) + + messagelistname = thejob.get("log-messagelist") + logmessages = (rconn.lrange(messagelistname, 0, -1) + if bool(messagelistname) else []) + + if thejob["status"] == "error": + return render_template( + "expression-data/rqtl2/rqtl2-job-error.html", + job=thejob, + messages=logmessages) + if thejob["status"] == "success": + return render_template( + "expression-data/rqtl2/rqtl2-job-results.html", + job=thejob, + messages=logmessages) + + return render_template( + "expression-data/rqtl2/rqtl2-job-status.html", + job=thejob, + messages=logmessages) + except jobs.JobNotFound as _exc: + return render_template("expression-data/rqtl2/no-such-job.html", + jobid=jobid) |