diff options
Diffstat (limited to 'uploader/population')
-rw-r--r-- | uploader/population/models.py | 69 | ||||
-rw-r--r-- | uploader/population/rqtl2.py | 1075 | ||||
-rw-r--r-- | uploader/population/views.py | 72 |
3 files changed, 1153 insertions, 63 deletions
diff --git a/uploader/population/models.py b/uploader/population/models.py index 782bc9f..6dcd85e 100644 --- a/uploader/population/models.py +++ b/uploader/population/models.py @@ -44,33 +44,44 @@ def population_genetic_types(conn) -> tuple: return tuple(row["GeneticType"] for row in cursor.fetchall()) -def save_population(conn: mdb.Connection, population_details: dict) -> dict: +def save_population(cursor: mdb.cursors.Cursor, population_details: dict) -> dict: """Save the population details to the db.""" - with conn.cursor(cursorclass=DictCursor) as cursor: - #TODO: Handle FamilyOrder here - cursor.execute( - "INSERT INTO InbredSet(" - "InbredSetId, InbredSetName, Name, SpeciesId, FullName, " - "public, MappingMethodId, GeneticType, Family, MenuOrderId, " - "InbredSetCode, Description" - ") " - "VALUES (" - "%(InbredSetId)s, %(InbredSetName)s, %(Name)s, %(SpeciesId)s, " - "%(FullName)s, %(public)s, %(MappingMethodId)s, %(GeneticType)s, " - "%(Family)s, %(MenuOrderId)s, %(InbredSetCode)s, %(Description)s" - ")", - { - "MenuOrderId": 0, - "InbredSetId": 0, - "public": 2, - **population_details - }) - new_id = cursor.lastrowid - cursor.execute("UPDATE InbredSet SET InbredSetId=%s WHERE Id=%s", - (new_id, new_id)) - return { - **population_details, - "Id": new_id, - "InbredSetId": new_id, - "population_id": new_id - } + cursor.execute("SELECT DISTINCT(Family), FamilyOrder FROM InbredSet " + "WHERE Family IS NOT NULL AND Family != '' " + "AND FamilyOrder IS NOT NULL " + "ORDER BY FamilyOrder ASC") + _families = { + row["Family"]: int(row["FamilyOrder"]) + for row in cursor.fetchall() + } + params = { + "MenuOrderId": 0, + "InbredSetId": 0, + "public": 2, + **population_details, + "FamilyOrder": _families.get( + population_details["Family"], + max(_families.values())+1) + } + cursor.execute( + "INSERT INTO InbredSet(" + "InbredSetId, InbredSetName, Name, SpeciesId, FullName, " + "public, MappingMethodId, GeneticType, Family, FamilyOrder," + " MenuOrderId, InbredSetCode, Description" + ") " + "VALUES (" + "%(InbredSetId)s, %(InbredSetName)s, %(Name)s, %(SpeciesId)s, " + "%(FullName)s, %(public)s, %(MappingMethodId)s, %(GeneticType)s, " + "%(Family)s, %(FamilyOrder)s, %(MenuOrderId)s, %(InbredSetCode)s, " + "%(Description)s" + ")", + params) + new_id = cursor.lastrowid + cursor.execute("UPDATE InbredSet SET InbredSetId=%s WHERE Id=%s", + (new_id, new_id)) + return { + **params, + "Id": new_id, + "InbredSetId": new_id, + "population_id": new_id + } diff --git a/uploader/population/rqtl2.py b/uploader/population/rqtl2.py new file mode 100644 index 0000000..9968bd6 --- /dev/null +++ b/uploader/population/rqtl2.py @@ -0,0 +1,1075 @@ +"""Module to handle uploading of R/qtl2 bundles."""#pylint: disable=[too-many-lines] +import sys +import json +import traceback +from pathlib import Path +from uuid import UUID, uuid4 +from functools import partial +from zipfile import ZipFile, is_zipfile +from typing import Union, Callable, Optional + +import MySQLdb as mdb +from redis import Redis +from MySQLdb.cursors import DictCursor +from werkzeug.utils import secure_filename +from flask import ( + flash, + escape, + request, + jsonify, + url_for, + redirect, + Response, + Blueprint, + render_template, + current_app as app) + +from r_qtl import r_qtl2 + +from uploader import jobs +from uploader.files import save_file, fullpath +from uploader.species.models import all_species +from uploader.db_utils import with_db_connection, database_connection + +from uploader.authorisation import require_login +from uploader.platforms.models import platform_by_id, platforms_by_species +from uploader.db.averaging import averaging_methods, averaging_method_by_id +from uploader.db.tissues import all_tissues, tissue_by_id, create_new_tissue +from uploader.population.models import (populations_by_species, + population_by_species_and_id) +from uploader.species.models import species_by_id +from uploader.db.datasets import ( + geno_dataset_by_id, + geno_datasets_by_species_and_population, + + probeset_study_by_id, + probeset_create_study, + probeset_dataset_by_id, + probeset_create_dataset, + probeset_datasets_by_study, + probeset_studies_by_species_and_population) + +rqtl2 = Blueprint("rqtl2", __name__) + + +@rqtl2.route("/", methods=["GET", "POST"]) +@rqtl2.route("/select-species", methods=["GET", "POST"]) +@require_login +def select_species(): + """Select the species.""" + if request.method == "GET": + return render_template("expression-data/rqtl2/index.html", + species=with_db_connection(all_species)) + + species_id = request.form.get("species_id") + species = with_db_connection( + lambda conn: species_by_id(conn, species_id)) + if bool(species): + return redirect(url_for( + "species.populations.expression-data.rqtl2.select_population", + species_id=species_id)) + flash("Invalid species or no species selected!", "alert-error error-rqtl2") + return redirect(url_for("expression-data.rqtl2.select_species")) + + +@rqtl2.route("<int:species_id>/expression-data/rqtl2/select-population", + methods=["GET", "POST"]) +@require_login +def select_population(species_id: int): + """Select/Create the population to organise data under.""" + with database_connection(app.config["SQL_URI"]) as conn: + species = species_by_id(conn, species_id) + if not bool(species): + flash("Invalid species selected!", "alert-error error-rqtl2") + return redirect(url_for("expression-data.rqtl2.select_species")) + + if request.method == "GET": + return render_template( + "expression-data/rqtl2/select-population.html", + species=species, + populations=populations_by_species(conn, species_id)) + + population = population_by_species_and_id( + conn, species["SpeciesId"], request.form.get("inbredset_id")) + if not bool(population): + flash("Invalid Population!", "alert-error error-rqtl2") + return redirect( + url_for("expression-data.rqtl2.select_population", pgsrc="error"), + code=307) + + return redirect(url_for("expression-data.rqtl2.upload_rqtl2_bundle", + species_id=species["SpeciesId"], + population_id=population["InbredSetId"])) + + +class __RequestError__(Exception): #pylint: disable=[invalid-name] + """Internal class to avoid pylint's `too-many-return-statements` error.""" + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle"), + methods=["GET", "POST"]) +@require_login +def upload_rqtl2_bundle(species_id: int, population_id: int): + """Allow upload of R/qtl2 bundle.""" + with database_connection(app.config["SQL_URI"]) as conn: + species = species_by_id(conn, species_id) + population = population_by_species_and_id( + conn, species["SpeciesId"], population_id) + if not bool(species): + flash("Invalid species!", "alert-error error-rqtl2") + return redirect(url_for("expression-data.rqtl2.select_species")) + if not bool(population): + flash("Invalid Population!", "alert-error error-rqtl2") + return redirect( + url_for("expression-data.rqtl2.select_population", pgsrc="error"), + code=307) + if request.method == "GET" or ( + request.method == "POST" + and bool(request.args.get("pgsrc"))): + return render_template( + "expression-data/rqtl2/upload-rqtl2-bundle-step-01.html", + species=species, + population=population) + + try: + app.logger.debug("Files in the form: %s", request.files) + the_file = save_file(request.files["rqtl2_bundle_file"], + Path(app.config["UPLOAD_FOLDER"])) + except AssertionError: + app.logger.debug(traceback.format_exc()) + flash("Please provide a valid R/qtl2 zip bundle.", + "alert-error error-rqtl2") + return redirect(url_for("expression-data.rqtl2.upload_rqtl2_bundle", + species_id=species_id, + population_id=population_id)) + + if not is_zipfile(str(the_file)): + app.logger.debug("The file is not a zip file.") + raise __RequestError__("Invalid file! Expected a zip file.") + + jobid = trigger_rqtl2_bundle_qc( + species_id, + population_id, + the_file, + request.files["rqtl2_bundle_file"].filename)#type: ignore[arg-type] + return redirect(url_for( + "expression-data.rqtl2.rqtl2_bundle_qc_status", jobid=jobid)) + + +def trigger_rqtl2_bundle_qc( + species_id: int, + population_id: int, + rqtl2bundle: Path, + originalfilename: str +) -> UUID: + """Trigger QC on the R/qtl2 bundle.""" + redisuri = app.config["REDIS_URL"] + with Redis.from_url(redisuri, decode_responses=True) as rconn: + jobid = uuid4() + redis_ttl_seconds = app.config["JOBS_TTL_SECONDS"] + jobs.launch_job( + jobs.initialise_job( + rconn, + jobs.jobsnamespace(), + str(jobid), + [sys.executable, "-m", "scripts.qc_on_rqtl2_bundle", + app.config["SQL_URI"], app.config["REDIS_URL"], + jobs.jobsnamespace(), str(jobid), str(species_id), + str(population_id), "--redisexpiry", + str(redis_ttl_seconds)], + "rqtl2-bundle-qc-job", + redis_ttl_seconds, + {"job-metadata": json.dumps({ + "speciesid": species_id, + "populationid": population_id, + "rqtl2-bundle-file": str(rqtl2bundle.absolute()), + "original-filename": originalfilename})}), + redisuri, + f"{app.config['UPLOAD_FOLDER']}/job_errors") + return jobid + + +def chunk_name(uploadfilename: str, chunkno: int) -> str: + """Generate chunk name from original filename and chunk number""" + if uploadfilename == "": + raise ValueError("Name cannot be empty!") + if chunkno < 1: + raise ValueError("Chunk number must be greater than zero") + return f"{secure_filename(uploadfilename)}_part_{chunkno:05d}" + + +def chunks_directory(uniqueidentifier: str) -> Path: + """Compute the directory where chunks are temporarily stored.""" + if uniqueidentifier == "": + raise ValueError("Unique identifier cannot be empty!") + return Path(app.config["UPLOAD_FOLDER"], f"tempdir_{uniqueidentifier}") + + +@rqtl2.route(("<int:species_id>/populations/<int:population_id>/rqtl2/" + "/rqtl2-bundle-chunked"), + methods=["GET"]) +@require_login +def upload_rqtl2_bundle_chunked_get(# pylint: disable=["unused-argument"] + species_id: int, + population_id: int +): + """ + Extension to the `upload_rqtl2_bundle` endpoint above that provides a way + for testing whether all the chunks have been uploaded and to assist with + resuming a failed expression-data. + """ + fileid = request.args.get("resumableIdentifier", type=str) or "" + filename = request.args.get("resumableFilename", type=str) or "" + chunk = request.args.get("resumableChunkNumber", type=int) or 0 + if not(fileid or filename or chunk): + return jsonify({ + "message": "At least one required query parameter is missing.", + "error": "BadRequest", + "statuscode": 400 + }), 400 + + if Path(chunks_directory(fileid), + chunk_name(filename, chunk)).exists(): + return "OK" + + return jsonify({ + "message": f"Chunk {chunk} was not found.", + "error": "NotFound", + "statuscode": 404 + }), 404 + + +def __merge_chunks__(targetfile: Path, chunkpaths: tuple[Path, ...]) -> Path: + """Merge the chunks into a single file.""" + with open(targetfile, "ab") as _target: + for chunkfile in chunkpaths: + with open(chunkfile, "rb") as _chunkdata: + _target.write(_chunkdata.read()) + + chunkfile.unlink() + return targetfile + + +@rqtl2.route(("<int:species_id>/population/<int:population_id>/rqtl2/upload/" + "/rqtl2-bundle-chunked"), + methods=["POST"]) +@require_login +def upload_rqtl2_bundle_chunked_post(species_id: int, population_id: int): + """ + Extension to the `upload_rqtl2_bundle` endpoint above that allows large + files to be uploaded in chunks. + + This should hopefully speed up uploads, and if done right, even enable + resumable uploads + """ + _totalchunks = request.form.get("resumableTotalChunks", type=int) or 0 + _chunk = request.form.get("resumableChunkNumber", default=1, type=int) + _uploadfilename = request.form.get( + "resumableFilename", default="", type=str) or "" + _fileid = request.form.get( + "resumableIdentifier", default="", type=str) or "" + _targetfile = Path(app.config["UPLOAD_FOLDER"], _fileid) + + if _targetfile.exists(): + return jsonify({ + "message": ( + "A file with a similar unique identifier has previously been " + "uploaded and possibly is/has being/been processed."), + "error": "BadRequest", + "statuscode": 400 + }), 400 + + try: + # save chunk data + chunks_directory(_fileid).mkdir(exist_ok=True, parents=True) + request.files["file"].save(Path(chunks_directory(_fileid), + chunk_name(_uploadfilename, _chunk))) + + # Check whether upload is complete + chunkpaths = tuple( + Path(chunks_directory(_fileid), chunk_name(_uploadfilename, _achunk)) + for _achunk in range(1, _totalchunks+1)) + if all(_file.exists() for _file in chunkpaths): + # merge_files and clean up chunks + __merge_chunks__(_targetfile, chunkpaths) + chunks_directory(_fileid).rmdir() + jobid = trigger_rqtl2_bundle_qc( + species_id, population_id, _targetfile, _uploadfilename) + return url_for( + "expression-data.rqtl2.rqtl2_bundle_qc_status", jobid=jobid) + except Exception as exc:# pylint: disable=[broad-except] + msg = "Error processing uploaded file chunks." + app.logger.error(msg, exc_info=True, stack_info=True) + return jsonify({ + "message": msg, + "error": type(exc).__name__, + "error-description": " ".join(str(arg) for arg in exc.args), + "error-trace": traceback.format_exception(exc) + }), 500 + + return "OK" + + +@rqtl2.route("/upload/species/rqtl2-bundle/qc-status/<uuid:jobid>", + methods=["GET", "POST"]) +@require_login +def rqtl2_bundle_qc_status(jobid: UUID): + """Check the status of the QC jobs.""" + with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn, + database_connection(app.config["SQL_URI"]) as dbconn): + try: + thejob = jobs.job(rconn, jobs.jobsnamespace(), jobid) + messagelistname = thejob.get("log-messagelist") + logmessages = (rconn.lrange(messagelistname, 0, -1) + if bool(messagelistname) else []) + jobstatus = thejob["status"] + if jobstatus == "error": + return render_template( + "expression-data/rqtl2/rqtl2-qc-job-error.html", + job=thejob, + errorsgeneric=json.loads( + thejob.get("errors-generic", "[]")), + errorsgeno=json.loads( + thejob.get("errors-geno", "[]")), + errorspheno=json.loads( + thejob.get("errors-pheno", "[]")), + errorsphenose=json.loads( + thejob.get("errors-phenose", "[]")), + errorsphenocovar=json.loads( + thejob.get("errors-phenocovar", "[]")), + messages=logmessages) + if jobstatus == "success": + jobmeta = json.loads(thejob["job-metadata"]) + species = species_by_id(dbconn, jobmeta["speciesid"]) + return render_template( + "expression-data/rqtl2/rqtl2-qc-job-results.html", + species=species, + population=population_by_species_and_id( + dbconn, species["SpeciesId"], jobmeta["populationid"]), + rqtl2bundle=Path(jobmeta["rqtl2-bundle-file"]).name, + rqtl2bundleorig=jobmeta["original-filename"]) + + def compute_percentage(thejob, filetype) -> Union[str, None]: + if f"{filetype}-linecount" in thejob: + return "100" + if f"{filetype}-filesize" in thejob: + percent = ((int(thejob.get(f"{filetype}-checked", 0)) + / + int(thejob.get(f"{filetype}-filesize", 1))) + * 100) + return f"{percent:.2f}" + return None + + return render_template( + "expression-data/rqtl2/rqtl2-qc-job-status.html", + job=thejob, + geno_percent=compute_percentage(thejob, "geno"), + pheno_percent=compute_percentage(thejob, "pheno"), + phenose_percent=compute_percentage(thejob, "phenose"), + messages=logmessages) + except jobs.JobNotFound: + return render_template("expression-data/rqtl2/no-such-job.html", jobid=jobid) + + +def redirect_on_error(flaskroute, **kwargs): + """Utility to redirect on error""" + return redirect(url_for(flaskroute, **kwargs, pgsrc="error"), + code=(307 if request.method == "POST" else 302)) + + +def check_species(conn: mdb.Connection, formargs: dict) -> Optional[ + tuple[str, Response]]: + """ + Check whether the 'species_id' value is provided, and whether a + corresponding species exists in the database. + + Maybe give the function a better name...""" + speciespage = redirect_on_error("expression-data.rqtl2.select_species") + if "species_id" not in formargs: + return "You MUST provide the Species identifier.", speciespage + + if not bool(species_by_id(conn, formargs["species_id"])): + return "No species with the provided identifier exists.", speciespage + + return None + + +def check_population(conn: mdb.Connection, + formargs: dict, + species_id) -> Optional[tuple[str, Response]]: + """ + Check whether the 'population_id' value is provided, and whether a + corresponding population exists in the database. + + Maybe give the function a better name...""" + poppage = redirect_on_error( + "expression-data.rqtl2.select_species", species_id=species_id) + if "population_id" not in formargs: + return "You MUST provide the Population identifier.", poppage + + if not bool(population_by_species_and_id( + conn, species_id, formargs["population_id"])): + return "No population with the provided identifier exists.", poppage + + return None + + +def check_r_qtl2_bundle(formargs: dict, + species_id, + population_id) -> Optional[tuple[str, Response]]: + """Check for the existence of the R/qtl2 bundle.""" + fileuploadpage = redirect_on_error("expression-data.rqtl2.upload_rqtl2_bundle", + species_id=species_id, + population_id=population_id) + if not "rqtl2_bundle_file" in formargs: + return ( + "You MUST provide a R/qtl2 zip bundle for expression-data.", fileuploadpage) + + if not Path(fullpath(formargs["rqtl2_bundle_file"])).exists(): + return "No R/qtl2 bundle with the given name exists.", fileuploadpage + + return None + + +def check_geno_dataset(conn: mdb.Connection, + formargs: dict, + species_id, + population_id) -> Optional[tuple[str, Response]]: + """Check for the Genotype dataset.""" + genodsetpg = redirect_on_error("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id) + if not bool(formargs.get("geno-dataset-id")): + return ( + "You MUST provide a valid Genotype dataset identifier", genodsetpg) + + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute("SELECT * FROM GenoFreeze WHERE Id=%s", + (formargs["geno-dataset-id"],)) + results = cursor.fetchall() + if not bool(results): + return ("No genotype dataset with the provided identifier exists.", + genodsetpg) + if len(results) > 1: + return ( + "Data corruption: More than one genotype dataset with the same " + "identifier.", + genodsetpg) + + return None + +def check_tissue( + conn: mdb.Connection,formargs: dict) -> Optional[tuple[str, Response]]: + """Check for tissue/organ/biological material.""" + selectdsetpg = redirect_on_error("expression-data.rqtl2.select_dataset_info", + species_id=formargs["species_id"], + population_id=formargs["population_id"]) + if not bool(formargs.get("tissueid", "").strip()): + return ("No tissue/organ/biological material provided.", selectdsetpg) + + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute("SELECT * FROM Tissue WHERE Id=%s", + (formargs["tissueid"],)) + results = cursor.fetchall() + if not bool(results): + return ("No tissue/organ with the provided identifier exists.", + selectdsetpg) + + if len(results) > 1: + return ( + "Data corruption: More than one tissue/organ with the same " + "identifier.", + selectdsetpg) + + return None + + +def check_probe_study(conn: mdb.Connection, + formargs: dict, + species_id, + population_id) -> Optional[tuple[str, Response]]: + """Check for the ProbeSet study.""" + dsetinfopg = redirect_on_error("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id) + if not bool(formargs.get("probe-study-id")): + return "No probeset study was selected!", dsetinfopg + + if not bool(probeset_study_by_id(conn, formargs["probe-study-id"])): + return ("No probeset study with the provided identifier exists", + dsetinfopg) + + return None + + +def check_probe_dataset(conn: mdb.Connection, + formargs: dict, + species_id, + population_id) -> Optional[tuple[str, Response]]: + """Check for the ProbeSet dataset.""" + dsetinfopg = redirect_on_error("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id) + if not bool(formargs.get("probe-dataset-id")): + return "No probeset dataset was selected!", dsetinfopg + + if not bool(probeset_dataset_by_id(conn, formargs["probe-dataset-id"])): + return ("No probeset dataset with the provided identifier exists", + dsetinfopg) + + return None + + +def with_errors(endpointthunk: Callable, *checkfns): + """Run 'endpointthunk' with error checking.""" + formargs = {**dict(request.args), **dict(request.form)} + errors = tuple(item for item in (_fn(formargs=formargs) for _fn in checkfns) + if item is not None) + if len(errors) > 0: + flash(errors[0][0], "alert-error error-rqtl2") + return errors[0][1] + + return endpointthunk() + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/select-geno-dataset"), + methods=["POST"]) +@require_login +def select_geno_dataset(species_id: int, population_id: int): + """Select from existing geno datasets.""" + with database_connection(app.config["SQL_URI"]) as conn: + def __thunk__(): + geno_dset = geno_datasets_by_species_and_population( + conn, species_id, population_id) + if not bool(geno_dset): + flash("No genotype dataset was provided!", + "alert-error error-rqtl2") + return redirect(url_for("expression-data.rqtl2.select_geno_dataset", + species_id=species_id, + population_id=population_id, + pgsrc="error"), + code=307) + + flash("Genotype accepted", "alert-success error-rqtl2") + return redirect(url_for("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id, + pgsrc="expression-data.rqtl2.select_geno_dataset"), + code=307) + + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id)) + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/select-tissue"), + methods=["POST"]) +@require_login +def select_tissue(species_id: int, population_id: int): + """Select from existing tissues.""" + with database_connection(app.config["SQL_URI"]) as conn: + def __thunk__(): + if not bool(request.form.get("tissueid", "").strip()): + flash("Invalid tissue selection!", + "alert-error error-select-tissue error-rqtl2") + + return redirect(url_for("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id, + pgsrc="expression-data.rqtl2.select_geno_dataset"), + code=307) + + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id)) + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/create-tissue"), + methods=["POST"]) +@require_login +def create_tissue(species_id: int, population_id: int): + """Add new tissue, organ or biological material to the system.""" + form = request.form + datasetinfopage = redirect( + url_for("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id, + pgsrc="expression-data.rqtl2.select_geno_dataset"), + code=307) + with database_connection(app.config["SQL_URI"]) as conn: + tissuename = form.get("tissuename", "").strip() + tissueshortname = form.get("tissueshortname", "").strip() + if not bool(tissuename): + flash("Organ/Tissue name MUST be provided.", + "alert-error error-create-tissue error-rqtl2") + return datasetinfopage + + if not bool(tissueshortname): + flash("Organ/Tissue short name MUST be provided.", + "alert-error error-create-tissue error-rqtl2") + return datasetinfopage + + try: + tissue = create_new_tissue(conn, tissuename, tissueshortname) + flash("Tissue created successfully!", "alert-success") + return render_template( + "expression-data/rqtl2/create-tissue-success.html", + species=species_by_id(conn, species_id), + population=population_by_species_and_id( + conn, species_id, population_id), + rqtl2_bundle_file=request.form["rqtl2_bundle_file"], + geno_dataset=geno_dataset_by_id( + conn, + int(request.form["geno-dataset-id"])), + tissue=tissue) + except mdb.IntegrityError as _ierr: + flash("Tissue/Organ with that short name already exists!", + "alert-error error-create-tissue error-rqtl2") + return datasetinfopage + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/select-probeset-study"), + methods=["POST"]) +@require_login +def select_probeset_study(species_id: int, population_id: int): + """Select or create a probeset study.""" + with database_connection(app.config["SQL_URI"]) as conn: + def __thunk__(): + summary_page = redirect(url_for("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id), + code=307) + if not bool(probeset_study_by_id(conn, int(request.form["probe-study-id"]))): + flash("Invalid study selected!", "alert-error error-rqtl2") + return summary_page + + return summary_page + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_tissue, conn=conn), + partial(check_probe_study, + conn=conn, + species_id=species_id, + population_id=population_id)) + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/select-probeset-dataset"), + methods=["POST"]) +@require_login +def select_probeset_dataset(species_id: int, population_id: int): + """Select or create a probeset dataset.""" + with database_connection(app.config["SQL_URI"]) as conn: + def __thunk__(): + summary_page = redirect(url_for("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id), + code=307) + if not bool(probeset_study_by_id(conn, int(request.form["probe-study-id"]))): + flash("Invalid study selected!", "alert-error error-rqtl2") + return summary_page + + return summary_page + + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_tissue, conn=conn), + partial(check_probe_study, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_probe_dataset, + conn=conn, + species_id=species_id, + population_id=population_id)) + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/create-probeset-study"), + methods=["POST"]) +@require_login +def create_probeset_study(species_id: int, population_id: int): + """Create a new probeset study.""" + errorclasses = "alert-error error-rqtl2 error-rqtl2-create-probeset-study" + with database_connection(app.config["SQL_URI"]) as conn: + def __thunk__(): + form = request.form + dataset_info_page = redirect( + url_for("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id), + code=307) + + if not (bool(form.get("platformid")) and + bool(platform_by_id(conn, int(form["platformid"])))): + flash("Invalid platform selected.", errorclasses) + return dataset_info_page + + if not (bool(form.get("tissueid")) and + bool(tissue_by_id(conn, int(form["tissueid"])))): + flash("Invalid tissue selected.", errorclasses) + return dataset_info_page + + studyname = form["studyname"] + try: + study = probeset_create_study( + conn, population_id, int(form["platformid"]), int(form["tissueid"]), + studyname, form.get("studyfullname") or "", + form.get("studyshortname") or "") + except mdb.IntegrityError as _ierr: + flash(f"ProbeSet study with name '{escape(studyname)}' already " + "exists.", + errorclasses) + return dataset_info_page + return render_template( + "expression-data/rqtl2/create-probe-study-success.html", + species=species_by_id(conn, species_id), + population=population_by_species_and_id( + conn, species_id, population_id), + rqtl2_bundle_file=request.form["rqtl2_bundle_file"], + geno_dataset=geno_dataset_by_id( + conn, + int(request.form["geno-dataset-id"])), + study=study) + + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_tissue, conn=conn)) + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/create-probeset-dataset"), + methods=["POST"]) +@require_login +def create_probeset_dataset(species_id: int, population_id: int):#pylint: disable=[too-many-return-statements] + """Create a new probeset dataset.""" + errorclasses = "alert-error error-rqtl2 error-rqtl2-create-probeset-dataset" + with database_connection(app.config["SQL_URI"]) as conn: + def __thunk__():#pylint: disable=[too-many-return-statements] + form = request.form + summary_page = redirect(url_for("expression-data.rqtl2.select_dataset_info", + species_id=species_id, + population_id=population_id), + code=307) + if not bool(form.get("averageid")): + flash("Averaging method not selected!", errorclasses) + return summary_page + if not bool(form.get("datasetname")): + flash("Dataset name not provided!", errorclasses) + return summary_page + if not bool(form.get("datasetfullname")): + flash("Dataset full name not provided!", errorclasses) + return summary_page + + tissue = tissue_by_id(conn, form.get("tissueid", "").strip()) + + study = probeset_study_by_id(conn, int(form["probe-study-id"])) + if not bool(study): + flash("Invalid ProbeSet study provided!", errorclasses) + return summary_page + + avgmethod = averaging_method_by_id(conn, int(form["averageid"])) + if not bool(avgmethod): + flash("Invalid averaging method provided!", errorclasses) + return summary_page + + try: + dset = probeset_create_dataset(conn, + int(form["probe-study-id"]), + int(form["averageid"]), + form["datasetname"], + form["datasetfullname"], + form["datasetshortname"], + form["datasetpublic"] == "on", + form.get( + "datasetdatascale", "log2")) + except mdb.IntegrityError as _ierr: + app.logger.debug("Possible integrity error: %s", traceback.format_exc()) + flash(("IntegrityError: The data you provided has some errors: " + f"{_ierr.args}"), + errorclasses) + return summary_page + except Exception as _exc:# pylint: disable=[broad-except] + app.logger.debug("Error creating ProbeSet dataset: %s", + traceback.format_exc()) + flash(("There was a problem creating your dataset. Please try " + "again."), + errorclasses) + return summary_page + return render_template( + "expression-data/rqtl2/create-probe-dataset-success.html", + species=species_by_id(conn, species_id), + population=population_by_species_and_id( + conn, species_id, population_id), + rqtl2_bundle_file=request.form["rqtl2_bundle_file"], + geno_dataset=geno_dataset_by_id( + conn, + int(request.form["geno-dataset-id"])), + tissue=tissue, + study=study, + avgmethod=avgmethod, + dataset=dset) + + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_tissue, conn=conn), + partial(check_probe_study, + conn=conn, + species_id=species_id, + population_id=population_id)) + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/dataset-info"), + methods=["POST"]) +@require_login +def select_dataset_info(species_id: int, population_id: int): + """ + If `geno` files exist in the R/qtl2 bundle, prompt user to provide the + dataset the genotypes belong to. + """ + form = request.form + with database_connection(app.config["SQL_URI"]) as conn: + def __thunk__(): + species = species_by_id(conn, species_id) + population = population_by_species_and_id( + conn, species_id, population_id) + thefile = fullpath(form["rqtl2_bundle_file"]) + with ZipFile(str(thefile), "r") as zfile: + cdata = r_qtl2.control_data(zfile) + + geno_dataset = geno_dataset_by_id( + conn,form.get("geno-dataset-id", "").strip()) + if "geno" in cdata and not bool(form.get("geno-dataset-id")): + return render_template( + "expression-data/rqtl2/select-geno-dataset.html", + species=species, + population=population, + rqtl2_bundle_file=thefile.name, + datasets=geno_datasets_by_species_and_population( + conn, species_id, population_id)) + + tissue = tissue_by_id(conn, form.get("tissueid", "").strip()) + if "pheno" in cdata and not bool(tissue): + return render_template( + "expression-data/rqtl2/select-tissue.html", + species=species, + population=population, + rqtl2_bundle_file=thefile.name, + geno_dataset=geno_dataset, + studies=probeset_studies_by_species_and_population( + conn, species_id, population_id), + platforms=platforms_by_species(conn, species_id), + tissues=all_tissues(conn)) + + probeset_study = probeset_study_by_id( + conn, form.get("probe-study-id", "").strip()) + if "pheno" in cdata and not bool(probeset_study): + return render_template( + "expression-data/rqtl2/select-probeset-study-id.html", + species=species, + population=population, + rqtl2_bundle_file=thefile.name, + geno_dataset=geno_dataset, + studies=probeset_studies_by_species_and_population( + conn, species_id, population_id), + platforms=platforms_by_species(conn, species_id), + tissue=tissue) + probeset_study = probeset_study_by_id( + conn, int(form["probe-study-id"])) + + probeset_dataset = probeset_dataset_by_id( + conn, form.get("probe-dataset-id", "").strip()) + if "pheno" in cdata and not bool(probeset_dataset): + return render_template( + "expression-data/rqtl2/select-probeset-dataset.html", + species=species, + population=population, + rqtl2_bundle_file=thefile.name, + geno_dataset=geno_dataset, + probe_study=probeset_study, + tissue=tissue, + datasets=probeset_datasets_by_study( + conn, int(form["probe-study-id"])), + avgmethods=averaging_methods(conn)) + + return render_template("expression-data/rqtl2/summary-info.html", + species=species, + population=population, + rqtl2_bundle_file=thefile.name, + geno_dataset=geno_dataset, + tissue=tissue, + probe_study=probeset_study, + probe_dataset=probeset_dataset) + + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id)) + + +@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" + "/rqtl2-bundle/confirm-bundle-details"), + methods=["POST"]) +@require_login +def confirm_bundle_details(species_id: int, population_id: int): + """Confirm the details and trigger R/qtl2 bundle processing...""" + redisuri = app.config["REDIS_URL"] + with (database_connection(app.config["SQL_URI"]) as conn, + Redis.from_url(redisuri, decode_responses=True) as rconn): + def __thunk__(): + redis_ttl_seconds = app.config["JOBS_TTL_SECONDS"] + jobid = str(uuid4()) + _job = jobs.launch_job( + jobs.initialise_job( + rconn, + jobs.jobsnamespace(), + jobid, + [ + sys.executable, "-m", "scripts.process_rqtl2_bundle", + app.config["SQL_URI"], app.config["REDIS_URL"], + jobs.jobsnamespace(), jobid, "--redisexpiry", + str(redis_ttl_seconds)], + "R/qtl2 Bundle Upload", + redis_ttl_seconds, + { + "bundle-metadata": json.dumps({ + "speciesid": species_id, + "populationid": population_id, + "rqtl2-bundle-file": str(fullpath( + request.form["rqtl2_bundle_file"])), + "geno-dataset-id": request.form.get( + "geno-dataset-id", ""), + "probe-study-id": request.form.get( + "probe-study-id", ""), + "probe-dataset-id": request.form.get( + "probe-dataset-id", ""), + **({ + "platformid": probeset_study_by_id( + conn, + int(request.form["probe-study-id"]))["ChipId"] + } if bool(request.form.get("probe-study-id")) else {}) + }) + }), + redisuri, + f"{app.config['UPLOAD_FOLDER']}/job_errors") + + return redirect(url_for("expression-data.rqtl2.rqtl2_processing_status", + jobid=jobid)) + + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_probe_study, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_probe_dataset, + conn=conn, + species_id=species_id, + population_id=population_id)) + + +@rqtl2.route("/status/<uuid:jobid>") +def rqtl2_processing_status(jobid: UUID): + """Retrieve the status of the job processing the uploaded R/qtl2 bundle.""" + with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: + try: + thejob = jobs.job(rconn, jobs.jobsnamespace(), jobid) + + messagelistname = thejob.get("log-messagelist") + logmessages = (rconn.lrange(messagelistname, 0, -1) + if bool(messagelistname) else []) + + if thejob["status"] == "error": + return render_template( + "expression-data/rqtl2/rqtl2-job-error.html", + job=thejob, + messages=logmessages) + if thejob["status"] == "success": + return render_template( + "expression-data/rqtl2/rqtl2-job-results.html", + job=thejob, + messages=logmessages) + + return render_template( + "expression-data/rqtl2/rqtl2-job-status.html", + job=thejob, + messages=logmessages) + except jobs.JobNotFound as _exc: + return render_template("expression-data/rqtl2/no-such-job.html", + jobid=jobid) diff --git a/uploader/population/views.py b/uploader/population/views.py index e870c85..36201ba 100644 --- a/uploader/population/views.py +++ b/uploader/population/views.py @@ -1,8 +1,8 @@ """Views dealing with populations/inbredsets""" -import re import json import base64 +from MySQLdb.cursors import DictCursor from flask import (flash, request, url_for, @@ -11,10 +11,16 @@ from flask import (flash, current_app as app) from uploader.samples.views import samplesbp +from uploader.oauth2.client import oauth2_post from uploader.ui import make_template_renderer from uploader.authorisation import require_login -from uploader.db_utils import database_connection from uploader.genotypes.views import genotypesbp +from uploader.db_utils import database_connection +from uploader.datautils import enumerate_sequence +from uploader.phenotypes.views import phenotypesbp +from uploader.expression_data.views import exprdatabp +from uploader.monadic_requests import make_either_error_handler +from uploader.input_validation import is_valid_representative_name from uploader.species.models import (all_species, species_by_id, order_species_by_family) @@ -29,6 +35,8 @@ __active_link__ = "populations" popbp = Blueprint("populations", __name__) popbp.register_blueprint(samplesbp, url_prefix="/") popbp.register_blueprint(genotypesbp, url_prefix="/") +popbp.register_blueprint(phenotypesbp, url_prefix="/") +popbp.register_blueprint(exprdatabp, url_prefix="/") render_template = make_template_renderer("populations") @@ -60,38 +68,17 @@ def list_species_populations(species_id: int): return render_template( "populations/list-populations.html", species=species, - populations=populations_by_species(conn, species_id), + populations=enumerate_sequence(populations_by_species( + conn, species_id)), activelink="list-populations") -def valid_population_name(population_name: str) -> bool: - """ - Check whether the given name is a valid population name. - - Parameters - ---------- - population_name: a string of characters. - - Checks For - ---------- - * The name MUST start with an alphabet [a-zA-Z] - * The name MUST end with an alphabet [a-zA-Z] or number [0-9] - * The name MUST be composed of alphabets [a-zA-Z], numbers [0-9], - underscores (_) and/or hyphens (-). - - Returns - ------- - Boolean indicating whether or not the name is valid. - """ - pattern = re.compile(r"^[a-zA-Z]+[a-zA-Z0-9_-]*[a-zA-Z0-9]$") - return bool(pattern.match(population_name)) - - @popbp.route("/<int:species_id>/populations/create", methods=["GET", "POST"]) @require_login def create_population(species_id: int): """Create a new population.""" - with database_connection(app.config["SQL_URI"]) as conn: + with (database_connection(app.config["SQL_URI"]) as conn, + conn.cursor(cursorclass=DictCursor) as cursor): species = species_by_id(conn, species_id) if request.method == "GET": @@ -102,7 +89,7 @@ def create_population(species_id: int): ).decode("utf8") error_values = json.loads(base64.b64decode( - error_values.encode("utf8")).decode("utf8")) + error_values.encode("utf8")).decode("utf8"))# type: ignore[union-attr] return render_template( "populations/create-population.html", species=species, @@ -121,7 +108,7 @@ def create_population(species_id: int): flash("You must select a species.", "alert-danger") return redirect(url_for("species.populations.index")) - errors = tuple() + errors: tuple[tuple[str, str], ...] = tuple() population_name = (request.form.get( "population_name") or "").strip() @@ -129,7 +116,7 @@ def create_population(species_id: int): errors = errors + (("population_name", "You must provide a name for the population!"),) - if not valid_population_name(population_name): + if not is_valid_representative_name(population_name): errors = errors + (( "population_name", "The population name can only contain letters, numbers, " @@ -151,7 +138,7 @@ def create_population(species_id: int): species_id=species["SpeciesId"], error_values=values)) - new_population = save_population(conn, { + new_population = save_population(cursor, { "SpeciesId": species["SpeciesId"], "Name": population_name, "InbredSetName": population_fullname, @@ -163,9 +150,26 @@ def create_population(species_id: int): "GeneticType": request.form.get("population_genetic_type") or None }) - return redirect(url_for("species.populations.view_population", - species_id=species["SpeciesId"], - population_id=new_population["InbredSetId"])) + def __flash_success__(_success): + flash("Successfully created resource.", "alert-success") + return redirect(url_for( + "species.populations.view_population", + species_id=species["SpeciesId"], + population_id=new_population["InbredSetId"])) + + app.logger.debug("We begin setting up the privileges hereā¦") + return oauth2_post( + "auth/resource/populations/create", + json={ + **dict(request.form), + "species_id": species_id, + "population_id": new_population["Id"], + "public": "on" + } + ).either( + make_either_error_handler( + "There was an error creating the population"), + __flash_success__) @popbp.route("/<int:species_id>/populations/<int:population_id>", |