"""Module to handle uploading of R/qtl2 bundles.""" import sys import json from typing import Union from pathlib import Path from datetime import date from uuid import UUID, uuid4 from zipfile import ZipFile, is_zipfile from redis import Redis from MySQLdb.cursors import DictCursor from flask import ( flash, request, url_for, redirect, Blueprint, render_template, current_app as app) from r_qtl import r_qtl2 from qc_app import jobs from qc_app.files import save_file, fullpath from qc_app.dbinsert import species as all_species from qc_app.db_utils import with_db_connection, database_connection from qc_app.db.tissues import all_tissues, tissue_by_id from qc_app.db.platforms import platform_by_id, platforms_by_species from qc_app.db.averaging import averaging_methods, averaging_method_by_id from qc_app.db import ( species_by_id, save_population, populations_by_species, population_by_species_and_id,) from qc_app.db.datasets import ( geno_dataset_by_id, geno_datasets_by_species_and_population, probeset_study_by_id, probeset_create_study, probeset_dataset_by_id, probeset_create_dataset, probeset_datasets_by_study, probeset_studies_by_species_and_population) rqtl2 = Blueprint("rqtl2", __name__) @rqtl2.route("/", methods=["GET", "POST"]) @rqtl2.route("/select-species", methods=["POST"]) def select_species(): """Select the species.""" if request.method == "GET": return render_template("rqtl2/index.html", species=with_db_connection(all_species)) species_id = request.form.get("species_id") species = with_db_connection( lambda conn: species_by_id(conn, species_id)) if bool(species): return redirect(url_for( "upload.rqtl2.select_population", species_id=species_id)) flash("Invalid species or no species selected!", "alert-error error-rqtl2") return redirect(url_for("upload.rqtl2.select_species")) @rqtl2.route("/upload/species//select-population", methods=["GET", "POST"]) def select_population(species_id: int): """Select/Create the population to organise data under.""" with database_connection(app.config["SQL_URI"]) as conn: species = species_by_id(conn, species_id) if not bool(species): flash("Invalid species selected!", "alert-error error-rqtl2") return redirect(url_for("upload.rqtl2.select_species")) if request.method == "GET": return render_template( "rqtl2/select-population.html", species=species, populations=populations_by_species(conn, species_id)) population = population_by_species_and_id( conn, species["SpeciesId"], request.form.get("inbredset_id")) if not bool(population): flash("Invalid Population!", "alert-error error-rqtl2") return redirect( url_for("upload.rqtl2.select_population", pgsrc="error"), code=307) return redirect(url_for("upload.rqtl2.upload_rqtl2_bundle", species_id=species["SpeciesId"], population_id=population["InbredSetId"])) @rqtl2.route("/upload/species//create-population", methods=["POST"]) def create_population(species_id: int): """Create a new population for the given species.""" population_page = redirect(url_for("upload.rqtl2.select_population", species_id=species_id)) with database_connection(app.config["SQL_URI"]) as conn: species = species_by_id(conn, species_id) population_name = request.form.get("inbredset_name", "").strip() population_fullname = request.form.get("inbredset_fullname", "").strip() if not bool(species): flash("Invalid species!", "alert-error error-rqtl2") return redirect(url_for("upload.rqtl2.select_species")) if not bool(population_name): flash("Invalid Population Name!", "alert-error error-rqtl2") return population_page if not bool(population_fullname): flash("Invalid Population Full Name!", "alert-error error-rqtl2") return population_page new_population = save_population(conn, { "SpeciesId": species["SpeciesId"], "Name": population_name, "InbredSetName": population_fullname, "FullName": population_fullname, "Family": request.form.get("inbredset_family") or None, "Description": request.form.get("description") or None }) flash("Population created successfully.", "alert-success") return redirect( url_for("upload.rqtl2.upload_rqtl2_bundle", species_id=species_id, population_id=new_population["population_id"], pgsrc="create-population"), code=307) class __RequestError__(Exception): #pylint: disable=[invalid-name] """Internal class to avoid pylint's `too-many-return-statements` error.""" @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle"), methods=["GET", "POST"]) def upload_rqtl2_bundle(species_id: int, population_id: int): """Allow upload of R/qtl2 bundle.""" with database_connection(app.config["SQL_URI"]) as conn: species = species_by_id(conn, species_id) population = population_by_species_and_id( conn, species["SpeciesId"], population_id) if not bool(species): flash("Invalid species!", "alert-error error-rqtl2") return redirect(url_for("upload.rqtl2.select_species")) if not bool(population): flash("Invalid Population!", "alert-error error-rqtl2") return redirect( url_for("upload.rqtl2.select_population", pgsrc="error"), code=307) if request.method == "GET" or ( request.method == "POST" and bool(request.args.get("pgsrc"))): return render_template("rqtl2/upload-rqtl2-bundle-step-01.html", species=species, population=population) try: the_file = save_file(request.files["rqtl2_bundle_file"], Path(app.config["UPLOAD_FOLDER"])) except AssertionError: flash("Please provide a valid R/qtl2 zip bundle.", "alert-error error-rqtl2") return redirect(url_for("upload.rqtl2.upload_rqtl2_bundle", species_id=species_id, population_id=population_id)) if not is_zipfile(str(the_file)): raise __RequestError__("Invalid file! Expected a zip file.") redisuri = app.config["REDIS_URL"] with Redis.from_url(redisuri, decode_responses=True) as rconn: jobid = str(uuid4()) redis_ttl_seconds = app.config["JOBS_TTL_SECONDS"] jobs.launch_job( jobs.initialise_job( rconn, jobs.jobsnamespace(), jobid, [sys.executable, "-m", "scripts.qc_on_rqtl2_bundle", app.config["SQL_URI"], app.config["REDIS_URL"], jobs.jobsnamespace(), jobid, "--redisexpiry", str(redis_ttl_seconds)], "rqtl2-bundle-qc-job", redis_ttl_seconds, {"job-metadata": json.dumps({ "speciesid": species_id, "populationid": population_id, "rqtl2-bundle-file": str(the_file.absolute()), "original-filename": request.files["rqtl2_bundle_file"].filename})}), redisuri, f"{app.config['UPLOAD_FOLDER']}/job_errors") return redirect(url_for( "upload.rqtl2.rqtl2_bundle_qc_status", jobid=jobid)) @rqtl2.route("/upload/species/rqtl2-bundle/qc-status/", methods=["GET", "POST"]) def rqtl2_bundle_qc_status(jobid: UUID): """Check the status of the QC jobs.""" with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn, database_connection(app.config["SQL_URI"]) as dbconn): try: thejob = jobs.job(rconn, jobs.jobsnamespace(), jobid) messagelistname = thejob.get("log-messagelist") logmessages = (rconn.lrange(messagelistname, 0, -1) if bool(messagelistname) else []) jobstatus = thejob["status"] if jobstatus == "error": return render_template("rqtl2/rqtl2-qc-job-error.html", job=thejob, errorsgeneric=json.loads( thejob.get("errors-generic", "[]")), errorsgeno=json.loads( thejob.get("errors-geno", "[]")), errorspheno=json.loads( thejob.get("errors-pheno", "[]")), errorsphenose=json.loads( thejob.get("errors-phenose", "[]")), errorsphenocovar=json.loads( thejob.get("errors-phenocovar", "[]")), messages=logmessages) if jobstatus == "success": jobmeta = json.loads(thejob["job-metadata"]) species = species_by_id(dbconn, jobmeta["speciesid"]) return render_template( "rqtl2/rqtl2-qc-job-results.html", species=species, population=population_by_species_and_id( dbconn, species["SpeciesId"], jobmeta["populationid"]), rqtl2bundle=Path(jobmeta["rqtl2-bundle-file"]), rqtl2bundleorig=jobmeta["original-filename"]) def compute_percentage(thejob, filetype) -> Union[str, None]: if f"{filetype}-linecount" in thejob: return "100" if f"{filetype}-filesize" in thejob: percent = ((int(thejob.get(f"{filetype}-checked", 0)) / int(thejob.get(f"{filetype}-filesize", 1))) * 100) return f"{percent:.2f}" return None return render_template( "rqtl2/rqtl2-qc-job-status.html", job=thejob, geno_percent=compute_percentage(thejob, "geno"), pheno_percent=compute_percentage(thejob, "pheno"), phenose_percent=compute_percentage(thejob, "phenose"), messages=tuple()) except jobs.JobNotFound: return render_template("rqtl2/no-such-job.html", jobid=jobid) def check_errors(conn, *args, **kwargs):#pylint: disable=[too-many-return-statements] """Check for select errors in the forms and return a page to redirect to.""" species_id = kwargs.get("species_id") or request.form.get("species_id") population_id = (kwargs.get("population_id") or request.form.get("population_id")) species = species_by_id(conn, species_id) population = population_by_species_and_id(conn, species_id, population_id) if "species" in args and not bool(species): flash("Invalid species!", "alert-error error-rqtl2") return redirect(url_for("upload.rqtl2.select_species")) if "population" in args and not bool(population): flash("Invalid Population!", "alert-error error-rqtl2") return redirect( url_for("upload.rqtl2.select_population", pgsrc="error"), code=307) if ("rqtl2_bundle_file" in args and not bool(request.form.get("rqtl2_bundle_file"))): flash("There is no file to process.", "alert-error error-rqtl2") return redirect(url_for("upload.rqtl2.upload_rqtl2_bundle", species_id=species_id, population_id=population_id, pgsrc="error"), code=307) if ("geno-dataset" in args and not bool(request.form.get("geno-dataset-id"))): flash("No genotype dataset was provided!", "alert-error error-rqtl2") return redirect(url_for("upload.rqtl2.select_geno_dataset", species_id=species_id, population_id=population_id, pgsrc="error"), code=307) summary_page = redirect(url_for("upload.rqtl2.select_dataset_info", species_id=species_id, population_id=population_id, pgsrc="error"), code=307) if ("probe-study-id" in args and not bool(request.form.get("probe-study-id"))): flash("No probeset study was selected!", "alert-error error-rqtl2") return summary_page if ("probe-dataset-id" in args and not bool(request.form.get("probe-dataset-id"))): flash("No probeset dataset was selected!", "alert-error error-rqtl2") return summary_page return None @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/select-geno-dataset"), methods=["POST"]) def select_geno_dataset(species_id: int, population_id: int): """Select from existing geno datasets.""" with database_connection(app.config["SQL_URI"]) as conn: error = check_errors( conn, "species", "population", "rqtl2_bundle_file", "geno-dataset") if bool(error): return error geno_dset = geno_datasets_by_species_and_population( conn, species_id, population_id) if not bool(geno_dset): flash("No genotype dataset was provided!", "alert-error error-rqtl2") return redirect(url_for("upload.rqtl2.select_geno_dataset", species_id=species_id, population_id=population_id, pgsrc="error"), code=307) flash("Genotype accepted", "alert-success error-rqtl2") return redirect(url_for("upload.rqtl2.select_dataset_info", species_id=species_id, population_id=population_id, pgsrc="upload.rqtl2.select_geno_dataset"), code=307) @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/create-geno-dataset"), methods=["POST"]) def create_geno_dataset(species_id: int, population_id: int): """Create a new geno dataset.""" with database_connection(app.config["SQL_URI"]) as conn: error = check_errors(conn, "species", "population", "rqtl2_bundle_file") if bool(error): return error sgeno_page = redirect(url_for("upload.rqtl2.select_dataset_info", species_id=species_id, population_id=population_id, pgsrc="error"), code=307) errorclasses = "alert-error error-rqtl2 error-rqtl2-create-geno-dataset" if not bool(request.form.get("dataset-name")): flash("You must provide the dataset name", errorclasses) return sgeno_page if not bool(request.form.get("dataset-fullname")): flash("You must provide the dataset full name", errorclasses) return sgeno_page public = 2 if request.form.get("dataset-public") == "on" else 0 with conn.cursor(cursorclass=DictCursor) as cursor: datasetname = request.form["dataset-name"] new_dataset = { "name": datasetname, "fname": request.form.get("dataset-fullname"), "sname": request.form.get("dataset-shortname") or datasetname, "today": date.today().isoformat(), "pub": public, "isetid": population_id } cursor.execute( "INSERT INTO GenoFreeze(" "Name, FullName, ShortName, CreateTime, public, InbredSetId" ") " "VALUES(" "%(name)s, %(fname)s, %(sname)s, %(today)s, %(pub)s, %(isetid)s" ")", new_dataset) flash("Created dataset successfully.", "alert-success") return render_template( "rqtl2/create-geno-dataset-success.html", species=species_by_id(conn, species_id), population=population_by_species_and_id( conn, species_id, population_id), rqtl2_bundle_file=request.form["rqtl2_bundle_file"], geno_dataset={**new_dataset, "id": cursor.lastrowid}) @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/select-probeset-study"), methods=["POST"]) def select_probeset_study(species_id: int, population_id: int): """Select or create a probeset study.""" with database_connection(app.config["SQL_URI"]) as conn: error = check_errors( conn, "species", "population", "rqtl2_bundle_file", "geno-dataset", "probe-study-id") if bool(error): return error summary_page = redirect(url_for("upload.rqtl2.select_dataset_info", species_id=species_id, population_id=population_id), code=307) if not bool(probeset_study_by_id(conn, int(request.form["probe-study-id"]))): flash("Invalid study selected!", "alert-error error-rqtl2") return summary_page return summary_page @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/select-probeset-dataset"), methods=["POST"]) def select_probeset_dataset(species_id: int, population_id: int): """Select or create a probeset dataset.""" with database_connection(app.config["SQL_URI"]) as conn: error = check_errors( conn, "species", "population", "rqtl2_bundle_file", "geno-dataset", "probe-study-id", "probe-dataset-id") if bool(error): return error summary_page = redirect(url_for("upload.rqtl2.select_dataset_info", species_id=species_id, population_id=population_id), code=307) if not bool(probeset_study_by_id(conn, int(request.form["probe-study-id"]))): flash("Invalid study selected!", "alert-error error-rqtl2") return summary_page return summary_page @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/create-probeset-study"), methods=["POST"]) def create_probeset_study(species_id: int, population_id: int): """Create a new probeset study.""" with database_connection(app.config["SQL_URI"]) as conn: error = check_errors( conn, "species", "population", "rqtl2_bundle_file", "geno-dataset") if bool(error): return error form = request.form select_study_page = redirect( url_for("upload.rqtl2.select_probeset_study", species_id=species_id, population_id=population_id), code=307) if not (bool(form.get("platformid")) and bool(platform_by_id(conn, int(form["platformid"])))): flash("Invalid platform selected.", "alert-error error-rqtl2") return select_study_page if not (bool(form.get("tissueid")) and bool(tissue_by_id(conn, int(form["tissueid"])))): flash("Invalid tissue selected.", "alert-error error-rqtl2") return select_study_page study = probeset_create_study( conn, population_id, int(form["platformid"]), int(form["tissueid"]), form["studyname"], form.get("studyfullname") or "", form.get("studyshortname") or "") return render_template( "rqtl2/create-probe-study-success.html", species=species_by_id(conn, species_id), population=population_by_species_and_id( conn, species_id, population_id), rqtl2_bundle_file=request.form["rqtl2_bundle_file"], geno_dataset=geno_dataset_by_id( conn, int(request.form["geno-dataset-id"])), study=study) @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/create-probeset-dataset"), methods=["POST"]) def create_probeset_dataset(species_id: int, population_id: int):#pylint: disable=[too-many-return-statements] """Create a new probeset dataset.""" with database_connection(app.config["SQL_URI"]) as conn: error = check_errors( conn, "species", "population", "rqtl2_bundle_file", "geno-dataset", "probe-study-id") if bool(error): return error form = request.form summary_page = redirect(url_for("upload.rqtl2.select_dataset_info", species_id=species_id, population_id=population_id), code=307) if not bool(form.get("averageid")): flash("Averaging method not selected!", "alert-error error-rqtl2") return summary_page if not bool(form.get("datasetname")): flash("Dataset name not provided!", "alert-error error-rqtl2") return summary_page if not bool(form.get("datasetfullname")): flash("Dataset full name not provided!", "alert-error error-rqtl2") return summary_page study = probeset_study_by_id(conn, int(form["probe-study-id"])) if not bool(study): flash("Invalid ProbeSet study provided!", "alert-error error-rqtl2") return summary_page avgmethod = averaging_method_by_id(conn, int(form["averageid"])) if not bool(avgmethod): flash("Invalid averaging method provided!", "alert-error error-rqtl2") return summary_page dset = probeset_create_dataset(conn, int(form["probe-study-id"]), int(form["averageid"]), form["datasetname"], form["datasetfullname"], form["datasetshortname"], form["datasetpublic"] == "on", form.get("datasetdatascale", "log2")) return render_template( "rqtl2/create-probe-dataset-success.html", species=species_by_id(conn, species_id), population=population_by_species_and_id( conn, species_id, population_id), rqtl2_bundle_file=request.form["rqtl2_bundle_file"], geno_dataset=geno_dataset_by_id( conn, int(request.form["geno-dataset-id"])), study=study, avgmethod=avgmethod, dataset=dset) @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/dataset-info"), methods=["POST"]) def select_dataset_info(species_id: int, population_id: int): """ If `geno` files exist in the R/qtl2 bundle, prompt user to provide the dataset the genotypes belong to. """ form = request.form with database_connection(app.config["SQL_URI"]) as conn: error_page = check_errors(conn, "species", "population", "rqtl2_bundle_file") if bool(error_page): return error_page species = species_by_id(conn, species_id) population = population_by_species_and_id( conn, species_id, population_id) thefile = fullpath(form["rqtl2_bundle_file"]) with ZipFile(str(thefile), "r") as zfile: cdata = r_qtl2.control_data(zfile) if "geno" in cdata and not bool(form.get("geno-dataset-id")): return render_template( "rqtl2/select-geno-dataset.html", species=species, population=population, rqtl2_bundle_file=thefile.name, datasets=geno_datasets_by_species_and_population( conn, species_id, population_id)) geno_dataset = geno_dataset_by_id(conn, int(form["geno-dataset-id"])) if "pheno" in cdata and not bool(form.get("probe-study-id")): return render_template( "rqtl2/select-probeset-study-id.html", species=species, population=population, rqtl2_bundle_file=thefile.name, geno_dataset=geno_dataset, studies=probeset_studies_by_species_and_population( conn, species_id, population_id), platforms=platforms_by_species(conn, species_id), tissues=all_tissues(conn)) probeset_study = probeset_study_by_id( conn, int(form["probe-study-id"])) if "pheno" in cdata and not bool(form.get("probe-dataset-id")): return render_template( "rqtl2/select-probeset-dataset.html", species=species, population=population, rqtl2_bundle_file=thefile.name, geno_dataset=geno_dataset, probe_study=probeset_study, datasets=probeset_datasets_by_study( conn, int(form["probe-study-id"])), avgmethods=averaging_methods(conn)) probeset_study = probeset_study_by_id( conn, int(form["probe-study-id"])) probeset_dataset = probeset_dataset_by_id( conn, int(form["probe-dataset-id"])) return render_template("rqtl2/summary-info.html", species=species, population=population, rqtl2_bundle_file=thefile.name, geno_dataset=geno_dataset, probe_study=probeset_study, probe_dataset=probeset_dataset) @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/confirm-bundle-details"), methods=["POST"]) def confirm_bundle_details(species_id: int, population_id: int): """Confirm the details and trigger R/qtl2 bundle processing...""" redisuri = app.config["REDIS_URL"] with (database_connection(app.config["SQL_URI"]) as conn, Redis.from_url(redisuri, decode_responses=True) as rconn): error = check_errors( conn, "species", "population", "rqtl2_bundle_file", "geno-dataset", "probe-study-id", "probe-dataset-id") if bool(error): return error redis_ttl_seconds = app.config["JOBS_TTL_SECONDS"] jobid = str(uuid4()) _job = jobs.launch_job( jobs.initialise_job( rconn, jobs.jobsnamespace(), jobid, [ sys.executable, "-m", "scripts.process_rqtl2_bundle", app.config["SQL_URI"], app.config["REDIS_URL"], jobs.jobsnamespace(), jobid, "--redisexpiry", str(redis_ttl_seconds)], "R/qtl2 Bundle Upload", redis_ttl_seconds, { "bundle-metadata": json.dumps({ "speciesid": species_id, "populationid": population_id, "rqtl2-bundle-file": str(fullpath( request.form["rqtl2_bundle_file"])), "geno-dataset-id": request.form.get( "geno-dataset-id", ""), "probe-study-id": request.form.get( "probe-study-id", ""), "probe-dataset-id": request.form.get( "probe-dataset-id", ""), **({ "platformid": probeset_study_by_id( conn, int(request.form["probe-study-id"]))["ChipId"] } if bool(request.form.get("probe-study-id")) else {}) }) }), redisuri, f"{app.config['UPLOAD_FOLDER']}/job_errors") return redirect(url_for("upload.rqtl2.rqtl2_processing_status", jobid=jobid)) @rqtl2.route("/status/") def rqtl2_processing_status(jobid: UUID): """Retrieve the status of the job processing the uploaded R/qtl2 bundle.""" with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: try: thejob = jobs.job(rconn, jobs.jobsnamespace(), jobid) messagelistname = thejob.get("log-messagelist") logmessages = (rconn.lrange(messagelistname, 0, -1) if bool(messagelistname) else []) if thejob["status"] == "error": return render_template( "rqtl2/rqtl2-job-error.html", job=thejob, messages=logmessages) if thejob["status"] == "success": return render_template("rqtl2/rqtl2-job-results.html", job=thejob, messages=logmessages) return render_template( "rqtl2/rqtl2-job-status.html", job=thejob, messages=logmessages) except jobs.JobNotFound as _exc: return render_template("rqtl2/no-such-job.html", jobid=jobid)