diff options
Diffstat (limited to 'uploader/expression_data')
-rw-r--r-- | uploader/expression_data/__init__.py | 11 | ||||
-rw-r--r-- | uploader/expression_data/dbinsert.py | 399 | ||||
-rw-r--r-- | uploader/expression_data/index.py | 125 | ||||
-rw-r--r-- | uploader/expression_data/parse.py | 178 | ||||
-rw-r--r-- | uploader/expression_data/rqtl2.py | 1175 | ||||
-rw-r--r-- | uploader/expression_data/views.py | 384 |
6 files changed, 784 insertions, 1488 deletions
diff --git a/uploader/expression_data/__init__.py b/uploader/expression_data/__init__.py index 206a764..fc8bd41 100644 --- a/uploader/expression_data/__init__.py +++ b/uploader/expression_data/__init__.py @@ -1,11 +1,2 @@ """Package handling upload of files.""" -from flask import Blueprint - -from .rqtl2 import rqtl2 -from .index import indexbp -from .parse import parsebp - -exprdatabp = Blueprint("expression-data", __name__) -exprdatabp.register_blueprint(indexbp, url_prefix="/") -exprdatabp.register_blueprint(rqtl2, url_prefix="/rqtl2") -exprdatabp.register_blueprint(parsebp, url_prefix="/parse") +from .views import exprdatabp diff --git a/uploader/expression_data/dbinsert.py b/uploader/expression_data/dbinsert.py new file mode 100644 index 0000000..32ca359 --- /dev/null +++ b/uploader/expression_data/dbinsert.py @@ -0,0 +1,399 @@ +"Handle inserting data into the database" +import os +import json +from typing import Union +from functools import reduce +from datetime import datetime + +from redis import Redis +from MySQLdb.cursors import DictCursor +from flask import ( + flash, request, url_for, Blueprint, redirect, render_template, + current_app as app) + +from uploader import jobs +from uploader.authorisation import require_login +from uploader.population.models import populations_by_species +from uploader.species.models import all_species, species_by_id +from uploader.platforms.models import platform_by_species_and_id +from uploader.db_utils import with_db_connection, database_connection + +dbinsertbp = Blueprint("dbinsert", __name__) + +def render_error(error_msg): + "Render the generic error page" + return render_template("dbupdate_error.html", error_message=error_msg), 400 + +def make_menu_items_grouper(grouping_fn=lambda item: item): + "Build function to be used to group menu items." 
+ def __grouper__(acc, row): + grouping = grouping_fn(row[2]) + row_values = (row[0].strip(), row[1].strip()) + if acc.get(grouping) is None: + return {**acc, grouping: (row_values,)} + return {**acc, grouping: (acc[grouping] + (row_values,))} + return __grouper__ + +def genechips(): + "Retrieve the genechip information from the database" + def __organise_by_species__(acc, chip): + speciesid = chip["SpeciesId"] + if acc.get(speciesid) is None: + return {**acc, speciesid: (chip,)} + return {**acc, speciesid: acc[speciesid] + (chip,)} + + with database_connection(app.config["SQL_URI"]) as conn: + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute("SELECT * FROM GeneChip ORDER BY GeneChipName ASC") + return reduce(__organise_by_species__, cursor.fetchall(), {}) + + return {} + + +def studies_by_species_and_platform(speciesid:int, genechipid:int) -> tuple: + "Retrieve the studies by the related species and gene platform" + with database_connection(app.config["SQL_URI"]) as conn: + with conn.cursor(cursorclass=DictCursor) as cursor: + query = ( + "SELECT Species.SpeciesId, ProbeFreeze.* " + "FROM Species INNER JOIN InbredSet " + "ON Species.SpeciesId=InbredSet.SpeciesId " + "INNER JOIN ProbeFreeze " + "ON InbredSet.InbredSetId=ProbeFreeze.InbredSetId " + "WHERE Species.SpeciesId = %s " + "AND ProbeFreeze.ChipId = %s") + cursor.execute(query, (speciesid, genechipid)) + return tuple(cursor.fetchall()) + + return tuple() + +def organise_groups_by_family(acc:dict, group:dict) -> dict: + "Organise the group (InbredSet) information by the group field" + family = group["Family"] + if acc.get(family): + return {**acc, family: acc[family] + (group,)} + return {**acc, family: (group,)} + +def tissues() -> tuple: + "Retrieve type (Tissue) information from the database." 
+ with database_connection(app.config["SQL_URI"]) as conn: + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute("SELECT * FROM Tissue ORDER BY Name") + return tuple(cursor.fetchall()) + + return tuple() + +@dbinsertbp.route("/platform", methods=["POST"]) +@require_login +def select_platform(): + "Select the platform (GeneChipId) used for the data." + job_id = request.form["job_id"] + with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn, + database_connection(app.config["SQL_URI"]) as conn): + job = jobs.job(rconn, jobs.jobsnamespace(), job_id) + if job: + filename = job["filename"] + filepath = f"{app.config['UPLOAD_FOLDER']}/{filename}" + if os.path.exists(filepath): + default_species = 1 + gchips = genechips() + return render_template( + "select_platform.html", filename=filename, + filetype=job["filetype"], totallines=int(job["currentline"]), + default_species=default_species, species=all_species(conn), + genechips=gchips[default_species], + genechips_data=json.dumps(gchips)) + return render_error(f"File '{filename}' no longer exists.") + return render_error(f"Job '{job_id}' no longer exists.") + return render_error("Unknown error") + +@dbinsertbp.route("/study", methods=["POST"]) +@require_login +def select_study(): + "View to select/create the study (ProbeFreeze) associated with the data." 
+ form = request.form + try: + assert form.get("filename"), "filename" + assert form.get("filetype"), "filetype" + assert form.get("species"), "species" + assert form.get("genechipid"), "platform" + + speciesid = form["species"] + genechipid = form["genechipid"] + + the_studies = studies_by_species_and_platform(speciesid, genechipid) + the_groups = reduce( + organise_groups_by_family, + with_db_connection( + lambda conn: populations_by_species(conn, speciesid)), + {}) + return render_template( + "select_study.html", filename=form["filename"], + filetype=form["filetype"], totallines=form["totallines"], + species=speciesid, genechipid=genechipid, studies=the_studies, + groups=the_groups, tissues = tissues(), + selected_group=int(form.get("inbredsetid", -13)), + selected_tissue=int(form.get("tissueid", -13))) + except AssertionError as aserr: + return render_error(f"Missing data: {aserr.args[0]}") + +@dbinsertbp.route("/create-study", methods=["POST"]) +@require_login +def create_study(): + "Create a new study (ProbeFreeze)." 
+ form = request.form + try: + assert form.get("filename"), "filename" + assert form.get("filetype"), "filetype" + assert form.get("species"), "species" + assert form.get("genechipid"), "platform" + assert form.get("studyname"), "study name" + assert form.get("inbredsetid"), "group" + assert form.get("tissueid"), "type/tissue" + + with database_connection(app.config["SQL_URI"]) as conn: + with conn.cursor(cursorclass=DictCursor) as cursor: + values = ( + form["genechipid"], + form["tissueid"], + form["studyname"], + form.get("studyfullname", ""), + form.get("studyshortname", ""), + datetime.now().date().strftime("%Y-%m-%d"), + form["inbredsetid"]) + query = ( + "INSERT INTO ProbeFreeze(" + "ChipId, TissueId, Name, FullName, ShortName, CreateTime, " + "InbredSetId" + ") VALUES (%s, %s, %s, %s, %s, %s, %s)") + cursor.execute(query, values) + new_studyid = cursor.lastrowid + cursor.execute( + "UPDATE ProbeFreeze SET ProbeFreezeId=%s WHERE Id=%s", + (new_studyid, new_studyid)) + flash("Study created successfully", "alert-success") + return render_template( + "continue_from_create_study.html", + filename=form["filename"], filetype=form["filetype"], + totallines=form["totallines"], species=form["species"], + genechipid=form["genechipid"], studyid=new_studyid) + except AssertionError as aserr: + flash(f"Missing data: {aserr.args[0]}", "alert-error") + return redirect(url_for("dbinsert.select_study"), code=307) + +def datasets_by_study(studyid:int) -> tuple: + "Retrieve datasets associated with a study with the ID `studyid`." 
+ with database_connection(app.config["SQL_URI"]) as conn: + with conn.cursor(cursorclass=DictCursor) as cursor: + query = "SELECT * FROM ProbeSetFreeze WHERE ProbeFreezeId=%s" + cursor.execute(query, (studyid,)) + return tuple(cursor.fetchall()) + + return tuple() + +def averaging_methods() -> tuple: + "Retrieve averaging methods from database" + with database_connection(app.config["SQL_URI"]) as conn: + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute("SELECT * FROM AvgMethod") + return tuple(cursor.fetchall()) + + return tuple() + +def dataset_datascales() -> tuple: + "Retrieve datascales from database" + with database_connection(app.config["SQL_URI"]) as conn: + with conn.cursor() as cursor: + cursor.execute( + 'SELECT DISTINCT DataScale FROM ProbeSetFreeze ' + 'WHERE DataScale IS NOT NULL AND DataScale != ""') + return tuple( + item for item in + (res[0].strip() for res in cursor.fetchall()) + if (item is not None and item != "")) + + return tuple() + +@dbinsertbp.route("/dataset", methods=["POST"]) +@require_login +def select_dataset(): + "Select the dataset to add the file contents against" + form = request.form + try: + assert form.get("filename"), "filename" + assert form.get("filetype"), "filetype" + assert form.get("species"), "species" + assert form.get("genechipid"), "platform" + assert form.get("studyid"), "study" + + studyid = form["studyid"] + datasets = datasets_by_study(studyid) + return render_template( + "select_dataset.html", **{**form, "studyid": studyid}, + datasets=datasets, avgmethods=averaging_methods(), + datascales=dataset_datascales()) + except AssertionError as aserr: + return render_error(f"Missing data: {aserr.args[0]}") + +@dbinsertbp.route("/create-dataset", methods=["POST"]) +@require_login +def create_dataset(): + "Select the dataset to add the file contents against" + form = request.form + try: + assert form.get("filename"), "filename" + assert form.get("filetype"), "filetype" + assert form.get("species"), 
"species" + assert form.get("genechipid"), "platform" + assert form.get("studyid"), "study" + assert form.get("avgid"), "averaging method" + assert form.get("datasetname2"), "Dataset Name 2" + assert form.get("datasetfullname"), "Dataset Full Name" + assert form.get("datasetshortname"), "Dataset Short Name" + assert form.get("datasetpublic"), "Dataset public specification" + assert form.get("datasetconfidentiality"), "Dataset confidentiality" + assert form.get("datasetdatascale"), "Dataset Datascale" + + with database_connection(app.config["SQL_URI"]) as conn: + with conn.cursor(cursorclass=DictCursor) as cursor: + datasetname = form["datasetname"] + cursor.execute("SELECT * FROM ProbeSetFreeze WHERE Name=%s", + (datasetname,)) + results = cursor.fetchall() + if bool(results): + flash("A dataset with that name already exists.", + "alert-error") + return redirect(url_for("dbinsert.select_dataset"), code=307) + values = ( + form["studyid"], form["avgid"], + datasetname, form["datasetname2"], + form["datasetfullname"], form["datasetshortname"], + datetime.now().date().strftime("%Y-%m-%d"), + form["datasetpublic"], form["datasetconfidentiality"], + "williamslab", form["datasetdatascale"]) + query = ( + "INSERT INTO ProbeSetFreeze(" + "ProbeFreezeId, AvgID, Name, Name2, FullName, " + "ShortName, CreateTime, OrderList, public, " + "confidentiality, AuthorisedUsers, DataScale) " + "VALUES" + "(%s, %s, %s, %s, %s, %s, %s, NULL, %s, %s, %s, %s)") + cursor.execute(query, values) + new_datasetid = cursor.lastrowid + return render_template( + "continue_from_create_dataset.html", + filename=form["filename"], filetype=form["filetype"], + species=form["species"], genechipid=form["genechipid"], + studyid=form["studyid"], datasetid=new_datasetid, + totallines=form["totallines"]) + except AssertionError as aserr: + flash(f"Missing data {aserr.args[0]}", "alert-error") + return redirect(url_for("dbinsert.select_dataset"), code=307) + +def study_by_id(studyid:int) -> Union[dict, 
None]: + "Get a study by its Id" + with database_connection(app.config["SQL_URI"]) as conn: + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute( + "SELECT * FROM ProbeFreeze WHERE Id=%s", + (studyid,)) + return cursor.fetchone() + +def dataset_by_id(datasetid:int) -> Union[dict, None]: + "Retrieve a dataset by its id" + with database_connection(app.config["SQL_URI"]) as conn: + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute( + ("SELECT AvgMethod.Name AS AvgMethodName, ProbeSetFreeze.* " + "FROM ProbeSetFreeze INNER JOIN AvgMethod " + "ON ProbeSetFreeze.AvgId=AvgMethod.AvgMethodId " + "WHERE ProbeSetFreeze.Id=%s"), + (datasetid,)) + return cursor.fetchone() + +def selected_keys(original: dict, keys: tuple) -> dict: + "Return a new dict from the `original` dict with only `keys` present." + return {key: value for key,value in original.items() if key in keys} + +@dbinsertbp.route("/final-confirmation", methods=["POST"]) +@require_login +def final_confirmation(): + "Preview the data before triggering entry into the database" + with database_connection(app.config["SQL_URI"]) as conn: + form = request.form + try: + assert form.get("filename"), "filename" + assert form.get("filetype"), "filetype" + assert form.get("species"), "species" + assert form.get("genechipid"), "platform" + assert form.get("studyid"), "study" + assert form.get("datasetid"), "dataset" + + speciesid = form["species"] + genechipid = form["genechipid"] + studyid = form["studyid"] + datasetid=form["datasetid"] + return render_template( + "final_confirmation.html", filename=form["filename"], + filetype=form["filetype"], totallines=form["totallines"], + species=speciesid, genechipid=genechipid, studyid=studyid, + datasetid=datasetid, the_species=selected_keys( + with_db_connection(lambda conn: species_by_id(conn, speciesid)), + ("SpeciesName", "Name", "MenuName")), + platform=selected_keys( + platform_by_species_and_id(conn, speciesid, genechipid), + 
("GeneChipName", "Name", "GeoPlatform", "Title", "GO_tree_value")), + study=selected_keys( + study_by_id(studyid), ("Name", "FullName", "ShortName")), + dataset=selected_keys( + dataset_by_id(datasetid), + ("AvgMethodName", "Name", "Name2", "FullName", "ShortName", + "DataScale"))) + except AssertionError as aserr: + return render_error(f"Missing data: {aserr.args[0]}") + +@dbinsertbp.route("/insert-data", methods=["POST"]) +@require_login +def insert_data(): + "Trigger data insertion" + form = request.form + try: + assert form.get("filename"), "filename" + assert form.get("filetype"), "filetype" + assert form.get("species"), "species" + assert form.get("genechipid"), "platform" + assert form.get("studyid"), "study" + assert form.get("datasetid"), "dataset" + + filename = form["filename"] + filepath = f"{app.config['UPLOAD_FOLDER']}/{filename}" + redisurl = app.config["REDIS_URL"] + if os.path.exists(filepath): + with Redis.from_url(redisurl, decode_responses=True) as rconn: + job = jobs.launch_job( + jobs.data_insertion_job( + rconn, filepath, form["filetype"], form["totallines"], + form["species"], form["genechipid"], form["datasetid"], + app.config["SQL_URI"], redisurl, + app.config["JOBS_TTL_SECONDS"]), + redisurl, f"{app.config['UPLOAD_FOLDER']}/job_errors") + + return redirect(url_for("dbinsert.insert_status", job_id=job["jobid"])) + return render_error(f"File '{filename}' no longer exists.") + except AssertionError as aserr: + return render_error(f"Missing data: {aserr.args[0]}") + +@dbinsertbp.route("/status/<job_id>", methods=["GET"]) +def insert_status(job_id: str): + "Retrieve status of data insertion." 
+ with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: + job = jobs.job(rconn, jobs.jobsnamespace(), job_id) + + if job: + job_status = job["status"] + if job_status == "success": + return render_template("insert_success.html", job=job) + if job["status"] == "error": + return render_template("insert_error.html", job=job) + return render_template("insert_progress.html", job=job) + return render_template("no_such_job.html", job_id=job_id), 400 diff --git a/uploader/expression_data/index.py b/uploader/expression_data/index.py deleted file mode 100644 index db23136..0000000 --- a/uploader/expression_data/index.py +++ /dev/null @@ -1,125 +0,0 @@ -"""Entry-point module""" -import os -import mimetypes -from typing import Tuple -from zipfile import ZipFile, is_zipfile - -from werkzeug.utils import secure_filename -from flask import ( - flash, - request, - url_for, - redirect, - Blueprint, - render_template, - current_app as app) - -from uploader.species.models import all_species as species -from uploader.authorisation import require_login -from uploader.db_utils import with_db_connection - -indexbp = Blueprint("index", __name__) - - -def errors(rqst) -> Tuple[str, ...]: - """Return a tuple of the errors found in the request `rqst`. If no error is - found, then an empty tuple is returned.""" - def __filetype_error__(): - return ( - ("Invalid file type provided.",) - if rqst.form.get("filetype") not in ("average", "standard-error") - else tuple()) - - def __file_missing_error__(): - return ( - ("No file was uploaded.",) - if ("qc_text_file" not in rqst.files or - rqst.files["qc_text_file"].filename == "") - else tuple()) - - def __file_mimetype_error__(): - text_file = rqst.files["qc_text_file"] - return ( - ( - ("Invalid file! 
Expected a tab-separated-values file, or a zip " - "file of the a tab-separated-values file."),) - if text_file.mimetype not in ( - "text/plain", "text/tab-separated-values", - "application/zip") - else tuple()) - - return ( - __filetype_error__() + - (__file_missing_error__() or __file_mimetype_error__())) - -def zip_file_errors(filepath, upload_dir) -> Tuple[str, ...]: - """Check the uploaded zip file for errors.""" - zfile_errors: Tuple[str, ...] = tuple() - if is_zipfile(filepath): - with ZipFile(filepath, "r") as zfile: - infolist = zfile.infolist() - if len(infolist) != 1: - zfile_errors = zfile_errors + ( - ("Expected exactly one (1) member file within the uploaded zip " - f"file. Got {len(infolist)} member files."),) - if len(infolist) == 1 and infolist[0].is_dir(): - zfile_errors = zfile_errors + ( - ("Expected a member text file in the uploaded zip file. Got a " - "directory/folder."),) - - if len(infolist) == 1 and not infolist[0].is_dir(): - zfile.extract(infolist[0], path=upload_dir) - mime = mimetypes.guess_type(f"{upload_dir}/{infolist[0].filename}") - if mime[0] != "text/tab-separated-values": - zfile_errors = zfile_errors + ( - ("Expected the member text file in the uploaded zip file to" - " be a tab-separated file."),) - - return zfile_errors - - -@indexbp.route("/", methods=["GET"]) -@require_login -def index(): - """Display the expression data index page.""" - return render_template("expression-data/index.html") - - -@indexbp.route("/upload", methods=["GET", "POST"]) -@require_login -def upload_file(): - """Enables uploading the files""" - if request.method == "GET": - return render_template( - "select_species.html", species=with_db_connection(species)) - - upload_dir = app.config["UPLOAD_FOLDER"] - request_errors = errors(request) - if request_errors: - for error in request_errors: - flash(error, "alert-danger error-expr-data") - return redirect(url_for("expression-data.index.upload_file")) - - filename = 
secure_filename(request.files["qc_text_file"].filename) - if not os.path.exists(upload_dir): - os.mkdir(upload_dir) - - filepath = os.path.join(upload_dir, filename) - request.files["qc_text_file"].save(os.path.join(upload_dir, filename)) - - zip_errors = zip_file_errors(filepath, upload_dir) - if zip_errors: - for error in zip_errors: - flash(error, "alert-danger error-expr-data") - return redirect(url_for("expression-data.index.upload_file")) - - return redirect(url_for("expression-data.parse.parse", - speciesid=request.form["speciesid"], - filename=filename, - filetype=request.form["filetype"])) - -@indexbp.route("/data-review", methods=["GET"]) -@require_login -def data_review(): - """Provide some help on data expectations to the user.""" - return render_template("data_review.html") diff --git a/uploader/expression_data/parse.py b/uploader/expression_data/parse.py deleted file mode 100644 index fc1c3f0..0000000 --- a/uploader/expression_data/parse.py +++ /dev/null @@ -1,178 +0,0 @@ -"""File parsing module""" -import os - -import jsonpickle -from redis import Redis -from flask import flash, request, url_for, redirect, Blueprint, render_template -from flask import current_app as app - -from quality_control.errors import InvalidValue, DuplicateHeading - -from uploader import jobs -from uploader.dbinsert import species_by_id -from uploader.db_utils import with_db_connection -from uploader.authorisation import require_login - -parsebp = Blueprint("parse", __name__) - -def isinvalidvalue(item): - """Check whether item is of type InvalidValue""" - return isinstance(item, InvalidValue) - -def isduplicateheading(item): - """Check whether item is of type DuplicateHeading""" - return isinstance(item, DuplicateHeading) - -@parsebp.route("/parse", methods=["GET"]) -@require_login -def parse(): - """Trigger file parsing""" - errors = False - speciesid = request.args.get("speciesid") - filename = request.args.get("filename") - filetype = request.args.get("filetype") - if 
speciesid is None: - flash("No species selected", "alert-error error-expr-data") - errors = True - else: - try: - speciesid = int(speciesid) - species = with_db_connection( - lambda con: species_by_id(con, speciesid)) - if not bool(species): - flash("No such species.", "alert-error error-expr-data") - errors = True - except ValueError: - flash("Invalid speciesid provided. Expected an integer.", - "alert-error error-expr-data") - errors = True - - if filename is None: - flash("No file provided", "alert-error error-expr-data") - errors = True - - if filetype is None: - flash("No filetype provided", "alert-error error-expr-data") - errors = True - - if filetype not in ("average", "standard-error"): - flash("Invalid filetype provided", "alert-error error-expr-data") - errors = True - - if filename: - filepath = os.path.join(app.config["UPLOAD_FOLDER"], filename) - if not os.path.exists(filepath): - flash("Selected file does not exist (any longer)", - "alert-error error-expr-data") - errors = True - - if errors: - return redirect(url_for("expression-data.index.upload_file")) - - redisurl = app.config["REDIS_URL"] - with Redis.from_url(redisurl, decode_responses=True) as rconn: - job = jobs.launch_job( - jobs.build_file_verification_job( - rconn, app.config["SQL_URI"], redisurl, - speciesid, filepath, filetype, - app.config["JOBS_TTL_SECONDS"]), - redisurl, - f"{app.config['UPLOAD_FOLDER']}/job_errors") - - return redirect(url_for("expression-data.parse.parse_status", job_id=job["jobid"])) - -@parsebp.route("/status/<job_id>", methods=["GET"]) -def parse_status(job_id: str): - "Retrieve the status of the job" - with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: - try: - job = jobs.job(rconn, jobs.jobsnamespace(), job_id) - except jobs.JobNotFound as _exc: - return render_template("no_such_job.html", job_id=job_id), 400 - - error_filename = jobs.error_filename( - job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors") - if 
os.path.exists(error_filename): - stat = os.stat(error_filename) - if stat.st_size > 0: - return redirect(url_for("parse.fail", job_id=job_id)) - - job_id = job["jobid"] - progress = float(job["percent"]) - status = job["status"] - filename = job.get("filename", "uploaded file") - errors = jsonpickle.decode( - job.get("errors", jsonpickle.encode(tuple()))) - if status in ("success", "aborted"): - return redirect(url_for("expression-data.parse.results", job_id=job_id)) - - if status == "parse-error": - return redirect(url_for("parse.fail", job_id=job_id)) - - app.jinja_env.globals.update( - isinvalidvalue=isinvalidvalue, - isduplicateheading=isduplicateheading) - return render_template( - "job_progress.html", - job_id = job_id, - job_status = status, - progress = progress, - message = job.get("message", ""), - job_name = f"Parsing '{filename}'", - errors=errors) - -@parsebp.route("/results/<job_id>", methods=["GET"]) -def results(job_id: str): - """Show results of parsing...""" - with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: - job = jobs.job(rconn, jobs.jobsnamespace(), job_id) - - if job: - filename = job["filename"] - errors = jsonpickle.decode(job.get("errors", jsonpickle.encode(tuple()))) - app.jinja_env.globals.update( - isinvalidvalue=isinvalidvalue, - isduplicateheading=isduplicateheading) - return render_template( - "parse_results.html", - errors=errors, - job_name = f"Parsing '{filename}'", - user_aborted = job.get("user_aborted"), - job_id=job["jobid"]) - - return render_template("no_such_job.html", job_id=job_id) - -@parsebp.route("/fail/<job_id>", methods=["GET"]) -def fail(job_id: str): - """Handle parsing failure""" - with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: - job = jobs.job(rconn, jobs.jobsnamespace(), job_id) - - if job: - error_filename = jobs.error_filename( - job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors") - if os.path.exists(error_filename): - stat = os.stat(error_filename) 
- if stat.st_size > 0: - return render_template( - "worker_failure.html", job_id=job_id) - - return render_template("parse_failure.html", job=job) - - return render_template("no_such_job.html", job_id=job_id) - -@parsebp.route("/abort", methods=["POST"]) -@require_login -def abort(): - """Handle user request to abort file processing""" - job_id = request.form["job_id"] - - with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: - job = jobs.job(rconn, jobs.jobsnamespace(), job_id) - - if job: - rconn.hset(name=jobs.job_key(jobs.jobsnamespace(), job_id), - key="user_aborted", - value=int(True)) - - return redirect(url_for("expression-data.parse.parse_status", job_id=job_id)) diff --git a/uploader/expression_data/rqtl2.py b/uploader/expression_data/rqtl2.py deleted file mode 100644 index a855699..0000000 --- a/uploader/expression_data/rqtl2.py +++ /dev/null @@ -1,1175 +0,0 @@ -"""Module to handle uploading of R/qtl2 bundles."""#pylint: disable=[too-many-lines] -import sys -import json -import traceback -from pathlib import Path -from datetime import date -from uuid import UUID, uuid4 -from functools import partial -from zipfile import ZipFile, is_zipfile -from typing import Union, Callable, Optional - -import MySQLdb as mdb -from redis import Redis -from MySQLdb.cursors import DictCursor -from werkzeug.utils import secure_filename -from flask import ( - flash, - escape, - request, - jsonify, - url_for, - redirect, - Response, - Blueprint, - render_template, - current_app as app) - -from r_qtl import r_qtl2 - -from uploader import jobs -from uploader.files import save_file, fullpath -from uploader.dbinsert import species as all_species -from uploader.db_utils import with_db_connection, database_connection - -from uploader.authorisation import require_login -from uploader.db.platforms import platform_by_id, platforms_by_species -from uploader.db.averaging import averaging_methods, averaging_method_by_id -from uploader.db.tissues import 
all_tissues, tissue_by_id, create_new_tissue -from uploader.population.models import (save_population, - populations_by_species, - population_by_species_and_id) -from uploader.species.models import species_by_id -from uploader.db.datasets import ( - geno_dataset_by_id, - geno_datasets_by_species_and_population, - - probeset_study_by_id, - probeset_create_study, - probeset_dataset_by_id, - probeset_create_dataset, - probeset_datasets_by_study, - probeset_studies_by_species_and_population) - -rqtl2 = Blueprint("rqtl2", __name__) - - -@rqtl2.route("/", methods=["GET", "POST"]) -@rqtl2.route("/select-species", methods=["GET", "POST"]) -@require_login -def select_species(): - """Select the species.""" - if request.method == "GET": - return render_template("rqtl2/index.html", species=with_db_connection(all_species)) - - species_id = request.form.get("species_id") - species = with_db_connection( - lambda conn: species_by_id(conn, species_id)) - if bool(species): - return redirect(url_for( - "expression-data.rqtl2.select_population", species_id=species_id)) - flash("Invalid species or no species selected!", "alert-error error-rqtl2") - return redirect(url_for("expression-data.rqtl2.select_species")) - - -@rqtl2.route("/upload/species/<int:species_id>/select-population", - methods=["GET", "POST"]) -@require_login -def select_population(species_id: int): - """Select/Create the population to organise data under.""" - with database_connection(app.config["SQL_URI"]) as conn: - species = species_by_id(conn, species_id) - if not bool(species): - flash("Invalid species selected!", "alert-error error-rqtl2") - return redirect(url_for("expression-data.rqtl2.select_species")) - - if request.method == "GET": - return render_template( - "rqtl2/select-population.html", - species=species, - populations=populations_by_species(conn, species_id)) - - population = population_by_species_and_id( - conn, species["SpeciesId"], request.form.get("inbredset_id")) - if not bool(population): - 
flash("Invalid Population!", "alert-error error-rqtl2") - return redirect( - url_for("expression-data.rqtl2.select_population", pgsrc="error"), - code=307) - - return redirect(url_for("expression-data.rqtl2.upload_rqtl2_bundle", - species_id=species["SpeciesId"], - population_id=population["InbredSetId"])) - - -@rqtl2.route("/upload/species/<int:species_id>/create-population", - methods=["POST"]) -@require_login -def create_population(species_id: int): - """Create a new population for the given species.""" - population_page = redirect(url_for("expression-data.rqtl2.select_population", - species_id=species_id)) - with database_connection(app.config["SQL_URI"]) as conn: - species = species_by_id(conn, species_id) - population_name = request.form.get("inbredset_name", "").strip() - population_fullname = request.form.get("inbredset_fullname", "").strip() - if not bool(species): - flash("Invalid species!", "alert-error error-rqtl2") - return redirect(url_for("expression-data.rqtl2.select_species")) - if not bool(population_name): - flash("Invalid Population Name!", "alert-error error-rqtl2") - return population_page - if not bool(population_fullname): - flash("Invalid Population Full Name!", "alert-error error-rqtl2") - return population_page - new_population = save_population(conn, { - "SpeciesId": species["SpeciesId"], - "Name": population_name, - "InbredSetName": population_fullname, - "FullName": population_fullname, - "Family": request.form.get("inbredset_family") or None, - "Description": request.form.get("description") or None - }) - - flash("Population created successfully.", "alert-success") - return redirect( - url_for("expression-data.rqtl2.upload_rqtl2_bundle", - species_id=species_id, - population_id=new_population["population_id"], - pgsrc="create-population"), - code=307) - - -class __RequestError__(Exception): #pylint: disable=[invalid-name] - """Internal class to avoid pylint's `too-many-return-statements` error.""" - - 
-@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" - "/rqtl2-bundle"), - methods=["GET", "POST"]) -@require_login -def upload_rqtl2_bundle(species_id: int, population_id: int): - """Allow upload of R/qtl2 bundle.""" - with database_connection(app.config["SQL_URI"]) as conn: - species = species_by_id(conn, species_id) - population = population_by_species_and_id( - conn, species["SpeciesId"], population_id) - if not bool(species): - flash("Invalid species!", "alert-error error-rqtl2") - return redirect(url_for("expression-data.rqtl2.select_species")) - if not bool(population): - flash("Invalid Population!", "alert-error error-rqtl2") - return redirect( - url_for("expression-data.rqtl2.select_population", pgsrc="error"), - code=307) - if request.method == "GET" or ( - request.method == "POST" - and bool(request.args.get("pgsrc"))): - return render_template("rqtl2/upload-rqtl2-bundle-step-01.html", - species=species, - population=population) - - try: - app.logger.debug("Files in the form: %s", request.files) - the_file = save_file(request.files["rqtl2_bundle_file"], - Path(app.config["UPLOAD_FOLDER"])) - except AssertionError: - app.logger.debug(traceback.format_exc()) - flash("Please provide a valid R/qtl2 zip bundle.", - "alert-error error-rqtl2") - return redirect(url_for("expression-data.rqtl2.upload_rqtl2_bundle", - species_id=species_id, - population_id=population_id)) - - if not is_zipfile(str(the_file)): - app.logger.debug("The file is not a zip file.") - raise __RequestError__("Invalid file! 
Expected a zip file.") - - jobid = trigger_rqtl2_bundle_qc( - species_id, - population_id, - the_file, - request.files["rqtl2_bundle_file"].filename)#type: ignore[arg-type] - return redirect(url_for( - "expression-data.rqtl2.rqtl2_bundle_qc_status", jobid=jobid)) - - -def trigger_rqtl2_bundle_qc( - species_id: int, - population_id: int, - rqtl2bundle: Path, - originalfilename: str -) -> UUID: - """Trigger QC on the R/qtl2 bundle.""" - redisuri = app.config["REDIS_URL"] - with Redis.from_url(redisuri, decode_responses=True) as rconn: - jobid = uuid4() - redis_ttl_seconds = app.config["JOBS_TTL_SECONDS"] - jobs.launch_job( - jobs.initialise_job( - rconn, - jobs.jobsnamespace(), - str(jobid), - [sys.executable, "-m", "scripts.qc_on_rqtl2_bundle", - app.config["SQL_URI"], app.config["REDIS_URL"], - jobs.jobsnamespace(), str(jobid), str(species_id), - str(population_id), "--redisexpiry", - str(redis_ttl_seconds)], - "rqtl2-bundle-qc-job", - redis_ttl_seconds, - {"job-metadata": json.dumps({ - "speciesid": species_id, - "populationid": population_id, - "rqtl2-bundle-file": str(rqtl2bundle.absolute()), - "original-filename": originalfilename})}), - redisuri, - f"{app.config['UPLOAD_FOLDER']}/job_errors") - return jobid - - -def chunk_name(uploadfilename: str, chunkno: int) -> str: - """Generate chunk name from original filename and chunk number""" - if uploadfilename == "": - raise ValueError("Name cannot be empty!") - if chunkno < 1: - raise ValueError("Chunk number must be greater than zero") - return f"{secure_filename(uploadfilename)}_part_{chunkno:05d}" - - -def chunks_directory(uniqueidentifier: str) -> Path: - """Compute the directory where chunks are temporarily stored.""" - if uniqueidentifier == "": - raise ValueError("Unique identifier cannot be empty!") - return Path(app.config["UPLOAD_FOLDER"], f"tempdir_{uniqueidentifier}") - - -@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" - "/rqtl2-bundle-chunked"), - methods=["GET"]) 
-@require_login -def upload_rqtl2_bundle_chunked_get(# pylint: disable=["unused-argument"] - species_id: int, - population_id: int -): - """ - Extension to the `upload_rqtl2_bundle` endpoint above that provides a way - for testing whether all the chunks have been uploaded and to assist with - resuming a failed expression-data. - """ - fileid = request.args.get("resumableIdentifier", type=str) or "" - filename = request.args.get("resumableFilename", type=str) or "" - chunk = request.args.get("resumableChunkNumber", type=int) or 0 - if not(fileid or filename or chunk): - return jsonify({ - "message": "At least one required query parameter is missing.", - "error": "BadRequest", - "statuscode": 400 - }), 400 - - if Path(chunks_directory(fileid), - chunk_name(filename, chunk)).exists(): - return "OK" - - return jsonify({ - "message": f"Chunk {chunk} was not found.", - "error": "NotFound", - "statuscode": 404 - }), 404 - - -def __merge_chunks__(targetfile: Path, chunkpaths: tuple[Path, ...]) -> Path: - """Merge the chunks into a single file.""" - with open(targetfile, "ab") as _target: - for chunkfile in chunkpaths: - with open(chunkfile, "rb") as _chunkdata: - _target.write(_chunkdata.read()) - - chunkfile.unlink() - return targetfile - - -@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" - "/rqtl2-bundle-chunked"), - methods=["POST"]) -@require_login -def upload_rqtl2_bundle_chunked_post(species_id: int, population_id: int): - """ - Extension to the `upload_rqtl2_bundle` endpoint above that allows large - files to be uploaded in chunks. 
- - This should hopefully speed up uploads, and if done right, even enable - resumable uploads - """ - _totalchunks = request.form.get("resumableTotalChunks", type=int) or 0 - _chunk = request.form.get("resumableChunkNumber", default=1, type=int) - _uploadfilename = request.form.get( - "resumableFilename", default="", type=str) or "" - _fileid = request.form.get( - "resumableIdentifier", default="", type=str) or "" - _targetfile = Path(app.config["UPLOAD_FOLDER"], _fileid) - - if _targetfile.exists(): - return jsonify({ - "message": ( - "A file with a similar unique identifier has previously been " - "uploaded and possibly is/has being/been processed."), - "error": "BadRequest", - "statuscode": 400 - }), 400 - - try: - # save chunk data - chunks_directory(_fileid).mkdir(exist_ok=True, parents=True) - request.files["file"].save(Path(chunks_directory(_fileid), - chunk_name(_uploadfilename, _chunk))) - - # Check whether upload is complete - chunkpaths = tuple( - Path(chunks_directory(_fileid), chunk_name(_uploadfilename, _achunk)) - for _achunk in range(1, _totalchunks+1)) - if all(_file.exists() for _file in chunkpaths): - # merge_files and clean up chunks - __merge_chunks__(_targetfile, chunkpaths) - chunks_directory(_fileid).rmdir() - jobid = trigger_rqtl2_bundle_qc( - species_id, population_id, _targetfile, _uploadfilename) - return url_for( - "expression-data.rqtl2.rqtl2_bundle_qc_status", jobid=jobid) - except Exception as exc:# pylint: disable=[broad-except] - msg = "Error processing uploaded file chunks." 
- app.logger.error(msg, exc_info=True, stack_info=True) - return jsonify({ - "message": msg, - "error": type(exc).__name__, - "error-description": " ".join(str(arg) for arg in exc.args), - "error-trace": traceback.format_exception(exc) - }), 500 - - return "OK" - - -@rqtl2.route("/upload/species/rqtl2-bundle/qc-status/<uuid:jobid>", - methods=["GET", "POST"]) -@require_login -def rqtl2_bundle_qc_status(jobid: UUID): - """Check the status of the QC jobs.""" - with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn, - database_connection(app.config["SQL_URI"]) as dbconn): - try: - thejob = jobs.job(rconn, jobs.jobsnamespace(), jobid) - messagelistname = thejob.get("log-messagelist") - logmessages = (rconn.lrange(messagelistname, 0, -1) - if bool(messagelistname) else []) - jobstatus = thejob["status"] - if jobstatus == "error": - return render_template("rqtl2/rqtl2-qc-job-error.html", - job=thejob, - errorsgeneric=json.loads( - thejob.get("errors-generic", "[]")), - errorsgeno=json.loads( - thejob.get("errors-geno", "[]")), - errorspheno=json.loads( - thejob.get("errors-pheno", "[]")), - errorsphenose=json.loads( - thejob.get("errors-phenose", "[]")), - errorsphenocovar=json.loads( - thejob.get("errors-phenocovar", "[]")), - messages=logmessages) - if jobstatus == "success": - jobmeta = json.loads(thejob["job-metadata"]) - species = species_by_id(dbconn, jobmeta["speciesid"]) - return render_template( - "rqtl2/rqtl2-qc-job-results.html", - species=species, - population=population_by_species_and_id( - dbconn, species["SpeciesId"], jobmeta["populationid"]), - rqtl2bundle=Path(jobmeta["rqtl2-bundle-file"]).name, - rqtl2bundleorig=jobmeta["original-filename"]) - - def compute_percentage(thejob, filetype) -> Union[str, None]: - if f"{filetype}-linecount" in thejob: - return "100" - if f"{filetype}-filesize" in thejob: - percent = ((int(thejob.get(f"{filetype}-checked", 0)) - / - int(thejob.get(f"{filetype}-filesize", 1))) - * 100) - return 
f"{percent:.2f}" - return None - - return render_template( - "rqtl2/rqtl2-qc-job-status.html", - job=thejob, - geno_percent=compute_percentage(thejob, "geno"), - pheno_percent=compute_percentage(thejob, "pheno"), - phenose_percent=compute_percentage(thejob, "phenose"), - messages=logmessages) - except jobs.JobNotFound: - return render_template("rqtl2/no-such-job.html", jobid=jobid) - - -def redirect_on_error(flaskroute, **kwargs): - """Utility to redirect on error""" - return redirect(url_for(flaskroute, **kwargs, pgsrc="error"), - code=(307 if request.method == "POST" else 302)) - - -def check_species(conn: mdb.Connection, formargs: dict) -> Optional[ - tuple[str, Response]]: - """ - Check whether the 'species_id' value is provided, and whether a - corresponding species exists in the database. - - Maybe give the function a better name...""" - speciespage = redirect_on_error("expression-data.rqtl2.select_species") - if "species_id" not in formargs: - return "You MUST provide the Species identifier.", speciespage - - if not bool(species_by_id(conn, formargs["species_id"])): - return "No species with the provided identifier exists.", speciespage - - return None - - -def check_population(conn: mdb.Connection, - formargs: dict, - species_id) -> Optional[tuple[str, Response]]: - """ - Check whether the 'population_id' value is provided, and whether a - corresponding population exists in the database. 
- - Maybe give the function a better name...""" - poppage = redirect_on_error( - "expression-data.rqtl2.select_species", species_id=species_id) - if "population_id" not in formargs: - return "You MUST provide the Population identifier.", poppage - - if not bool(population_by_species_and_id( - conn, species_id, formargs["population_id"])): - return "No population with the provided identifier exists.", poppage - - return None - - -def check_r_qtl2_bundle(formargs: dict, - species_id, - population_id) -> Optional[tuple[str, Response]]: - """Check for the existence of the R/qtl2 bundle.""" - fileuploadpage = redirect_on_error("expression-data.rqtl2.upload_rqtl2_bundle", - species_id=species_id, - population_id=population_id) - if not "rqtl2_bundle_file" in formargs: - return ( - "You MUST provide a R/qtl2 zip bundle for expression-data.", fileuploadpage) - - if not Path(fullpath(formargs["rqtl2_bundle_file"])).exists(): - return "No R/qtl2 bundle with the given name exists.", fileuploadpage - - return None - - -def check_geno_dataset(conn: mdb.Connection, - formargs: dict, - species_id, - population_id) -> Optional[tuple[str, Response]]: - """Check for the Genotype dataset.""" - genodsetpg = redirect_on_error("expression-data.rqtl2.select_dataset_info", - species_id=species_id, - population_id=population_id) - if not bool(formargs.get("geno-dataset-id")): - return ( - "You MUST provide a valid Genotype dataset identifier", genodsetpg) - - with conn.cursor(cursorclass=DictCursor) as cursor: - cursor.execute("SELECT * FROM GenoFreeze WHERE Id=%s", - (formargs["geno-dataset-id"],)) - results = cursor.fetchall() - if not bool(results): - return ("No genotype dataset with the provided identifier exists.", - genodsetpg) - if len(results) > 1: - return ( - "Data corruption: More than one genotype dataset with the same " - "identifier.", - genodsetpg) - - return None - -def check_tissue( - conn: mdb.Connection,formargs: dict) -> Optional[tuple[str, Response]]: - """Check for 
tissue/organ/biological material.""" - selectdsetpg = redirect_on_error("expression-data.rqtl2.select_dataset_info", - species_id=formargs["species_id"], - population_id=formargs["population_id"]) - if not bool(formargs.get("tissueid", "").strip()): - return ("No tissue/organ/biological material provided.", selectdsetpg) - - with conn.cursor(cursorclass=DictCursor) as cursor: - cursor.execute("SELECT * FROM Tissue WHERE Id=%s", - (formargs["tissueid"],)) - results = cursor.fetchall() - if not bool(results): - return ("No tissue/organ with the provided identifier exists.", - selectdsetpg) - - if len(results) > 1: - return ( - "Data corruption: More than one tissue/organ with the same " - "identifier.", - selectdsetpg) - - return None - - -def check_probe_study(conn: mdb.Connection, - formargs: dict, - species_id, - population_id) -> Optional[tuple[str, Response]]: - """Check for the ProbeSet study.""" - dsetinfopg = redirect_on_error("expression-data.rqtl2.select_dataset_info", - species_id=species_id, - population_id=population_id) - if not bool(formargs.get("probe-study-id")): - return "No probeset study was selected!", dsetinfopg - - if not bool(probeset_study_by_id(conn, formargs["probe-study-id"])): - return ("No probeset study with the provided identifier exists", - dsetinfopg) - - return None - - -def check_probe_dataset(conn: mdb.Connection, - formargs: dict, - species_id, - population_id) -> Optional[tuple[str, Response]]: - """Check for the ProbeSet dataset.""" - dsetinfopg = redirect_on_error("expression-data.rqtl2.select_dataset_info", - species_id=species_id, - population_id=population_id) - if not bool(formargs.get("probe-dataset-id")): - return "No probeset dataset was selected!", dsetinfopg - - if not bool(probeset_dataset_by_id(conn, formargs["probe-dataset-id"])): - return ("No probeset dataset with the provided identifier exists", - dsetinfopg) - - return None - - -def with_errors(endpointthunk: Callable, *checkfns): - """Run 'endpointthunk' with 
error checking.""" - formargs = {**dict(request.args), **dict(request.form)} - errors = tuple(item for item in (_fn(formargs=formargs) for _fn in checkfns) - if item is not None) - if len(errors) > 0: - flash(errors[0][0], "alert-error error-rqtl2") - return errors[0][1] - - return endpointthunk() - - -@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" - "/rqtl2-bundle/select-geno-dataset"), - methods=["POST"]) -@require_login -def select_geno_dataset(species_id: int, population_id: int): - """Select from existing geno datasets.""" - with database_connection(app.config["SQL_URI"]) as conn: - def __thunk__(): - geno_dset = geno_datasets_by_species_and_population( - conn, species_id, population_id) - if not bool(geno_dset): - flash("No genotype dataset was provided!", - "alert-error error-rqtl2") - return redirect(url_for("expression-data.rqtl2.select_geno_dataset", - species_id=species_id, - population_id=population_id, - pgsrc="error"), - code=307) - - flash("Genotype accepted", "alert-success error-rqtl2") - return redirect(url_for("expression-data.rqtl2.select_dataset_info", - species_id=species_id, - population_id=population_id, - pgsrc="expression-data.rqtl2.select_geno_dataset"), - code=307) - - return with_errors(__thunk__, - partial(check_species, conn=conn), - partial(check_population, conn=conn, - species_id=species_id), - partial(check_r_qtl2_bundle, - species_id=species_id, - population_id=population_id), - partial(check_geno_dataset, - conn=conn, - species_id=species_id, - population_id=population_id)) - - -@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" - "/rqtl2-bundle/create-geno-dataset"), - methods=["POST"]) -@require_login -def create_geno_dataset(species_id: int, population_id: int): - """Create a new geno dataset.""" - with database_connection(app.config["SQL_URI"]) as conn: - def __thunk__(): - sgeno_page = redirect(url_for("expression-data.rqtl2.select_dataset_info", - 
species_id=species_id, - population_id=population_id, - pgsrc="error"), - code=307) - errorclasses = "alert-error error-rqtl2 error-rqtl2-create-geno-dataset" - if not bool(request.form.get("dataset-name")): - flash("You must provide the dataset name", errorclasses) - return sgeno_page - if not bool(request.form.get("dataset-fullname")): - flash("You must provide the dataset full name", errorclasses) - return sgeno_page - public = 2 if request.form.get("dataset-public") == "on" else 0 - - with conn.cursor(cursorclass=DictCursor) as cursor: - datasetname = request.form["dataset-name"] - new_dataset = { - "name": datasetname, - "fname": request.form.get("dataset-fullname"), - "sname": request.form.get("dataset-shortname") or datasetname, - "today": date.today().isoformat(), - "pub": public, - "isetid": population_id - } - cursor.execute("SELECT * FROM GenoFreeze WHERE Name=%s", - (datasetname,)) - results = cursor.fetchall() - if bool(results): - flash( - f"A genotype dataset with name '{escape(datasetname)}' " - "already exists.", - errorclasses) - return redirect(url_for("expression-data.rqtl2.select_dataset_info", - species_id=species_id, - population_id=population_id, - pgsrc="error"), - code=307) - cursor.execute( - "INSERT INTO GenoFreeze(" - "Name, FullName, ShortName, CreateTime, public, InbredSetId" - ") " - "VALUES(" - "%(name)s, %(fname)s, %(sname)s, %(today)s, %(pub)s, %(isetid)s" - ")", - new_dataset) - flash("Created dataset successfully.", "alert-success") - return render_template( - "rqtl2/create-geno-dataset-success.html", - species=species_by_id(conn, species_id), - population=population_by_species_and_id( - conn, species_id, population_id), - rqtl2_bundle_file=request.form["rqtl2_bundle_file"], - geno_dataset={**new_dataset, "id": cursor.lastrowid}) - - return with_errors(__thunk__, - partial(check_species, conn=conn), - partial(check_population, conn=conn, species_id=species_id), - partial(check_r_qtl2_bundle, - species_id=species_id, - 
population_id=population_id)) - - -@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" - "/rqtl2-bundle/select-tissue"), - methods=["POST"]) -@require_login -def select_tissue(species_id: int, population_id: int): - """Select from existing tissues.""" - with database_connection(app.config["SQL_URI"]) as conn: - def __thunk__(): - if not bool(request.form.get("tissueid", "").strip()): - flash("Invalid tissue selection!", - "alert-error error-select-tissue error-rqtl2") - - return redirect(url_for("expression-data.rqtl2.select_dataset_info", - species_id=species_id, - population_id=population_id, - pgsrc="expression-data.rqtl2.select_geno_dataset"), - code=307) - - return with_errors(__thunk__, - partial(check_species, conn=conn), - partial(check_population, - conn=conn, - species_id=species_id), - partial(check_r_qtl2_bundle, - species_id=species_id, - population_id=population_id), - partial(check_geno_dataset, - conn=conn, - species_id=species_id, - population_id=population_id)) - -@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" - "/rqtl2-bundle/create-tissue"), - methods=["POST"]) -@require_login -def create_tissue(species_id: int, population_id: int): - """Add new tissue, organ or biological material to the system.""" - form = request.form - datasetinfopage = redirect( - url_for("expression-data.rqtl2.select_dataset_info", - species_id=species_id, - population_id=population_id, - pgsrc="expression-data.rqtl2.select_geno_dataset"), - code=307) - with database_connection(app.config["SQL_URI"]) as conn: - tissuename = form.get("tissuename", "").strip() - tissueshortname = form.get("tissueshortname", "").strip() - if not bool(tissuename): - flash("Organ/Tissue name MUST be provided.", - "alert-error error-create-tissue error-rqtl2") - return datasetinfopage - - if not bool(tissueshortname): - flash("Organ/Tissue short name MUST be provided.", - "alert-error error-create-tissue error-rqtl2") - return 
datasetinfopage - - try: - tissue = create_new_tissue(conn, tissuename, tissueshortname) - flash("Tissue created successfully!", "alert-success") - return render_template( - "rqtl2/create-tissue-success.html", - species=species_by_id(conn, species_id), - population=population_by_species_and_id( - conn, species_id, population_id), - rqtl2_bundle_file=request.form["rqtl2_bundle_file"], - geno_dataset=geno_dataset_by_id( - conn, - int(request.form["geno-dataset-id"])), - tissue=tissue) - except mdb.IntegrityError as _ierr: - flash("Tissue/Organ with that short name already exists!", - "alert-error error-create-tissue error-rqtl2") - return datasetinfopage - - -@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" - "/rqtl2-bundle/select-probeset-study"), - methods=["POST"]) -@require_login -def select_probeset_study(species_id: int, population_id: int): - """Select or create a probeset study.""" - with database_connection(app.config["SQL_URI"]) as conn: - def __thunk__(): - summary_page = redirect(url_for("expression-data.rqtl2.select_dataset_info", - species_id=species_id, - population_id=population_id), - code=307) - if not bool(probeset_study_by_id(conn, int(request.form["probe-study-id"]))): - flash("Invalid study selected!", "alert-error error-rqtl2") - return summary_page - - return summary_page - return with_errors(__thunk__, - partial(check_species, conn=conn), - partial(check_population, - conn=conn, - species_id=species_id), - partial(check_r_qtl2_bundle, - species_id=species_id, - population_id=population_id), - partial(check_geno_dataset, - conn=conn, - species_id=species_id, - population_id=population_id), - partial(check_tissue, conn=conn), - partial(check_probe_study, - conn=conn, - species_id=species_id, - population_id=population_id)) - - -@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" - "/rqtl2-bundle/select-probeset-dataset"), - methods=["POST"]) -@require_login -def 
select_probeset_dataset(species_id: int, population_id: int): - """Select or create a probeset dataset.""" - with database_connection(app.config["SQL_URI"]) as conn: - def __thunk__(): - summary_page = redirect(url_for("expression-data.rqtl2.select_dataset_info", - species_id=species_id, - population_id=population_id), - code=307) - if not bool(probeset_study_by_id(conn, int(request.form["probe-study-id"]))): - flash("Invalid study selected!", "alert-error error-rqtl2") - return summary_page - - return summary_page - - return with_errors(__thunk__, - partial(check_species, conn=conn), - partial(check_population, - conn=conn, - species_id=species_id), - partial(check_r_qtl2_bundle, - species_id=species_id, - population_id=population_id), - partial(check_geno_dataset, - conn=conn, - species_id=species_id, - population_id=population_id), - partial(check_tissue, conn=conn), - partial(check_probe_study, - conn=conn, - species_id=species_id, - population_id=population_id), - partial(check_probe_dataset, - conn=conn, - species_id=species_id, - population_id=population_id)) - - -@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" - "/rqtl2-bundle/create-probeset-study"), - methods=["POST"]) -@require_login -def create_probeset_study(species_id: int, population_id: int): - """Create a new probeset study.""" - errorclasses = "alert-error error-rqtl2 error-rqtl2-create-probeset-study" - with database_connection(app.config["SQL_URI"]) as conn: - def __thunk__(): - form = request.form - dataset_info_page = redirect( - url_for("expression-data.rqtl2.select_dataset_info", - species_id=species_id, - population_id=population_id), - code=307) - - if not (bool(form.get("platformid")) and - bool(platform_by_id(conn, int(form["platformid"])))): - flash("Invalid platform selected.", errorclasses) - return dataset_info_page - - if not (bool(form.get("tissueid")) and - bool(tissue_by_id(conn, int(form["tissueid"])))): - flash("Invalid tissue selected.", 
errorclasses) - return dataset_info_page - - studyname = form["studyname"] - try: - study = probeset_create_study( - conn, population_id, int(form["platformid"]), int(form["tissueid"]), - studyname, form.get("studyfullname") or "", - form.get("studyshortname") or "") - except mdb.IntegrityError as _ierr: - flash(f"ProbeSet study with name '{escape(studyname)}' already " - "exists.", - errorclasses) - return dataset_info_page - return render_template( - "rqtl2/create-probe-study-success.html", - species=species_by_id(conn, species_id), - population=population_by_species_and_id( - conn, species_id, population_id), - rqtl2_bundle_file=request.form["rqtl2_bundle_file"], - geno_dataset=geno_dataset_by_id( - conn, - int(request.form["geno-dataset-id"])), - study=study) - - return with_errors(__thunk__, - partial(check_species, conn=conn), - partial(check_population, - conn=conn, - species_id=species_id), - partial(check_r_qtl2_bundle, - species_id=species_id, - population_id=population_id), - partial(check_geno_dataset, - conn=conn, - species_id=species_id, - population_id=population_id), - partial(check_tissue, conn=conn)) - - -@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" - "/rqtl2-bundle/create-probeset-dataset"), - methods=["POST"]) -@require_login -def create_probeset_dataset(species_id: int, population_id: int):#pylint: disable=[too-many-return-statements] - """Create a new probeset dataset.""" - errorclasses = "alert-error error-rqtl2 error-rqtl2-create-probeset-dataset" - with database_connection(app.config["SQL_URI"]) as conn: - def __thunk__():#pylint: disable=[too-many-return-statements] - form = request.form - summary_page = redirect(url_for("expression-data.rqtl2.select_dataset_info", - species_id=species_id, - population_id=population_id), - code=307) - if not bool(form.get("averageid")): - flash("Averaging method not selected!", errorclasses) - return summary_page - if not bool(form.get("datasetname")): - flash("Dataset 
name not provided!", errorclasses) - return summary_page - if not bool(form.get("datasetfullname")): - flash("Dataset full name not provided!", errorclasses) - return summary_page - - tissue = tissue_by_id(conn, form.get("tissueid", "").strip()) - - study = probeset_study_by_id(conn, int(form["probe-study-id"])) - if not bool(study): - flash("Invalid ProbeSet study provided!", errorclasses) - return summary_page - - avgmethod = averaging_method_by_id(conn, int(form["averageid"])) - if not bool(avgmethod): - flash("Invalid averaging method provided!", errorclasses) - return summary_page - - try: - dset = probeset_create_dataset(conn, - int(form["probe-study-id"]), - int(form["averageid"]), - form["datasetname"], - form["datasetfullname"], - form["datasetshortname"], - form["datasetpublic"] == "on", - form.get( - "datasetdatascale", "log2")) - except mdb.IntegrityError as _ierr: - app.logger.debug("Possible integrity error: %s", traceback.format_exc()) - flash(("IntegrityError: The data you provided has some errors: " - f"{_ierr.args}"), - errorclasses) - return summary_page - except Exception as _exc:# pylint: disable=[broad-except] - app.logger.debug("Error creating ProbeSet dataset: %s", - traceback.format_exc()) - flash(("There was a problem creating your dataset. 
Please try " - "again."), - errorclasses) - return summary_page - return render_template( - "rqtl2/create-probe-dataset-success.html", - species=species_by_id(conn, species_id), - population=population_by_species_and_id( - conn, species_id, population_id), - rqtl2_bundle_file=request.form["rqtl2_bundle_file"], - geno_dataset=geno_dataset_by_id( - conn, - int(request.form["geno-dataset-id"])), - tissue=tissue, - study=study, - avgmethod=avgmethod, - dataset=dset) - - return with_errors(__thunk__, - partial(check_species, conn=conn), - partial(check_population, - conn=conn, - species_id=species_id), - partial(check_r_qtl2_bundle, - species_id=species_id, - population_id=population_id), - partial(check_geno_dataset, - conn=conn, - species_id=species_id, - population_id=population_id), - partial(check_tissue, conn=conn), - partial(check_probe_study, - conn=conn, - species_id=species_id, - population_id=population_id)) - - -@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" - "/rqtl2-bundle/dataset-info"), - methods=["POST"]) -@require_login -def select_dataset_info(species_id: int, population_id: int): - """ - If `geno` files exist in the R/qtl2 bundle, prompt user to provide the - dataset the genotypes belong to. 
- """ - form = request.form - with database_connection(app.config["SQL_URI"]) as conn: - def __thunk__(): - species = species_by_id(conn, species_id) - population = population_by_species_and_id( - conn, species_id, population_id) - thefile = fullpath(form["rqtl2_bundle_file"]) - with ZipFile(str(thefile), "r") as zfile: - cdata = r_qtl2.control_data(zfile) - - geno_dataset = geno_dataset_by_id( - conn,form.get("geno-dataset-id", "").strip()) - if "geno" in cdata and not bool(form.get("geno-dataset-id")): - return render_template( - "rqtl2/select-geno-dataset.html", - species=species, - population=population, - rqtl2_bundle_file=thefile.name, - datasets=geno_datasets_by_species_and_population( - conn, species_id, population_id)) - - tissue = tissue_by_id(conn, form.get("tissueid", "").strip()) - if "pheno" in cdata and not bool(tissue): - return render_template( - "rqtl2/select-tissue.html", - species=species, - population=population, - rqtl2_bundle_file=thefile.name, - geno_dataset=geno_dataset, - studies=probeset_studies_by_species_and_population( - conn, species_id, population_id), - platforms=platforms_by_species(conn, species_id), - tissues=all_tissues(conn)) - - probeset_study = probeset_study_by_id( - conn, form.get("probe-study-id", "").strip()) - if "pheno" in cdata and not bool(probeset_study): - return render_template( - "rqtl2/select-probeset-study-id.html", - species=species, - population=population, - rqtl2_bundle_file=thefile.name, - geno_dataset=geno_dataset, - studies=probeset_studies_by_species_and_population( - conn, species_id, population_id), - platforms=platforms_by_species(conn, species_id), - tissue=tissue) - probeset_study = probeset_study_by_id( - conn, int(form["probe-study-id"])) - - probeset_dataset = probeset_dataset_by_id( - conn, form.get("probe-dataset-id", "").strip()) - if "pheno" in cdata and not bool(probeset_dataset): - return render_template( - "rqtl2/select-probeset-dataset.html", - species=species, - population=population, - 
rqtl2_bundle_file=thefile.name, - geno_dataset=geno_dataset, - probe_study=probeset_study, - tissue=tissue, - datasets=probeset_datasets_by_study( - conn, int(form["probe-study-id"])), - avgmethods=averaging_methods(conn)) - - return render_template("rqtl2/summary-info.html", - species=species, - population=population, - rqtl2_bundle_file=thefile.name, - geno_dataset=geno_dataset, - tissue=tissue, - probe_study=probeset_study, - probe_dataset=probeset_dataset) - - return with_errors(__thunk__, - partial(check_species, conn=conn), - partial(check_population, - conn=conn, - species_id=species_id), - partial(check_r_qtl2_bundle, - species_id=species_id, - population_id=population_id)) - - -@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>" - "/rqtl2-bundle/confirm-bundle-details"), - methods=["POST"]) -@require_login -def confirm_bundle_details(species_id: int, population_id: int): - """Confirm the details and trigger R/qtl2 bundle processing...""" - redisuri = app.config["REDIS_URL"] - with (database_connection(app.config["SQL_URI"]) as conn, - Redis.from_url(redisuri, decode_responses=True) as rconn): - def __thunk__(): - redis_ttl_seconds = app.config["JOBS_TTL_SECONDS"] - jobid = str(uuid4()) - _job = jobs.launch_job( - jobs.initialise_job( - rconn, - jobs.jobsnamespace(), - jobid, - [ - sys.executable, "-m", "scripts.process_rqtl2_bundle", - app.config["SQL_URI"], app.config["REDIS_URL"], - jobs.jobsnamespace(), jobid, "--redisexpiry", - str(redis_ttl_seconds)], - "R/qtl2 Bundle Upload", - redis_ttl_seconds, - { - "bundle-metadata": json.dumps({ - "speciesid": species_id, - "populationid": population_id, - "rqtl2-bundle-file": str(fullpath( - request.form["rqtl2_bundle_file"])), - "geno-dataset-id": request.form.get( - "geno-dataset-id", ""), - "probe-study-id": request.form.get( - "probe-study-id", ""), - "probe-dataset-id": request.form.get( - "probe-dataset-id", ""), - **({ - "platformid": probeset_study_by_id( - conn, - 
int(request.form["probe-study-id"]))["ChipId"] - } if bool(request.form.get("probe-study-id")) else {}) - }) - }), - redisuri, - f"{app.config['UPLOAD_FOLDER']}/job_errors") - - return redirect(url_for("expression-data.rqtl2.rqtl2_processing_status", - jobid=jobid)) - - return with_errors(__thunk__, - partial(check_species, conn=conn), - partial(check_population, - conn=conn, - species_id=species_id), - partial(check_r_qtl2_bundle, - species_id=species_id, - population_id=population_id), - partial(check_geno_dataset, - conn=conn, - species_id=species_id, - population_id=population_id), - partial(check_probe_study, - conn=conn, - species_id=species_id, - population_id=population_id), - partial(check_probe_dataset, - conn=conn, - species_id=species_id, - population_id=population_id)) - - -@rqtl2.route("/status/<uuid:jobid>") -def rqtl2_processing_status(jobid: UUID): - """Retrieve the status of the job processing the uploaded R/qtl2 bundle.""" - with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: - try: - thejob = jobs.job(rconn, jobs.jobsnamespace(), jobid) - - messagelistname = thejob.get("log-messagelist") - logmessages = (rconn.lrange(messagelistname, 0, -1) - if bool(messagelistname) else []) - - if thejob["status"] == "error": - return render_template( - "rqtl2/rqtl2-job-error.html", job=thejob, messages=logmessages) - if thejob["status"] == "success": - return render_template("rqtl2/rqtl2-job-results.html", - job=thejob, - messages=logmessages) - - return render_template( - "rqtl2/rqtl2-job-status.html", job=thejob, messages=logmessages) - except jobs.JobNotFound as _exc: - return render_template("rqtl2/no-such-job.html", jobid=jobid) diff --git a/uploader/expression_data/views.py b/uploader/expression_data/views.py new file mode 100644 index 0000000..bbe6538 --- /dev/null +++ b/uploader/expression_data/views.py @@ -0,0 +1,384 @@ +"""Views for expression data""" +import os +import uuid +import mimetypes +from typing import Tuple +from 
import os
import uuid
import mimetypes
from typing import Tuple
from zipfile import ZipFile, is_zipfile

import jsonpickle
from redis import Redis
from werkzeug.utils import secure_filename
from flask import (flash,
                   request,
                   url_for,
                   redirect,
                   Blueprint,
                   current_app as app)

from quality_control.errors import InvalidValue, DuplicateHeading

from uploader import jobs
from uploader.datautils import order_by_family
from uploader.ui import make_template_renderer
from uploader.authorisation import require_login
from uploader.species.models import all_species, species_by_id
from uploader.db_utils import with_db_connection, database_connection
from uploader.population.models import (populations_by_species,
                                        population_by_species_and_id)

exprdatabp = Blueprint("expression-data", __name__)
render_template = make_template_renderer("expression-data")


def isinvalidvalue(item):
    """Check whether item is of type InvalidValue"""
    return isinstance(item, InvalidValue)


def isduplicateheading(item):
    """Check whether item is of type DuplicateHeading"""
    return isinstance(item, DuplicateHeading)


def _upload_url(species_id: int, population_id: int) -> str:
    """Build the URL of the upload form for the given species/population.

    The `upload_file` route has `species_id` and `population_id` in its path,
    so both values must always be supplied when building its URL.
    """
    return url_for("species.populations.expression-data.upload_file",
                   species_id=species_id,
                   population_id=population_id)


def _job_or_none(rconn, job_id):
    """Fetch the job identified by `job_id`; return `None` if it is missing.

    `jobs.job` signals a missing job by raising `jobs.JobNotFound`; converting
    that to `None` lets the views fall through to their "no such job" page
    instead of failing with an unhandled exception.
    """
    try:
        return jobs.job(rconn, jobs.jobsnamespace(), job_id)
    except jobs.JobNotFound:
        return None


def errors(rqst) -> Tuple[str, ...]:
    """Return a tuple of the errors found in the request `rqst`. If no error is
    found, then an empty tuple is returned.

    Three things are checked: that the `filetype` form field is one of
    "average" or "standard-error", that a `qc_text_file` was actually
    uploaded, and that the uploaded file has an acceptable mimetype."""
    def __filetype_error__():
        return (
            ("Invalid file type provided.",)
            if rqst.form.get("filetype") not in ("average", "standard-error")
            else tuple())

    def __file_missing_error__():
        return (
            ("No file was uploaded.",)
            if ("qc_text_file" not in rqst.files or
                rqst.files["qc_text_file"].filename == "")
            else tuple())

    def __file_mimetype_error__():
        text_file = rqst.files["qc_text_file"]
        return (
            (
                ("Invalid file! Expected a tab-separated-values file, or a zip "
                 "file of the a tab-separated-values file."),)
            if text_file.mimetype not in (
                    "text/plain", "text/tab-separated-values",
                    "application/zip")
            else tuple())

    # The mimetype check indexes `rqst.files["qc_text_file"]`, so it is only
    # evaluated when the "file missing" check found nothing wrong.
    return (
        __filetype_error__() +
        (__file_missing_error__() or __file_mimetype_error__()))


def zip_file_errors(filepath, upload_dir) -> Tuple[str, ...]:
    """Check the uploaded zip file for errors.

    Files that are not zip archives are not checked at all and give an empty
    tuple. A zip archive must contain exactly one member, which must not be a
    directory and whose name must be guessed as tab-separated-values. As a
    side effect, a single non-directory member is extracted into `upload_dir`.
    """
    zfile_errors: Tuple[str, ...] = tuple()
    if not is_zipfile(filepath):
        return zfile_errors

    with ZipFile(filepath, "r") as zfile:
        infolist = zfile.infolist()
        if len(infolist) != 1:
            zfile_errors = zfile_errors + (
                ("Expected exactly one (1) member file within the uploaded zip "
                 f"file. Got {len(infolist)} member files."),)
        if len(infolist) == 1 and infolist[0].is_dir():
            zfile_errors = zfile_errors + (
                ("Expected a member text file in the uploaded zip file. Got a "
                 "directory/folder."),)

        if len(infolist) == 1 and not infolist[0].is_dir():
            zfile.extract(infolist[0], path=upload_dir)
            # `guess_type` only inspects the file name, not its contents.
            mime = mimetypes.guess_type(f"{upload_dir}/{infolist[0].filename}")
            if mime[0] != "text/tab-separated-values":
                zfile_errors = zfile_errors + (
                    ("Expected the member text file in the uploaded zip file to"
                     " be a tab-separated file."),)

    return zfile_errors


@exprdatabp.route("populations/expression-data", methods=["GET"])
@require_login
def index():
    """Display the expression data index page.

    Without a `species_id` query argument the species-selection page is
    rendered; with a valid one the user is sent on to population selection.
    """
    with database_connection(app.config["SQL_URI"]) as conn:
        if not bool(request.args.get("species_id")):
            return render_template("expression-data/index.html",
                                   species=order_by_family(all_species(conn)),
                                   activelink="expression-data")
        species = species_by_id(conn, request.args.get("species_id"))
        if not bool(species):
            flash("Could not find species selected!", "alert-danger")
            return redirect(url_for("species.populations.expression-data.index"))
        return redirect(url_for(
            "species.populations.expression-data.select_population",
            species_id=species["SpeciesId"]))


@exprdatabp.route("<int:species_id>/populations/expression-data/select-population",
                  methods=["GET"])
@require_login
def select_population(species_id: int):
    """Select the expression data's population.

    Without a `population_id` query argument the population-selection page is
    rendered; with a valid one the user is sent on to the upload form.
    """
    with database_connection(app.config["SQL_URI"]) as conn:
        species = species_by_id(conn, species_id)
        if not bool(species):
            flash("No such species!", "alert-danger")
            return redirect(url_for("species.populations.expression-data.index"))

        if not bool(request.args.get("population_id")):
            return render_template("expression-data/select-population.html",
                                   species=species,
                                   populations=order_by_family(
                                       populations_by_species(conn, species_id),
                                       order_key="FamilyOrder"),
                                   activelink="expression-data")

        population = population_by_species_and_id(
            conn, species_id, request.args.get("population_id"))
        if not bool(population):
            flash("No such population!", "alert-danger")
            return redirect(url_for(
                "species.populations.expression-data.select_population",
                species_id=species_id))

        return redirect(url_for("species.populations.expression-data.upload_file",
                                species_id=species_id,
                                population_id=population["Id"]))


@exprdatabp.route("<int:species_id>/populations/<int:population_id>/"
                  "expression-data/upload",
                  methods=["GET", "POST"])
@require_login
def upload_file(species_id: int, population_id: int):
    """Enables uploading the files

    GET renders the upload form. POST validates the request, saves the
    uploaded file into the configured `UPLOAD_FOLDER`, checks it if it is a
    zip archive, and then redirects to `parse_file`. On any validation error
    the messages are flashed and the user is sent back to the upload form.
    """
    with database_connection(app.config["SQL_URI"]) as conn:
        species = species_by_id(conn, species_id)
        population = population_by_species_and_id(conn, species_id, population_id)

    if request.method == "GET":
        return render_template("expression-data/select-file.html",
                               species=species,
                               population=population)

    upload_dir = app.config["UPLOAD_FOLDER"]
    request_errors = errors(request)
    if request_errors:
        for error in request_errors:
            flash(error, "alert-danger error-expr-data")
        # The route needs both IDs; omitting them makes `url_for` fail.
        return redirect(_upload_url(species_id, population_id))

    filename = secure_filename(
        request.files["qc_text_file"].filename)# type: ignore[arg-type]
    # `exist_ok` avoids the check-then-create race between two requests.
    os.makedirs(upload_dir, exist_ok=True)

    filepath = os.path.join(upload_dir, filename)
    request.files["qc_text_file"].save(filepath)

    zip_errors = zip_file_errors(filepath, upload_dir)
    if zip_errors:
        for error in zip_errors:
            flash(error, "alert-danger error-expr-data")
        return redirect(_upload_url(species_id, population_id))

    return redirect(url_for("species.populations.expression-data.parse_file",
                            species_id=species_id,
                            population_id=population_id,
                            filename=filename,
                            filetype=request.form["filetype"]))


@exprdatabp.route("/data-review", methods=["GET"])
@require_login
def data_review():
    """Provide some help on data expectations to the user."""
    return render_template("expression-data/data-review.html")


@exprdatabp.route(
    "<int:species_id>/populations/<int:population_id>/expression-data/parse",
    methods=["GET"])
@require_login
def parse_file(species_id: int, population_id: int):
    """Trigger file parsing

    Reads `filename` and `filetype` from the query string, validates them,
    and launches a file-verification job for the named file in
    `UPLOAD_FOLDER`. On success the user is redirected to `parse_status`; on
    any validation error they are sent back to the upload form.
    """
    _errors = False
    # The name comes from the query string, so it is untrusted: sanitise it
    # before joining it to the upload folder to prevent path traversal.
    filename = secure_filename(request.args.get("filename") or "")
    filetype = request.args.get("filetype")
    filepath = None

    species = with_db_connection(lambda con: species_by_id(con, species_id))
    if not bool(species):
        flash("No such species.", "alert-danger")
        _errors = True

    if filetype is None:
        flash("No filetype provided", "alert-danger")
        _errors = True
    elif filetype not in ("average", "standard-error"):
        flash("Invalid filetype provided", "alert-danger")
        _errors = True

    if not filename:
        # Covers both a missing and an empty `filename` argument.
        flash("No file provided", "alert-danger")
        _errors = True
    else:
        filepath = os.path.join(app.config["UPLOAD_FOLDER"], filename)
        if not os.path.exists(filepath):
            flash("Selected file does not exist (any longer)", "alert-danger")
            _errors = True

    if _errors:
        return redirect(_upload_url(species_id, population_id))

    redisurl = app.config["REDIS_URL"]
    with Redis.from_url(redisurl, decode_responses=True) as rconn:
        job = jobs.launch_job(
            jobs.build_file_verification_job(
                rconn, app.config["SQL_URI"], redisurl,
                species_id, filepath, filetype,# type: ignore[arg-type]
                app.config["JOBS_TTL_SECONDS"]),
            redisurl,
            f"{app.config['UPLOAD_FOLDER']}/job_errors")

    return redirect(url_for("species.populations.expression-data.parse_status",
                            species_id=species_id,
                            population_id=population_id,
                            job_id=job["jobid"]))


@exprdatabp.route(
    "<int:species_id>/populations/<int:population_id>/expression-data/parse/"
    "status/<uuid:job_id>",
    methods=["GET"])
@require_login
def parse_status(species_id: int, population_id: int, job_id: uuid.UUID):
    """Retrieve the status of the job

    Responds with a 400 "no such job" page when the job does not exist,
    redirects to `fail` when the job's error file is non-empty or its status
    is "parse-error", redirects to `results` when the status is "success" or
    "aborted", and otherwise renders the progress page.
    """
    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
        job = _job_or_none(rconn, job_id)

    if job is None:
        # NOTE(review): the other views render
        # "expression-data/no-such-job.html" -- confirm which name is right.
        return render_template("no_such_job.html", job_id=job_id), 400

    fail_url = url_for("species.populations.expression-data.fail",
                       species_id=species_id,
                       population_id=population_id,
                       job_id=job_id)

    error_filename = jobs.error_filename(
        job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors")
    if os.path.exists(error_filename) and os.stat(error_filename).st_size > 0:
        return redirect(fail_url)

    job_id = job["jobid"]
    progress = float(job["percent"])
    status = job["status"]
    filename = job.get("filename", "uploaded file")
    _errors = jsonpickle.decode(
        job.get("errors", jsonpickle.encode(tuple())))
    if status in ("success", "aborted"):
        return redirect(url_for("species.populations.expression-data.results",
                                species_id=species_id,
                                population_id=population_id,
                                job_id=job_id))

    if status == "parse-error":
        return redirect(fail_url)

    app.jinja_env.globals.update(
        isinvalidvalue=isinvalidvalue,
        isduplicateheading=isduplicateheading)
    return render_template(
        "expression-data/job-progress.html",
        job_id=job_id,
        job_status=status,
        progress=progress,
        message=job.get("message", ""),
        job_name=f"Parsing '{filename}'",
        errors=_errors,
        species=with_db_connection(
            lambda conn: species_by_id(conn, species_id)),
        population=with_db_connection(
            lambda conn: population_by_species_and_id(
                conn, species_id, population_id)))


@exprdatabp.route(
    "<int:species_id>/populations/<int:population_id>/expression-data/parse/"
    "<uuid:job_id>/results",
    methods=["GET"])
@require_login
def results(species_id: int, population_id: int, job_id: uuid.UUID):
    """Show results of parsing...

    Renders the parse-results page for an existing job, or the "no such job"
    page when the job cannot be found.
    """
    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
        job = _job_or_none(rconn, job_id)

    if job:
        filename = job["filename"]
        _errors = jsonpickle.decode(job.get("errors", jsonpickle.encode(tuple())))
        app.jinja_env.globals.update(
            isinvalidvalue=isinvalidvalue,
            isduplicateheading=isduplicateheading)
        return render_template(
            "expression-data/parse-results.html",
            errors=_errors,
            job_name=f"Parsing '{filename}'",
            user_aborted=job.get("user_aborted"),
            job_id=job["jobid"],
            species=with_db_connection(
                lambda conn: species_by_id(conn, species_id)),
            population=with_db_connection(
                lambda conn: population_by_species_and_id(
                    conn, species_id, population_id)))

    return render_template("expression-data/no-such-job.html", job_id=job_id)


@exprdatabp.route(
    "<int:species_id>/populations/<int:population_id>/expression-data/parse/"
    "<uuid:job_id>/fail",
    methods=["GET"])
@require_login
def fail(species_id: int, population_id: int, job_id: uuid.UUID):
    """Handle parsing failure

    For an existing job, renders the worker-failure page when the job's error
    file is non-empty and the parse-failure page otherwise. Renders the
    "no such job" page when the job cannot be found.
    """
    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
        job = _job_or_none(rconn, job_id)

    if job:
        error_filename = jobs.error_filename(
            job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors")
        if os.path.exists(error_filename) and os.stat(error_filename).st_size > 0:
            return render_template("worker_failure.html", job_id=job_id)

        return render_template("parse_failure.html", job=job)

    # NOTE(review): the context keys are named `species_id`/`population_id`
    # but carry the full species/population records -- confirm against the
    # template before renaming them.
    return render_template("expression-data/no-such-job.html",
                           **with_db_connection(lambda conn: {
                               "species_id": species_by_id(conn, species_id),
                               "population_id": population_by_species_and_id(
                                   conn, species_id, population_id)}),
                           job_id=job_id)


@exprdatabp.route(
    "<int:species_id>/populations/<int:population_id>/expression-data/parse/"
    "abort",
    methods=["POST"])
@require_login
def abort(species_id: int, population_id: int):
    """Handle user request to abort file processing

    Sets the `user_aborted` field on the job named by the `job_id` form
    field (when that job exists), then redirects to `parse_status`.
    """
    job_id = request.form["job_id"]

    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
        job = _job_or_none(rconn, job_id)

        if job:
            rconn.hset(name=jobs.job_key(jobs.jobsnamespace(), job_id),
                       key="user_aborted",
                       value=int(True))

    return redirect(url_for("species.populations.expression-data.parse_status",
                            species_id=species_id,
                            population_id=population_id,
                            job_id=job_id))