Diffstat (limited to 'uploader/expression_data')
-rw-r--r--  uploader/expression_data/__init__.py    11
-rw-r--r--  uploader/expression_data/dbinsert.py    399
-rw-r--r--  uploader/expression_data/index.py       125
-rw-r--r--  uploader/expression_data/parse.py       178
-rw-r--r--  uploader/expression_data/rqtl2.py      1175
-rw-r--r--  uploader/expression_data/views.py       384
6 files changed, 784 insertions(+), 1488 deletions(-)
diff --git a/uploader/expression_data/__init__.py b/uploader/expression_data/__init__.py
index 206a764..fc8bd41 100644
--- a/uploader/expression_data/__init__.py
+++ b/uploader/expression_data/__init__.py
@@ -1,11 +1,2 @@
"""Package handling upload of files."""
-from flask import Blueprint
-
-from .rqtl2 import rqtl2
-from .index import indexbp
-from .parse import parsebp
-
-exprdatabp = Blueprint("expression-data", __name__)
-exprdatabp.register_blueprint(indexbp, url_prefix="/")
-exprdatabp.register_blueprint(rqtl2, url_prefix="/rqtl2")
-exprdatabp.register_blueprint(parsebp, url_prefix="/parse")
+from .views import exprdatabp
diff --git a/uploader/expression_data/dbinsert.py b/uploader/expression_data/dbinsert.py
new file mode 100644
index 0000000..32ca359
--- /dev/null
+++ b/uploader/expression_data/dbinsert.py
@@ -0,0 +1,399 @@
+"Handle inserting data into the database"
+import os
+import json
+from typing import Union
+from functools import reduce
+from datetime import datetime
+
+from redis import Redis
+from MySQLdb.cursors import DictCursor
+from flask import (
+ flash, request, url_for, Blueprint, redirect, render_template,
+ current_app as app)
+
+from uploader import jobs
+from uploader.authorisation import require_login
+from uploader.population.models import populations_by_species
+from uploader.species.models import all_species, species_by_id
+from uploader.platforms.models import platform_by_species_and_id
+from uploader.db_utils import with_db_connection, database_connection
+
+dbinsertbp = Blueprint("dbinsert", __name__)
+
+def render_error(error_msg):
+ "Render the generic error page"
+ return render_template("dbupdate_error.html", error_message=error_msg), 400
+
+def make_menu_items_grouper(grouping_fn=lambda item: item):
+ "Build function to be used to group menu items."
+ def __grouper__(acc, row):
+ grouping = grouping_fn(row[2])
+ row_values = (row[0].strip(), row[1].strip())
+ if acc.get(grouping) is None:
+ return {**acc, grouping: (row_values,)}
+ return {**acc, grouping: (acc[grouping] + (row_values,))}
+ return __grouper__
+
+def genechips():
+ "Retrieve the genechip information from the database"
+ def __organise_by_species__(acc, chip):
+ speciesid = chip["SpeciesId"]
+ if acc.get(speciesid) is None:
+ return {**acc, speciesid: (chip,)}
+ return {**acc, speciesid: acc[speciesid] + (chip,)}
+
+ with database_connection(app.config["SQL_URI"]) as conn:
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute("SELECT * FROM GeneChip ORDER BY GeneChipName ASC")
+ return reduce(__organise_by_species__, cursor.fetchall(), {})
+
+ return {}
+
+
+def studies_by_species_and_platform(speciesid:int, genechipid:int) -> tuple:
+ "Retrieve the studies by the related species and gene platform"
+ with database_connection(app.config["SQL_URI"]) as conn:
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ query = (
+ "SELECT Species.SpeciesId, ProbeFreeze.* "
+ "FROM Species INNER JOIN InbredSet "
+ "ON Species.SpeciesId=InbredSet.SpeciesId "
+ "INNER JOIN ProbeFreeze "
+ "ON InbredSet.InbredSetId=ProbeFreeze.InbredSetId "
+ "WHERE Species.SpeciesId = %s "
+ "AND ProbeFreeze.ChipId = %s")
+ cursor.execute(query, (speciesid, genechipid))
+ return tuple(cursor.fetchall())
+
+ return tuple()
+
+def organise_groups_by_family(acc:dict, group:dict) -> dict:
+ "Organise the group (InbredSet) information by the group field"
+ family = group["Family"]
+ if acc.get(family):
+ return {**acc, family: acc[family] + (group,)}
+ return {**acc, family: (group,)}
+
+def tissues() -> tuple:
+ "Retrieve type (Tissue) information from the database."
+ with database_connection(app.config["SQL_URI"]) as conn:
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute("SELECT * FROM Tissue ORDER BY Name")
+ return tuple(cursor.fetchall())
+
+ return tuple()
+
+@dbinsertbp.route("/platform", methods=["POST"])
+@require_login
+def select_platform():
+ "Select the platform (GeneChipId) used for the data."
+ job_id = request.form["job_id"]
+ with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn,
+ database_connection(app.config["SQL_URI"]) as conn):
+ job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+ if job:
+ filename = job["filename"]
+ filepath = f"{app.config['UPLOAD_FOLDER']}/{filename}"
+ if os.path.exists(filepath):
+ default_species = 1
+ gchips = genechips()
+ return render_template(
+ "select_platform.html", filename=filename,
+ filetype=job["filetype"], totallines=int(job["currentline"]),
+ default_species=default_species, species=all_species(conn),
+ genechips=gchips[default_species],
+ genechips_data=json.dumps(gchips))
+ return render_error(f"File '{filename}' no longer exists.")
+ return render_error(f"Job '{job_id}' no longer exists.")
+ return render_error("Unknown error")
+
+@dbinsertbp.route("/study", methods=["POST"])
+@require_login
+def select_study():
+ "View to select/create the study (ProbeFreeze) associated with the data."
+ form = request.form
+ try:
+ assert form.get("filename"), "filename"
+ assert form.get("filetype"), "filetype"
+ assert form.get("species"), "species"
+ assert form.get("genechipid"), "platform"
+
+ speciesid = form["species"]
+ genechipid = form["genechipid"]
+
+ the_studies = studies_by_species_and_platform(speciesid, genechipid)
+ the_groups = reduce(
+ organise_groups_by_family,
+ with_db_connection(
+ lambda conn: populations_by_species(conn, speciesid)),
+ {})
+ return render_template(
+ "select_study.html", filename=form["filename"],
+ filetype=form["filetype"], totallines=form["totallines"],
+ species=speciesid, genechipid=genechipid, studies=the_studies,
+ groups=the_groups, tissues = tissues(),
+ selected_group=int(form.get("inbredsetid", -13)),
+ selected_tissue=int(form.get("tissueid", -13)))
+ except AssertionError as aserr:
+ return render_error(f"Missing data: {aserr.args[0]}")
+
+@dbinsertbp.route("/create-study", methods=["POST"])
+@require_login
+def create_study():
+ "Create a new study (ProbeFreeze)."
+ form = request.form
+ try:
+ assert form.get("filename"), "filename"
+ assert form.get("filetype"), "filetype"
+ assert form.get("species"), "species"
+ assert form.get("genechipid"), "platform"
+ assert form.get("studyname"), "study name"
+ assert form.get("inbredsetid"), "group"
+ assert form.get("tissueid"), "type/tissue"
+
+ with database_connection(app.config["SQL_URI"]) as conn:
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ values = (
+ form["genechipid"],
+ form["tissueid"],
+ form["studyname"],
+ form.get("studyfullname", ""),
+ form.get("studyshortname", ""),
+ datetime.now().date().strftime("%Y-%m-%d"),
+ form["inbredsetid"])
+ query = (
+ "INSERT INTO ProbeFreeze("
+ "ChipId, TissueId, Name, FullName, ShortName, CreateTime, "
+ "InbredSetId"
+ ") VALUES (%s, %s, %s, %s, %s, %s, %s)")
+ cursor.execute(query, values)
+ new_studyid = cursor.lastrowid
+ cursor.execute(
+ "UPDATE ProbeFreeze SET ProbeFreezeId=%s WHERE Id=%s",
+ (new_studyid, new_studyid))
+ flash("Study created successfully", "alert-success")
+ return render_template(
+ "continue_from_create_study.html",
+ filename=form["filename"], filetype=form["filetype"],
+ totallines=form["totallines"], species=form["species"],
+ genechipid=form["genechipid"], studyid=new_studyid)
+ except AssertionError as aserr:
+ flash(f"Missing data: {aserr.args[0]}", "alert-error")
+ return redirect(url_for("dbinsert.select_study"), code=307)
+
+def datasets_by_study(studyid:int) -> tuple:
+ "Retrieve datasets associated with a study with the ID `studyid`."
+ with database_connection(app.config["SQL_URI"]) as conn:
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ query = "SELECT * FROM ProbeSetFreeze WHERE ProbeFreezeId=%s"
+ cursor.execute(query, (studyid,))
+ return tuple(cursor.fetchall())
+
+ return tuple()
+
+def averaging_methods() -> tuple:
+ "Retrieve averaging methods from database"
+ with database_connection(app.config["SQL_URI"]) as conn:
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute("SELECT * FROM AvgMethod")
+ return tuple(cursor.fetchall())
+
+ return tuple()
+
+def dataset_datascales() -> tuple:
+ "Retrieve datascales from database"
+ with database_connection(app.config["SQL_URI"]) as conn:
+ with conn.cursor() as cursor:
+ cursor.execute(
+ 'SELECT DISTINCT DataScale FROM ProbeSetFreeze '
+ 'WHERE DataScale IS NOT NULL AND DataScale != ""')
+ return tuple(
+ item for item in
+ (res[0].strip() for res in cursor.fetchall())
+ if (item is not None and item != ""))
+
+ return tuple()
+
+@dbinsertbp.route("/dataset", methods=["POST"])
+@require_login
+def select_dataset():
+ "Select the dataset to add the file contents against"
+ form = request.form
+ try:
+ assert form.get("filename"), "filename"
+ assert form.get("filetype"), "filetype"
+ assert form.get("species"), "species"
+ assert form.get("genechipid"), "platform"
+ assert form.get("studyid"), "study"
+
+ studyid = form["studyid"]
+ datasets = datasets_by_study(studyid)
+ return render_template(
+ "select_dataset.html", **{**form, "studyid": studyid},
+ datasets=datasets, avgmethods=averaging_methods(),
+ datascales=dataset_datascales())
+ except AssertionError as aserr:
+ return render_error(f"Missing data: {aserr.args[0]}")
+
+@dbinsertbp.route("/create-dataset", methods=["POST"])
+@require_login
+def create_dataset():
+ "Select the dataset to add the file contents against"
+ form = request.form
+ try:
+ assert form.get("filename"), "filename"
+ assert form.get("filetype"), "filetype"
+ assert form.get("species"), "species"
+ assert form.get("genechipid"), "platform"
+ assert form.get("studyid"), "study"
+ assert form.get("avgid"), "averaging method"
+ assert form.get("datasetname2"), "Dataset Name 2"
+ assert form.get("datasetfullname"), "Dataset Full Name"
+ assert form.get("datasetshortname"), "Dataset Short Name"
+ assert form.get("datasetpublic"), "Dataset public specification"
+ assert form.get("datasetconfidentiality"), "Dataset confidentiality"
+ assert form.get("datasetdatascale"), "Dataset Datascale"
+
+ with database_connection(app.config["SQL_URI"]) as conn:
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ datasetname = form["datasetname"]
+ cursor.execute("SELECT * FROM ProbeSetFreeze WHERE Name=%s",
+ (datasetname,))
+ results = cursor.fetchall()
+ if bool(results):
+ flash("A dataset with that name already exists.",
+ "alert-error")
+ return redirect(url_for("dbinsert.select_dataset"), code=307)
+ values = (
+ form["studyid"], form["avgid"],
+ datasetname, form["datasetname2"],
+ form["datasetfullname"], form["datasetshortname"],
+ datetime.now().date().strftime("%Y-%m-%d"),
+ form["datasetpublic"], form["datasetconfidentiality"],
+ "williamslab", form["datasetdatascale"])
+ query = (
+ "INSERT INTO ProbeSetFreeze("
+ "ProbeFreezeId, AvgID, Name, Name2, FullName, "
+ "ShortName, CreateTime, OrderList, public, "
+ "confidentiality, AuthorisedUsers, DataScale) "
+ "VALUES"
+ "(%s, %s, %s, %s, %s, %s, %s, NULL, %s, %s, %s, %s)")
+ cursor.execute(query, values)
+ new_datasetid = cursor.lastrowid
+ return render_template(
+ "continue_from_create_dataset.html",
+ filename=form["filename"], filetype=form["filetype"],
+ species=form["species"], genechipid=form["genechipid"],
+ studyid=form["studyid"], datasetid=new_datasetid,
+ totallines=form["totallines"])
+ except AssertionError as aserr:
+ flash(f"Missing data {aserr.args[0]}", "alert-error")
+ return redirect(url_for("dbinsert.select_dataset"), code=307)
+
+def study_by_id(studyid:int) -> Union[dict, None]:
+ "Get a study by its Id"
+ with database_connection(app.config["SQL_URI"]) as conn:
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute(
+ "SELECT * FROM ProbeFreeze WHERE Id=%s",
+ (studyid,))
+ return cursor.fetchone()
+
+def dataset_by_id(datasetid:int) -> Union[dict, None]:
+ "Retrieve a dataset by its id"
+ with database_connection(app.config["SQL_URI"]) as conn:
+ with conn.cursor(cursorclass=DictCursor) as cursor:
+ cursor.execute(
+ ("SELECT AvgMethod.Name AS AvgMethodName, ProbeSetFreeze.* "
+ "FROM ProbeSetFreeze INNER JOIN AvgMethod "
+ "ON ProbeSetFreeze.AvgId=AvgMethod.AvgMethodId "
+ "WHERE ProbeSetFreeze.Id=%s"),
+ (datasetid,))
+ return cursor.fetchone()
+
+def selected_keys(original: dict, keys: tuple) -> dict:
+ "Return a new dict from the `original` dict with only `keys` present."
+ return {key: value for key,value in original.items() if key in keys}
+
+@dbinsertbp.route("/final-confirmation", methods=["POST"])
+@require_login
+def final_confirmation():
+ "Preview the data before triggering entry into the database"
+ with database_connection(app.config["SQL_URI"]) as conn:
+ form = request.form
+ try:
+ assert form.get("filename"), "filename"
+ assert form.get("filetype"), "filetype"
+ assert form.get("species"), "species"
+ assert form.get("genechipid"), "platform"
+ assert form.get("studyid"), "study"
+ assert form.get("datasetid"), "dataset"
+
+ speciesid = form["species"]
+ genechipid = form["genechipid"]
+ studyid = form["studyid"]
+ datasetid=form["datasetid"]
+ return render_template(
+ "final_confirmation.html", filename=form["filename"],
+ filetype=form["filetype"], totallines=form["totallines"],
+ species=speciesid, genechipid=genechipid, studyid=studyid,
+ datasetid=datasetid, the_species=selected_keys(
+ with_db_connection(lambda conn: species_by_id(conn, speciesid)),
+ ("SpeciesName", "Name", "MenuName")),
+ platform=selected_keys(
+ platform_by_species_and_id(conn, speciesid, genechipid),
+ ("GeneChipName", "Name", "GeoPlatform", "Title", "GO_tree_value")),
+ study=selected_keys(
+ study_by_id(studyid), ("Name", "FullName", "ShortName")),
+ dataset=selected_keys(
+ dataset_by_id(datasetid),
+ ("AvgMethodName", "Name", "Name2", "FullName", "ShortName",
+ "DataScale")))
+ except AssertionError as aserr:
+ return render_error(f"Missing data: {aserr.args[0]}")
+
+@dbinsertbp.route("/insert-data", methods=["POST"])
+@require_login
+def insert_data():
+ "Trigger data insertion"
+ form = request.form
+ try:
+ assert form.get("filename"), "filename"
+ assert form.get("filetype"), "filetype"
+ assert form.get("species"), "species"
+ assert form.get("genechipid"), "platform"
+ assert form.get("studyid"), "study"
+ assert form.get("datasetid"), "dataset"
+
+ filename = form["filename"]
+ filepath = f"{app.config['UPLOAD_FOLDER']}/{filename}"
+ redisurl = app.config["REDIS_URL"]
+ if os.path.exists(filepath):
+ with Redis.from_url(redisurl, decode_responses=True) as rconn:
+ job = jobs.launch_job(
+ jobs.data_insertion_job(
+ rconn, filepath, form["filetype"], form["totallines"],
+ form["species"], form["genechipid"], form["datasetid"],
+ app.config["SQL_URI"], redisurl,
+ app.config["JOBS_TTL_SECONDS"]),
+ redisurl, f"{app.config['UPLOAD_FOLDER']}/job_errors")
+
+ return redirect(url_for("dbinsert.insert_status", job_id=job["jobid"]))
+ return render_error(f"File '{filename}' no longer exists.")
+ except AssertionError as aserr:
+ return render_error(f"Missing data: {aserr.args[0]}")
+
+@dbinsertbp.route("/status/<job_id>", methods=["GET"])
+def insert_status(job_id: str):
+ "Retrieve status of data insertion."
+ with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+ job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+
+ if job:
+ job_status = job["status"]
+ if job_status == "success":
+ return render_template("insert_success.html", job=job)
+ if job["status"] == "error":
+ return render_template("insert_error.html", job=job)
+ return render_template("insert_progress.html", job=job)
+ return render_template("no_such_job.html", job_id=job_id), 400
diff --git a/uploader/expression_data/index.py b/uploader/expression_data/index.py
deleted file mode 100644
index db23136..0000000
--- a/uploader/expression_data/index.py
+++ /dev/null
@@ -1,125 +0,0 @@
-"""Entry-point module"""
-import os
-import mimetypes
-from typing import Tuple
-from zipfile import ZipFile, is_zipfile
-
-from werkzeug.utils import secure_filename
-from flask import (
- flash,
- request,
- url_for,
- redirect,
- Blueprint,
- render_template,
- current_app as app)
-
-from uploader.species.models import all_species as species
-from uploader.authorisation import require_login
-from uploader.db_utils import with_db_connection
-
-indexbp = Blueprint("index", __name__)
-
-
-def errors(rqst) -> Tuple[str, ...]:
- """Return a tuple of the errors found in the request `rqst`. If no error is
- found, then an empty tuple is returned."""
- def __filetype_error__():
- return (
- ("Invalid file type provided.",)
- if rqst.form.get("filetype") not in ("average", "standard-error")
- else tuple())
-
- def __file_missing_error__():
- return (
- ("No file was uploaded.",)
- if ("qc_text_file" not in rqst.files or
- rqst.files["qc_text_file"].filename == "")
- else tuple())
-
- def __file_mimetype_error__():
- text_file = rqst.files["qc_text_file"]
- return (
- (
- ("Invalid file! Expected a tab-separated-values file, or a zip "
- "file of the a tab-separated-values file."),)
- if text_file.mimetype not in (
- "text/plain", "text/tab-separated-values",
- "application/zip")
- else tuple())
-
- return (
- __filetype_error__() +
- (__file_missing_error__() or __file_mimetype_error__()))
-
-def zip_file_errors(filepath, upload_dir) -> Tuple[str, ...]:
- """Check the uploaded zip file for errors."""
- zfile_errors: Tuple[str, ...] = tuple()
- if is_zipfile(filepath):
- with ZipFile(filepath, "r") as zfile:
- infolist = zfile.infolist()
- if len(infolist) != 1:
- zfile_errors = zfile_errors + (
- ("Expected exactly one (1) member file within the uploaded zip "
- f"file. Got {len(infolist)} member files."),)
- if len(infolist) == 1 and infolist[0].is_dir():
- zfile_errors = zfile_errors + (
- ("Expected a member text file in the uploaded zip file. Got a "
- "directory/folder."),)
-
- if len(infolist) == 1 and not infolist[0].is_dir():
- zfile.extract(infolist[0], path=upload_dir)
- mime = mimetypes.guess_type(f"{upload_dir}/{infolist[0].filename}")
- if mime[0] != "text/tab-separated-values":
- zfile_errors = zfile_errors + (
- ("Expected the member text file in the uploaded zip file to"
- " be a tab-separated file."),)
-
- return zfile_errors
-
-
-@indexbp.route("/", methods=["GET"])
-@require_login
-def index():
- """Display the expression data index page."""
- return render_template("expression-data/index.html")
-
-
-@indexbp.route("/upload", methods=["GET", "POST"])
-@require_login
-def upload_file():
- """Enables uploading the files"""
- if request.method == "GET":
- return render_template(
- "select_species.html", species=with_db_connection(species))
-
- upload_dir = app.config["UPLOAD_FOLDER"]
- request_errors = errors(request)
- if request_errors:
- for error in request_errors:
- flash(error, "alert-danger error-expr-data")
- return redirect(url_for("expression-data.index.upload_file"))
-
- filename = secure_filename(request.files["qc_text_file"].filename)
- if not os.path.exists(upload_dir):
- os.mkdir(upload_dir)
-
- filepath = os.path.join(upload_dir, filename)
- request.files["qc_text_file"].save(os.path.join(upload_dir, filename))
-
- zip_errors = zip_file_errors(filepath, upload_dir)
- if zip_errors:
- for error in zip_errors:
- flash(error, "alert-danger error-expr-data")
- return redirect(url_for("expression-data.index.upload_file"))
-
- return redirect(url_for("expression-data.parse.parse",
- speciesid=request.form["speciesid"],
- filename=filename,
- filetype=request.form["filetype"]))
-
-@indexbp.route("/data-review", methods=["GET"])
-@require_login
-def data_review():
- """Provide some help on data expectations to the user."""
- return render_template("data_review.html")
diff --git a/uploader/expression_data/parse.py b/uploader/expression_data/parse.py
deleted file mode 100644
index fc1c3f0..0000000
--- a/uploader/expression_data/parse.py
+++ /dev/null
@@ -1,178 +0,0 @@
-"""File parsing module"""
-import os
-
-import jsonpickle
-from redis import Redis
-from flask import flash, request, url_for, redirect, Blueprint, render_template
-from flask import current_app as app
-
-from quality_control.errors import InvalidValue, DuplicateHeading
-
-from uploader import jobs
-from uploader.dbinsert import species_by_id
-from uploader.db_utils import with_db_connection
-from uploader.authorisation import require_login
-
-parsebp = Blueprint("parse", __name__)
-
-def isinvalidvalue(item):
- """Check whether item is of type InvalidValue"""
- return isinstance(item, InvalidValue)
-
-def isduplicateheading(item):
- """Check whether item is of type DuplicateHeading"""
- return isinstance(item, DuplicateHeading)
-
-@parsebp.route("/parse", methods=["GET"])
-@require_login
-def parse():
- """Trigger file parsing"""
- errors = False
- speciesid = request.args.get("speciesid")
- filename = request.args.get("filename")
- filetype = request.args.get("filetype")
- if speciesid is None:
- flash("No species selected", "alert-error error-expr-data")
- errors = True
- else:
- try:
- speciesid = int(speciesid)
- species = with_db_connection(
- lambda con: species_by_id(con, speciesid))
- if not bool(species):
- flash("No such species.", "alert-error error-expr-data")
- errors = True
- except ValueError:
- flash("Invalid speciesid provided. Expected an integer.",
- "alert-error error-expr-data")
- errors = True
-
- if filename is None:
- flash("No file provided", "alert-error error-expr-data")
- errors = True
-
- if filetype is None:
- flash("No filetype provided", "alert-error error-expr-data")
- errors = True
-
- if filetype not in ("average", "standard-error"):
- flash("Invalid filetype provided", "alert-error error-expr-data")
- errors = True
-
- if filename:
- filepath = os.path.join(app.config["UPLOAD_FOLDER"], filename)
- if not os.path.exists(filepath):
- flash("Selected file does not exist (any longer)",
- "alert-error error-expr-data")
- errors = True
-
- if errors:
- return redirect(url_for("expression-data.index.upload_file"))
-
- redisurl = app.config["REDIS_URL"]
- with Redis.from_url(redisurl, decode_responses=True) as rconn:
- job = jobs.launch_job(
- jobs.build_file_verification_job(
- rconn, app.config["SQL_URI"], redisurl,
- speciesid, filepath, filetype,
- app.config["JOBS_TTL_SECONDS"]),
- redisurl,
- f"{app.config['UPLOAD_FOLDER']}/job_errors")
-
- return redirect(url_for("expression-data.parse.parse_status", job_id=job["jobid"]))
-
-@parsebp.route("/status/<job_id>", methods=["GET"])
-def parse_status(job_id: str):
- "Retrieve the status of the job"
- with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
- try:
- job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
- except jobs.JobNotFound as _exc:
- return render_template("no_such_job.html", job_id=job_id), 400
-
- error_filename = jobs.error_filename(
- job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors")
- if os.path.exists(error_filename):
- stat = os.stat(error_filename)
- if stat.st_size > 0:
- return redirect(url_for("parse.fail", job_id=job_id))
-
- job_id = job["jobid"]
- progress = float(job["percent"])
- status = job["status"]
- filename = job.get("filename", "uploaded file")
- errors = jsonpickle.decode(
- job.get("errors", jsonpickle.encode(tuple())))
- if status in ("success", "aborted"):
- return redirect(url_for("expression-data.parse.results", job_id=job_id))
-
- if status == "parse-error":
- return redirect(url_for("parse.fail", job_id=job_id))
-
- app.jinja_env.globals.update(
- isinvalidvalue=isinvalidvalue,
- isduplicateheading=isduplicateheading)
- return render_template(
- "job_progress.html",
- job_id = job_id,
- job_status = status,
- progress = progress,
- message = job.get("message", ""),
- job_name = f"Parsing '{filename}'",
- errors=errors)
-
-@parsebp.route("/results/<job_id>", methods=["GET"])
-def results(job_id: str):
- """Show results of parsing..."""
- with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
- job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
-
- if job:
- filename = job["filename"]
- errors = jsonpickle.decode(job.get("errors", jsonpickle.encode(tuple())))
- app.jinja_env.globals.update(
- isinvalidvalue=isinvalidvalue,
- isduplicateheading=isduplicateheading)
- return render_template(
- "parse_results.html",
- errors=errors,
- job_name = f"Parsing '{filename}'",
- user_aborted = job.get("user_aborted"),
- job_id=job["jobid"])
-
- return render_template("no_such_job.html", job_id=job_id)
-
-@parsebp.route("/fail/<job_id>", methods=["GET"])
-def fail(job_id: str):
- """Handle parsing failure"""
- with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
- job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
-
- if job:
- error_filename = jobs.error_filename(
- job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors")
- if os.path.exists(error_filename):
- stat = os.stat(error_filename)
- if stat.st_size > 0:
- return render_template(
- "worker_failure.html", job_id=job_id)
-
- return render_template("parse_failure.html", job=job)
-
- return render_template("no_such_job.html", job_id=job_id)
-
-@parsebp.route("/abort", methods=["POST"])
-@require_login
-def abort():
- """Handle user request to abort file processing"""
- job_id = request.form["job_id"]
-
- with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
- job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
-
- if job:
- rconn.hset(name=jobs.job_key(jobs.jobsnamespace(), job_id),
- key="user_aborted",
- value=int(True))
-
- return redirect(url_for("expression-data.parse.parse_status", job_id=job_id))
diff --git a/uploader/expression_data/rqtl2.py b/uploader/expression_data/rqtl2.py
deleted file mode 100644
index a855699..0000000
--- a/uploader/expression_data/rqtl2.py
+++ /dev/null
@@ -1,1175 +0,0 @@
-"""Module to handle uploading of R/qtl2 bundles."""#pylint: disable=[too-many-lines]
-import sys
-import json
-import traceback
-from pathlib import Path
-from datetime import date
-from uuid import UUID, uuid4
-from functools import partial
-from zipfile import ZipFile, is_zipfile
-from typing import Union, Callable, Optional
-
-import MySQLdb as mdb
-from redis import Redis
-from MySQLdb.cursors import DictCursor
-from werkzeug.utils import secure_filename
-from flask import (
- flash,
- escape,
- request,
- jsonify,
- url_for,
- redirect,
- Response,
- Blueprint,
- render_template,
- current_app as app)
-
-from r_qtl import r_qtl2
-
-from uploader import jobs
-from uploader.files import save_file, fullpath
-from uploader.dbinsert import species as all_species
-from uploader.db_utils import with_db_connection, database_connection
-
-from uploader.authorisation import require_login
-from uploader.db.platforms import platform_by_id, platforms_by_species
-from uploader.db.averaging import averaging_methods, averaging_method_by_id
-from uploader.db.tissues import all_tissues, tissue_by_id, create_new_tissue
-from uploader.population.models import (save_population,
- populations_by_species,
- population_by_species_and_id)
-from uploader.species.models import species_by_id
-from uploader.db.datasets import (
- geno_dataset_by_id,
- geno_datasets_by_species_and_population,
-
- probeset_study_by_id,
- probeset_create_study,
- probeset_dataset_by_id,
- probeset_create_dataset,
- probeset_datasets_by_study,
- probeset_studies_by_species_and_population)
-
-rqtl2 = Blueprint("rqtl2", __name__)
-
-
-@rqtl2.route("/", methods=["GET", "POST"])
-@rqtl2.route("/select-species", methods=["GET", "POST"])
-@require_login
-def select_species():
- """Select the species."""
- if request.method == "GET":
- return render_template("rqtl2/index.html", species=with_db_connection(all_species))
-
- species_id = request.form.get("species_id")
- species = with_db_connection(
- lambda conn: species_by_id(conn, species_id))
- if bool(species):
- return redirect(url_for(
- "expression-data.rqtl2.select_population", species_id=species_id))
- flash("Invalid species or no species selected!", "alert-error error-rqtl2")
- return redirect(url_for("expression-data.rqtl2.select_species"))
-
-
-@rqtl2.route("/upload/species/<int:species_id>/select-population",
- methods=["GET", "POST"])
-@require_login
-def select_population(species_id: int):
- """Select/Create the population to organise data under."""
- with database_connection(app.config["SQL_URI"]) as conn:
- species = species_by_id(conn, species_id)
- if not bool(species):
- flash("Invalid species selected!", "alert-error error-rqtl2")
- return redirect(url_for("expression-data.rqtl2.select_species"))
-
- if request.method == "GET":
- return render_template(
- "rqtl2/select-population.html",
- species=species,
- populations=populations_by_species(conn, species_id))
-
- population = population_by_species_and_id(
- conn, species["SpeciesId"], request.form.get("inbredset_id"))
- if not bool(population):
- flash("Invalid Population!", "alert-error error-rqtl2")
- return redirect(
- url_for("expression-data.rqtl2.select_population", pgsrc="error"),
- code=307)
-
- return redirect(url_for("expression-data.rqtl2.upload_rqtl2_bundle",
- species_id=species["SpeciesId"],
- population_id=population["InbredSetId"]))
-
-
-@rqtl2.route("/upload/species/<int:species_id>/create-population",
- methods=["POST"])
-@require_login
-def create_population(species_id: int):
- """Create a new population for the given species."""
- population_page = redirect(url_for("expression-data.rqtl2.select_population",
- species_id=species_id))
- with database_connection(app.config["SQL_URI"]) as conn:
- species = species_by_id(conn, species_id)
- population_name = request.form.get("inbredset_name", "").strip()
- population_fullname = request.form.get("inbredset_fullname", "").strip()
- if not bool(species):
- flash("Invalid species!", "alert-error error-rqtl2")
- return redirect(url_for("expression-data.rqtl2.select_species"))
- if not bool(population_name):
- flash("Invalid Population Name!", "alert-error error-rqtl2")
- return population_page
- if not bool(population_fullname):
- flash("Invalid Population Full Name!", "alert-error error-rqtl2")
- return population_page
- new_population = save_population(conn, {
- "SpeciesId": species["SpeciesId"],
- "Name": population_name,
- "InbredSetName": population_fullname,
- "FullName": population_fullname,
- "Family": request.form.get("inbredset_family") or None,
- "Description": request.form.get("description") or None
- })
-
- flash("Population created successfully.", "alert-success")
- return redirect(
- url_for("expression-data.rqtl2.upload_rqtl2_bundle",
- species_id=species_id,
- population_id=new_population["population_id"],
- pgsrc="create-population"),
- code=307)
-
-
-class __RequestError__(Exception): #pylint: disable=[invalid-name]
- """Internal class to avoid pylint's `too-many-return-statements` error."""
-
-
-@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
- "/rqtl2-bundle"),
- methods=["GET", "POST"])
-@require_login
-def upload_rqtl2_bundle(species_id: int, population_id: int):
- """Allow upload of R/qtl2 bundle."""
- with database_connection(app.config["SQL_URI"]) as conn:
- species = species_by_id(conn, species_id)
- population = population_by_species_and_id(
- conn, species["SpeciesId"], population_id)
- if not bool(species):
- flash("Invalid species!", "alert-error error-rqtl2")
- return redirect(url_for("expression-data.rqtl2.select_species"))
- if not bool(population):
- flash("Invalid Population!", "alert-error error-rqtl2")
- return redirect(
- url_for("expression-data.rqtl2.select_population", pgsrc="error"),
- code=307)
- if request.method == "GET" or (
- request.method == "POST"
- and bool(request.args.get("pgsrc"))):
- return render_template("rqtl2/upload-rqtl2-bundle-step-01.html",
- species=species,
- population=population)
-
- try:
- app.logger.debug("Files in the form: %s", request.files)
- the_file = save_file(request.files["rqtl2_bundle_file"],
- Path(app.config["UPLOAD_FOLDER"]))
- except AssertionError:
- app.logger.debug(traceback.format_exc())
- flash("Please provide a valid R/qtl2 zip bundle.",
- "alert-error error-rqtl2")
- return redirect(url_for("expression-data.rqtl2.upload_rqtl2_bundle",
- species_id=species_id,
- population_id=population_id))
-
- if not is_zipfile(str(the_file)):
- app.logger.debug("The file is not a zip file.")
- raise __RequestError__("Invalid file! Expected a zip file.")
-
- jobid = trigger_rqtl2_bundle_qc(
- species_id,
- population_id,
- the_file,
- request.files["rqtl2_bundle_file"].filename)#type: ignore[arg-type]
- return redirect(url_for(
- "expression-data.rqtl2.rqtl2_bundle_qc_status", jobid=jobid))
-
-
-def trigger_rqtl2_bundle_qc(
- species_id: int,
- population_id: int,
- rqtl2bundle: Path,
- originalfilename: str
-) -> UUID:
- """Trigger QC on the R/qtl2 bundle."""
- redisuri = app.config["REDIS_URL"]
- with Redis.from_url(redisuri, decode_responses=True) as rconn:
- jobid = uuid4()
- redis_ttl_seconds = app.config["JOBS_TTL_SECONDS"]
- jobs.launch_job(
- jobs.initialise_job(
- rconn,
- jobs.jobsnamespace(),
- str(jobid),
- [sys.executable, "-m", "scripts.qc_on_rqtl2_bundle",
- app.config["SQL_URI"], app.config["REDIS_URL"],
- jobs.jobsnamespace(), str(jobid), str(species_id),
- str(population_id), "--redisexpiry",
- str(redis_ttl_seconds)],
- "rqtl2-bundle-qc-job",
- redis_ttl_seconds,
- {"job-metadata": json.dumps({
- "speciesid": species_id,
- "populationid": population_id,
- "rqtl2-bundle-file": str(rqtl2bundle.absolute()),
- "original-filename": originalfilename})}),
- redisuri,
- f"{app.config['UPLOAD_FOLDER']}/job_errors")
- return jobid
-
-
-def chunk_name(uploadfilename: str, chunkno: int) -> str:
- """Generate chunk name from original filename and chunk number"""
- if uploadfilename == "":
- raise ValueError("Name cannot be empty!")
- if chunkno < 1:
- raise ValueError("Chunk number must be greater than zero")
- return f"{secure_filename(uploadfilename)}_part_{chunkno:05d}"
-
-
-def chunks_directory(uniqueidentifier: str) -> Path:
- """Compute the directory where chunks are temporarily stored."""
- if uniqueidentifier == "":
- raise ValueError("Unique identifier cannot be empty!")
- return Path(app.config["UPLOAD_FOLDER"], f"tempdir_{uniqueidentifier}")
-
-
-@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
- "/rqtl2-bundle-chunked"),
- methods=["GET"])
-@require_login
-def upload_rqtl2_bundle_chunked_get(# pylint: disable=["unused-argument"]
- species_id: int,
- population_id: int
-):
- """
- Extension to the `upload_rqtl2_bundle` endpoint above that provides a way
- for testing whether all the chunks have been uploaded and to assist with
- resuming a failed expression-data.
- """
- fileid = request.args.get("resumableIdentifier", type=str) or ""
- filename = request.args.get("resumableFilename", type=str) or ""
- chunk = request.args.get("resumableChunkNumber", type=int) or 0
- if not(fileid or filename or chunk):
- return jsonify({
- "message": "At least one required query parameter is missing.",
- "error": "BadRequest",
- "statuscode": 400
- }), 400
-
- if Path(chunks_directory(fileid),
- chunk_name(filename, chunk)).exists():
- return "OK"
-
- return jsonify({
- "message": f"Chunk {chunk} was not found.",
- "error": "NotFound",
- "statuscode": 404
- }), 404
-
-
-def __merge_chunks__(targetfile: Path, chunkpaths: tuple[Path, ...]) -> Path:
- """Merge the chunks into a single file."""
- with open(targetfile, "ab") as _target:
- for chunkfile in chunkpaths:
- with open(chunkfile, "rb") as _chunkdata:
- _target.write(_chunkdata.read())
-
- chunkfile.unlink()
- return targetfile
-
-
-@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
- "/rqtl2-bundle-chunked"),
- methods=["POST"])
-@require_login
-def upload_rqtl2_bundle_chunked_post(species_id: int, population_id: int):
- """
- Extension to the `upload_rqtl2_bundle` endpoint above that allows large
- files to be uploaded in chunks.
-
- This should hopefully speed up uploads, and if done right, even enable
- resumable uploads
- """
- _totalchunks = request.form.get("resumableTotalChunks", type=int) or 0
- _chunk = request.form.get("resumableChunkNumber", default=1, type=int)
- _uploadfilename = request.form.get(
- "resumableFilename", default="", type=str) or ""
- _fileid = request.form.get(
- "resumableIdentifier", default="", type=str) or ""
- _targetfile = Path(app.config["UPLOAD_FOLDER"], _fileid)
-
- if _targetfile.exists():
- return jsonify({
- "message": (
- "A file with a similar unique identifier has previously been "
- "uploaded and possibly is/has being/been processed."),
- "error": "BadRequest",
- "statuscode": 400
- }), 400
-
- try:
- # save chunk data
- chunks_directory(_fileid).mkdir(exist_ok=True, parents=True)
- request.files["file"].save(Path(chunks_directory(_fileid),
- chunk_name(_uploadfilename, _chunk)))
-
- # Check whether upload is complete
- chunkpaths = tuple(
- Path(chunks_directory(_fileid), chunk_name(_uploadfilename, _achunk))
- for _achunk in range(1, _totalchunks+1))
- if all(_file.exists() for _file in chunkpaths):
- # merge_files and clean up chunks
- __merge_chunks__(_targetfile, chunkpaths)
- chunks_directory(_fileid).rmdir()
- jobid = trigger_rqtl2_bundle_qc(
- species_id, population_id, _targetfile, _uploadfilename)
- return url_for(
- "expression-data.rqtl2.rqtl2_bundle_qc_status", jobid=jobid)
- except Exception as exc:# pylint: disable=[broad-except]
- msg = "Error processing uploaded file chunks."
- app.logger.error(msg, exc_info=True, stack_info=True)
- return jsonify({
- "message": msg,
- "error": type(exc).__name__,
- "error-description": " ".join(str(arg) for arg in exc.args),
- "error-trace": traceback.format_exception(exc)
- }), 500
-
- return "OK"
-
-
-@rqtl2.route("/upload/species/rqtl2-bundle/qc-status/<uuid:jobid>",
- methods=["GET", "POST"])
-@require_login
-def rqtl2_bundle_qc_status(jobid: UUID):
- """Check the status of the QC jobs."""
- with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn,
- database_connection(app.config["SQL_URI"]) as dbconn):
- try:
- thejob = jobs.job(rconn, jobs.jobsnamespace(), jobid)
- messagelistname = thejob.get("log-messagelist")
- logmessages = (rconn.lrange(messagelistname, 0, -1)
- if bool(messagelistname) else [])
- jobstatus = thejob["status"]
- if jobstatus == "error":
- return render_template("rqtl2/rqtl2-qc-job-error.html",
- job=thejob,
- errorsgeneric=json.loads(
- thejob.get("errors-generic", "[]")),
- errorsgeno=json.loads(
- thejob.get("errors-geno", "[]")),
- errorspheno=json.loads(
- thejob.get("errors-pheno", "[]")),
- errorsphenose=json.loads(
- thejob.get("errors-phenose", "[]")),
- errorsphenocovar=json.loads(
- thejob.get("errors-phenocovar", "[]")),
- messages=logmessages)
- if jobstatus == "success":
- jobmeta = json.loads(thejob["job-metadata"])
- species = species_by_id(dbconn, jobmeta["speciesid"])
- return render_template(
- "rqtl2/rqtl2-qc-job-results.html",
- species=species,
- population=population_by_species_and_id(
- dbconn, species["SpeciesId"], jobmeta["populationid"]),
- rqtl2bundle=Path(jobmeta["rqtl2-bundle-file"]).name,
- rqtl2bundleorig=jobmeta["original-filename"])
-
- def compute_percentage(thejob, filetype) -> Union[str, None]:
- if f"{filetype}-linecount" in thejob:
- return "100"
- if f"{filetype}-filesize" in thejob:
- percent = ((int(thejob.get(f"{filetype}-checked", 0))
- /
- int(thejob.get(f"{filetype}-filesize", 1)))
- * 100)
- return f"{percent:.2f}"
- return None
-
- return render_template(
- "rqtl2/rqtl2-qc-job-status.html",
- job=thejob,
- geno_percent=compute_percentage(thejob, "geno"),
- pheno_percent=compute_percentage(thejob, "pheno"),
- phenose_percent=compute_percentage(thejob, "phenose"),
- messages=logmessages)
- except jobs.JobNotFound:
- return render_template("rqtl2/no-such-job.html", jobid=jobid)
-
-
-def redirect_on_error(flaskroute, **kwargs):
- """Utility to redirect on error"""
- return redirect(url_for(flaskroute, **kwargs, pgsrc="error"),
- code=(307 if request.method == "POST" else 302))
-
-
-def check_species(conn: mdb.Connection, formargs: dict) -> Optional[
- tuple[str, Response]]:
- """
- Check whether the 'species_id' value is provided, and whether a
- corresponding species exists in the database.
-
- Maybe give the function a better name..."""
- speciespage = redirect_on_error("expression-data.rqtl2.select_species")
- if "species_id" not in formargs:
- return "You MUST provide the Species identifier.", speciespage
-
- if not bool(species_by_id(conn, formargs["species_id"])):
- return "No species with the provided identifier exists.", speciespage
-
- return None
-
-
-def check_population(conn: mdb.Connection,
- formargs: dict,
- species_id) -> Optional[tuple[str, Response]]:
- """
- Check whether the 'population_id' value is provided, and whether a
- corresponding population exists in the database.
-
- Maybe give the function a better name..."""
- poppage = redirect_on_error(
- "expression-data.rqtl2.select_species", species_id=species_id)
- if "population_id" not in formargs:
- return "You MUST provide the Population identifier.", poppage
-
- if not bool(population_by_species_and_id(
- conn, species_id, formargs["population_id"])):
- return "No population with the provided identifier exists.", poppage
-
- return None
-
-
-def check_r_qtl2_bundle(formargs: dict,
- species_id,
- population_id) -> Optional[tuple[str, Response]]:
- """Check for the existence of the R/qtl2 bundle."""
- fileuploadpage = redirect_on_error("expression-data.rqtl2.upload_rqtl2_bundle",
- species_id=species_id,
- population_id=population_id)
- if not "rqtl2_bundle_file" in formargs:
- return (
- "You MUST provide a R/qtl2 zip bundle for expression-data.", fileuploadpage)
-
- if not Path(fullpath(formargs["rqtl2_bundle_file"])).exists():
- return "No R/qtl2 bundle with the given name exists.", fileuploadpage
-
- return None
-
-
-def check_geno_dataset(conn: mdb.Connection,
- formargs: dict,
- species_id,
- population_id) -> Optional[tuple[str, Response]]:
- """Check for the Genotype dataset."""
- genodsetpg = redirect_on_error("expression-data.rqtl2.select_dataset_info",
- species_id=species_id,
- population_id=population_id)
- if not bool(formargs.get("geno-dataset-id")):
- return (
- "You MUST provide a valid Genotype dataset identifier", genodsetpg)
-
- with conn.cursor(cursorclass=DictCursor) as cursor:
- cursor.execute("SELECT * FROM GenoFreeze WHERE Id=%s",
- (formargs["geno-dataset-id"],))
- results = cursor.fetchall()
- if not bool(results):
- return ("No genotype dataset with the provided identifier exists.",
- genodsetpg)
- if len(results) > 1:
- return (
- "Data corruption: More than one genotype dataset with the same "
- "identifier.",
- genodsetpg)
-
- return None
-
-def check_tissue(
- conn: mdb.Connection,formargs: dict) -> Optional[tuple[str, Response]]:
- """Check for tissue/organ/biological material."""
- selectdsetpg = redirect_on_error("expression-data.rqtl2.select_dataset_info",
- species_id=formargs["species_id"],
- population_id=formargs["population_id"])
- if not bool(formargs.get("tissueid", "").strip()):
- return ("No tissue/organ/biological material provided.", selectdsetpg)
-
- with conn.cursor(cursorclass=DictCursor) as cursor:
- cursor.execute("SELECT * FROM Tissue WHERE Id=%s",
- (formargs["tissueid"],))
- results = cursor.fetchall()
- if not bool(results):
- return ("No tissue/organ with the provided identifier exists.",
- selectdsetpg)
-
- if len(results) > 1:
- return (
- "Data corruption: More than one tissue/organ with the same "
- "identifier.",
- selectdsetpg)
-
- return None
-
-
-def check_probe_study(conn: mdb.Connection,
- formargs: dict,
- species_id,
- population_id) -> Optional[tuple[str, Response]]:
- """Check for the ProbeSet study."""
- dsetinfopg = redirect_on_error("expression-data.rqtl2.select_dataset_info",
- species_id=species_id,
- population_id=population_id)
- if not bool(formargs.get("probe-study-id")):
- return "No probeset study was selected!", dsetinfopg
-
- if not bool(probeset_study_by_id(conn, formargs["probe-study-id"])):
- return ("No probeset study with the provided identifier exists",
- dsetinfopg)
-
- return None
-
-
-def check_probe_dataset(conn: mdb.Connection,
- formargs: dict,
- species_id,
- population_id) -> Optional[tuple[str, Response]]:
- """Check for the ProbeSet dataset."""
- dsetinfopg = redirect_on_error("expression-data.rqtl2.select_dataset_info",
- species_id=species_id,
- population_id=population_id)
- if not bool(formargs.get("probe-dataset-id")):
- return "No probeset dataset was selected!", dsetinfopg
-
- if not bool(probeset_dataset_by_id(conn, formargs["probe-dataset-id"])):
- return ("No probeset dataset with the provided identifier exists",
- dsetinfopg)
-
- return None
-
-
-def with_errors(endpointthunk: Callable, *checkfns):
- """Run 'endpointthunk' with error checking."""
- formargs = {**dict(request.args), **dict(request.form)}
- errors = tuple(item for item in (_fn(formargs=formargs) for _fn in checkfns)
- if item is not None)
- if len(errors) > 0:
- flash(errors[0][0], "alert-error error-rqtl2")
- return errors[0][1]
-
- return endpointthunk()
-
-
-@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
- "/rqtl2-bundle/select-geno-dataset"),
- methods=["POST"])
-@require_login
-def select_geno_dataset(species_id: int, population_id: int):
- """Select from existing geno datasets."""
- with database_connection(app.config["SQL_URI"]) as conn:
- def __thunk__():
- geno_dset = geno_datasets_by_species_and_population(
- conn, species_id, population_id)
- if not bool(geno_dset):
- flash("No genotype dataset was provided!",
- "alert-error error-rqtl2")
- return redirect(url_for("expression-data.rqtl2.select_geno_dataset",
- species_id=species_id,
- population_id=population_id,
- pgsrc="error"),
- code=307)
-
- flash("Genotype accepted", "alert-success error-rqtl2")
- return redirect(url_for("expression-data.rqtl2.select_dataset_info",
- species_id=species_id,
- population_id=population_id,
- pgsrc="expression-data.rqtl2.select_geno_dataset"),
- code=307)
-
- return with_errors(__thunk__,
- partial(check_species, conn=conn),
- partial(check_population, conn=conn,
- species_id=species_id),
- partial(check_r_qtl2_bundle,
- species_id=species_id,
- population_id=population_id),
- partial(check_geno_dataset,
- conn=conn,
- species_id=species_id,
- population_id=population_id))
-
-
-@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
- "/rqtl2-bundle/create-geno-dataset"),
- methods=["POST"])
-@require_login
-def create_geno_dataset(species_id: int, population_id: int):
- """Create a new geno dataset."""
- with database_connection(app.config["SQL_URI"]) as conn:
- def __thunk__():
- sgeno_page = redirect(url_for("expression-data.rqtl2.select_dataset_info",
- species_id=species_id,
- population_id=population_id,
- pgsrc="error"),
- code=307)
- errorclasses = "alert-error error-rqtl2 error-rqtl2-create-geno-dataset"
- if not bool(request.form.get("dataset-name")):
- flash("You must provide the dataset name", errorclasses)
- return sgeno_page
- if not bool(request.form.get("dataset-fullname")):
- flash("You must provide the dataset full name", errorclasses)
- return sgeno_page
- public = 2 if request.form.get("dataset-public") == "on" else 0
-
- with conn.cursor(cursorclass=DictCursor) as cursor:
- datasetname = request.form["dataset-name"]
- new_dataset = {
- "name": datasetname,
- "fname": request.form.get("dataset-fullname"),
- "sname": request.form.get("dataset-shortname") or datasetname,
- "today": date.today().isoformat(),
- "pub": public,
- "isetid": population_id
- }
- cursor.execute("SELECT * FROM GenoFreeze WHERE Name=%s",
- (datasetname,))
- results = cursor.fetchall()
- if bool(results):
- flash(
- f"A genotype dataset with name '{escape(datasetname)}' "
- "already exists.",
- errorclasses)
- return redirect(url_for("expression-data.rqtl2.select_dataset_info",
- species_id=species_id,
- population_id=population_id,
- pgsrc="error"),
- code=307)
- cursor.execute(
- "INSERT INTO GenoFreeze("
- "Name, FullName, ShortName, CreateTime, public, InbredSetId"
- ") "
- "VALUES("
- "%(name)s, %(fname)s, %(sname)s, %(today)s, %(pub)s, %(isetid)s"
- ")",
- new_dataset)
- flash("Created dataset successfully.", "alert-success")
- return render_template(
- "rqtl2/create-geno-dataset-success.html",
- species=species_by_id(conn, species_id),
- population=population_by_species_and_id(
- conn, species_id, population_id),
- rqtl2_bundle_file=request.form["rqtl2_bundle_file"],
- geno_dataset={**new_dataset, "id": cursor.lastrowid})
-
- return with_errors(__thunk__,
- partial(check_species, conn=conn),
- partial(check_population, conn=conn, species_id=species_id),
- partial(check_r_qtl2_bundle,
- species_id=species_id,
- population_id=population_id))
-
-
-@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
- "/rqtl2-bundle/select-tissue"),
- methods=["POST"])
-@require_login
-def select_tissue(species_id: int, population_id: int):
- """Select from existing tissues."""
- with database_connection(app.config["SQL_URI"]) as conn:
- def __thunk__():
- if not bool(request.form.get("tissueid", "").strip()):
- flash("Invalid tissue selection!",
- "alert-error error-select-tissue error-rqtl2")
-
- return redirect(url_for("expression-data.rqtl2.select_dataset_info",
- species_id=species_id,
- population_id=population_id,
- pgsrc="expression-data.rqtl2.select_geno_dataset"),
- code=307)
-
- return with_errors(__thunk__,
- partial(check_species, conn=conn),
- partial(check_population,
- conn=conn,
- species_id=species_id),
- partial(check_r_qtl2_bundle,
- species_id=species_id,
- population_id=population_id),
- partial(check_geno_dataset,
- conn=conn,
- species_id=species_id,
- population_id=population_id))
-
-@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
- "/rqtl2-bundle/create-tissue"),
- methods=["POST"])
-@require_login
-def create_tissue(species_id: int, population_id: int):
- """Add new tissue, organ or biological material to the system."""
- form = request.form
- datasetinfopage = redirect(
- url_for("expression-data.rqtl2.select_dataset_info",
- species_id=species_id,
- population_id=population_id,
- pgsrc="expression-data.rqtl2.select_geno_dataset"),
- code=307)
- with database_connection(app.config["SQL_URI"]) as conn:
- tissuename = form.get("tissuename", "").strip()
- tissueshortname = form.get("tissueshortname", "").strip()
- if not bool(tissuename):
- flash("Organ/Tissue name MUST be provided.",
- "alert-error error-create-tissue error-rqtl2")
- return datasetinfopage
-
- if not bool(tissueshortname):
- flash("Organ/Tissue short name MUST be provided.",
- "alert-error error-create-tissue error-rqtl2")
- return datasetinfopage
-
- try:
- tissue = create_new_tissue(conn, tissuename, tissueshortname)
- flash("Tissue created successfully!", "alert-success")
- return render_template(
- "rqtl2/create-tissue-success.html",
- species=species_by_id(conn, species_id),
- population=population_by_species_and_id(
- conn, species_id, population_id),
- rqtl2_bundle_file=request.form["rqtl2_bundle_file"],
- geno_dataset=geno_dataset_by_id(
- conn,
- int(request.form["geno-dataset-id"])),
- tissue=tissue)
- except mdb.IntegrityError as _ierr:
- flash("Tissue/Organ with that short name already exists!",
- "alert-error error-create-tissue error-rqtl2")
- return datasetinfopage
-
-
-@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
- "/rqtl2-bundle/select-probeset-study"),
- methods=["POST"])
-@require_login
-def select_probeset_study(species_id: int, population_id: int):
- """Select or create a probeset study."""
- with database_connection(app.config["SQL_URI"]) as conn:
- def __thunk__():
- summary_page = redirect(url_for("expression-data.rqtl2.select_dataset_info",
- species_id=species_id,
- population_id=population_id),
- code=307)
- if not bool(probeset_study_by_id(conn, int(request.form["probe-study-id"]))):
- flash("Invalid study selected!", "alert-error error-rqtl2")
- return summary_page
-
- return summary_page
- return with_errors(__thunk__,
- partial(check_species, conn=conn),
- partial(check_population,
- conn=conn,
- species_id=species_id),
- partial(check_r_qtl2_bundle,
- species_id=species_id,
- population_id=population_id),
- partial(check_geno_dataset,
- conn=conn,
- species_id=species_id,
- population_id=population_id),
- partial(check_tissue, conn=conn),
- partial(check_probe_study,
- conn=conn,
- species_id=species_id,
- population_id=population_id))
-
-
-@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
- "/rqtl2-bundle/select-probeset-dataset"),
- methods=["POST"])
-@require_login
-def select_probeset_dataset(species_id: int, population_id: int):
- """Select or create a probeset dataset."""
- with database_connection(app.config["SQL_URI"]) as conn:
- def __thunk__():
- summary_page = redirect(url_for("expression-data.rqtl2.select_dataset_info",
- species_id=species_id,
- population_id=population_id),
- code=307)
- if not bool(probeset_study_by_id(conn, int(request.form["probe-study-id"]))):
- flash("Invalid study selected!", "alert-error error-rqtl2")
- return summary_page
-
- return summary_page
-
- return with_errors(__thunk__,
- partial(check_species, conn=conn),
- partial(check_population,
- conn=conn,
- species_id=species_id),
- partial(check_r_qtl2_bundle,
- species_id=species_id,
- population_id=population_id),
- partial(check_geno_dataset,
- conn=conn,
- species_id=species_id,
- population_id=population_id),
- partial(check_tissue, conn=conn),
- partial(check_probe_study,
- conn=conn,
- species_id=species_id,
- population_id=population_id),
- partial(check_probe_dataset,
- conn=conn,
- species_id=species_id,
- population_id=population_id))
-
-
-@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
- "/rqtl2-bundle/create-probeset-study"),
- methods=["POST"])
-@require_login
-def create_probeset_study(species_id: int, population_id: int):
- """Create a new probeset study."""
- errorclasses = "alert-error error-rqtl2 error-rqtl2-create-probeset-study"
- with database_connection(app.config["SQL_URI"]) as conn:
- def __thunk__():
- form = request.form
- dataset_info_page = redirect(
- url_for("expression-data.rqtl2.select_dataset_info",
- species_id=species_id,
- population_id=population_id),
- code=307)
-
- if not (bool(form.get("platformid")) and
- bool(platform_by_id(conn, int(form["platformid"])))):
- flash("Invalid platform selected.", errorclasses)
- return dataset_info_page
-
- if not (bool(form.get("tissueid")) and
- bool(tissue_by_id(conn, int(form["tissueid"])))):
- flash("Invalid tissue selected.", errorclasses)
- return dataset_info_page
-
- studyname = form["studyname"]
- try:
- study = probeset_create_study(
- conn, population_id, int(form["platformid"]), int(form["tissueid"]),
- studyname, form.get("studyfullname") or "",
- form.get("studyshortname") or "")
- except mdb.IntegrityError as _ierr:
- flash(f"ProbeSet study with name '{escape(studyname)}' already "
- "exists.",
- errorclasses)
- return dataset_info_page
- return render_template(
- "rqtl2/create-probe-study-success.html",
- species=species_by_id(conn, species_id),
- population=population_by_species_and_id(
- conn, species_id, population_id),
- rqtl2_bundle_file=request.form["rqtl2_bundle_file"],
- geno_dataset=geno_dataset_by_id(
- conn,
- int(request.form["geno-dataset-id"])),
- study=study)
-
- return with_errors(__thunk__,
- partial(check_species, conn=conn),
- partial(check_population,
- conn=conn,
- species_id=species_id),
- partial(check_r_qtl2_bundle,
- species_id=species_id,
- population_id=population_id),
- partial(check_geno_dataset,
- conn=conn,
- species_id=species_id,
- population_id=population_id),
- partial(check_tissue, conn=conn))
-
-
-@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
- "/rqtl2-bundle/create-probeset-dataset"),
- methods=["POST"])
-@require_login
-def create_probeset_dataset(species_id: int, population_id: int):#pylint: disable=[too-many-return-statements]
- """Create a new probeset dataset."""
- errorclasses = "alert-error error-rqtl2 error-rqtl2-create-probeset-dataset"
- with database_connection(app.config["SQL_URI"]) as conn:
- def __thunk__():#pylint: disable=[too-many-return-statements]
- form = request.form
- summary_page = redirect(url_for("expression-data.rqtl2.select_dataset_info",
- species_id=species_id,
- population_id=population_id),
- code=307)
- if not bool(form.get("averageid")):
- flash("Averaging method not selected!", errorclasses)
- return summary_page
- if not bool(form.get("datasetname")):
- flash("Dataset name not provided!", errorclasses)
- return summary_page
- if not bool(form.get("datasetfullname")):
- flash("Dataset full name not provided!", errorclasses)
- return summary_page
-
- tissue = tissue_by_id(conn, form.get("tissueid", "").strip())
-
- study = probeset_study_by_id(conn, int(form["probe-study-id"]))
- if not bool(study):
- flash("Invalid ProbeSet study provided!", errorclasses)
- return summary_page
-
- avgmethod = averaging_method_by_id(conn, int(form["averageid"]))
- if not bool(avgmethod):
- flash("Invalid averaging method provided!", errorclasses)
- return summary_page
-
- try:
- dset = probeset_create_dataset(conn,
- int(form["probe-study-id"]),
- int(form["averageid"]),
- form["datasetname"],
- form["datasetfullname"],
- form["datasetshortname"],
- form["datasetpublic"] == "on",
- form.get(
- "datasetdatascale", "log2"))
- except mdb.IntegrityError as _ierr:
- app.logger.debug("Possible integrity error: %s", traceback.format_exc())
- flash(("IntegrityError: The data you provided has some errors: "
- f"{_ierr.args}"),
- errorclasses)
- return summary_page
- except Exception as _exc:# pylint: disable=[broad-except]
- app.logger.debug("Error creating ProbeSet dataset: %s",
- traceback.format_exc())
- flash(("There was a problem creating your dataset. Please try "
- "again."),
- errorclasses)
- return summary_page
- return render_template(
- "rqtl2/create-probe-dataset-success.html",
- species=species_by_id(conn, species_id),
- population=population_by_species_and_id(
- conn, species_id, population_id),
- rqtl2_bundle_file=request.form["rqtl2_bundle_file"],
- geno_dataset=geno_dataset_by_id(
- conn,
- int(request.form["geno-dataset-id"])),
- tissue=tissue,
- study=study,
- avgmethod=avgmethod,
- dataset=dset)
-
- return with_errors(__thunk__,
- partial(check_species, conn=conn),
- partial(check_population,
- conn=conn,
- species_id=species_id),
- partial(check_r_qtl2_bundle,
- species_id=species_id,
- population_id=population_id),
- partial(check_geno_dataset,
- conn=conn,
- species_id=species_id,
- population_id=population_id),
- partial(check_tissue, conn=conn),
- partial(check_probe_study,
- conn=conn,
- species_id=species_id,
- population_id=population_id))
-
-
-@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
- "/rqtl2-bundle/dataset-info"),
- methods=["POST"])
-@require_login
-def select_dataset_info(species_id: int, population_id: int):
- """
- If `geno` files exist in the R/qtl2 bundle, prompt user to provide the
- dataset the genotypes belong to.
- """
- form = request.form
- with database_connection(app.config["SQL_URI"]) as conn:
- def __thunk__():
- species = species_by_id(conn, species_id)
- population = population_by_species_and_id(
- conn, species_id, population_id)
- thefile = fullpath(form["rqtl2_bundle_file"])
- with ZipFile(str(thefile), "r") as zfile:
- cdata = r_qtl2.control_data(zfile)
-
- geno_dataset = geno_dataset_by_id(
- conn,form.get("geno-dataset-id", "").strip())
- if "geno" in cdata and not bool(form.get("geno-dataset-id")):
- return render_template(
- "rqtl2/select-geno-dataset.html",
- species=species,
- population=population,
- rqtl2_bundle_file=thefile.name,
- datasets=geno_datasets_by_species_and_population(
- conn, species_id, population_id))
-
- tissue = tissue_by_id(conn, form.get("tissueid", "").strip())
- if "pheno" in cdata and not bool(tissue):
- return render_template(
- "rqtl2/select-tissue.html",
- species=species,
- population=population,
- rqtl2_bundle_file=thefile.name,
- geno_dataset=geno_dataset,
- studies=probeset_studies_by_species_and_population(
- conn, species_id, population_id),
- platforms=platforms_by_species(conn, species_id),
- tissues=all_tissues(conn))
-
- probeset_study = probeset_study_by_id(
- conn, form.get("probe-study-id", "").strip())
- if "pheno" in cdata and not bool(probeset_study):
- return render_template(
- "rqtl2/select-probeset-study-id.html",
- species=species,
- population=population,
- rqtl2_bundle_file=thefile.name,
- geno_dataset=geno_dataset,
- studies=probeset_studies_by_species_and_population(
- conn, species_id, population_id),
- platforms=platforms_by_species(conn, species_id),
- tissue=tissue)
- probeset_study = probeset_study_by_id(
- conn, int(form["probe-study-id"]))
-
- probeset_dataset = probeset_dataset_by_id(
- conn, form.get("probe-dataset-id", "").strip())
- if "pheno" in cdata and not bool(probeset_dataset):
- return render_template(
- "rqtl2/select-probeset-dataset.html",
- species=species,
- population=population,
- rqtl2_bundle_file=thefile.name,
- geno_dataset=geno_dataset,
- probe_study=probeset_study,
- tissue=tissue,
- datasets=probeset_datasets_by_study(
- conn, int(form["probe-study-id"])),
- avgmethods=averaging_methods(conn))
-
- return render_template("rqtl2/summary-info.html",
- species=species,
- population=population,
- rqtl2_bundle_file=thefile.name,
- geno_dataset=geno_dataset,
- tissue=tissue,
- probe_study=probeset_study,
- probe_dataset=probeset_dataset)
-
- return with_errors(__thunk__,
- partial(check_species, conn=conn),
- partial(check_population,
- conn=conn,
- species_id=species_id),
- partial(check_r_qtl2_bundle,
- species_id=species_id,
- population_id=population_id))
-
-
-@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
- "/rqtl2-bundle/confirm-bundle-details"),
- methods=["POST"])
-@require_login
-def confirm_bundle_details(species_id: int, population_id: int):
- """Confirm the details and trigger R/qtl2 bundle processing..."""
- redisuri = app.config["REDIS_URL"]
- with (database_connection(app.config["SQL_URI"]) as conn,
- Redis.from_url(redisuri, decode_responses=True) as rconn):
- def __thunk__():
- redis_ttl_seconds = app.config["JOBS_TTL_SECONDS"]
- jobid = str(uuid4())
- _job = jobs.launch_job(
- jobs.initialise_job(
- rconn,
- jobs.jobsnamespace(),
- jobid,
- [
- sys.executable, "-m", "scripts.process_rqtl2_bundle",
- app.config["SQL_URI"], app.config["REDIS_URL"],
- jobs.jobsnamespace(), jobid, "--redisexpiry",
- str(redis_ttl_seconds)],
- "R/qtl2 Bundle Upload",
- redis_ttl_seconds,
- {
- "bundle-metadata": json.dumps({
- "speciesid": species_id,
- "populationid": population_id,
- "rqtl2-bundle-file": str(fullpath(
- request.form["rqtl2_bundle_file"])),
- "geno-dataset-id": request.form.get(
- "geno-dataset-id", ""),
- "probe-study-id": request.form.get(
- "probe-study-id", ""),
- "probe-dataset-id": request.form.get(
- "probe-dataset-id", ""),
- **({
- "platformid": probeset_study_by_id(
- conn,
- int(request.form["probe-study-id"]))["ChipId"]
- } if bool(request.form.get("probe-study-id")) else {})
- })
- }),
- redisuri,
- f"{app.config['UPLOAD_FOLDER']}/job_errors")
-
- return redirect(url_for("expression-data.rqtl2.rqtl2_processing_status",
- jobid=jobid))
-
- return with_errors(__thunk__,
- partial(check_species, conn=conn),
- partial(check_population,
- conn=conn,
- species_id=species_id),
- partial(check_r_qtl2_bundle,
- species_id=species_id,
- population_id=population_id),
- partial(check_geno_dataset,
- conn=conn,
- species_id=species_id,
- population_id=population_id),
- partial(check_probe_study,
- conn=conn,
- species_id=species_id,
- population_id=population_id),
- partial(check_probe_dataset,
- conn=conn,
- species_id=species_id,
- population_id=population_id))
-
-
-@rqtl2.route("/status/<uuid:jobid>")
-def rqtl2_processing_status(jobid: UUID):
- """Retrieve the status of the job processing the uploaded R/qtl2 bundle."""
- with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
- try:
- thejob = jobs.job(rconn, jobs.jobsnamespace(), jobid)
-
- messagelistname = thejob.get("log-messagelist")
- logmessages = (rconn.lrange(messagelistname, 0, -1)
- if bool(messagelistname) else [])
-
- if thejob["status"] == "error":
- return render_template(
- "rqtl2/rqtl2-job-error.html", job=thejob, messages=logmessages)
- if thejob["status"] == "success":
- return render_template("rqtl2/rqtl2-job-results.html",
- job=thejob,
- messages=logmessages)
-
- return render_template(
- "rqtl2/rqtl2-job-status.html", job=thejob, messages=logmessages)
- except jobs.JobNotFound as _exc:
- return render_template("rqtl2/no-such-job.html", jobid=jobid)
diff --git a/uploader/expression_data/views.py b/uploader/expression_data/views.py
new file mode 100644
index 0000000..bbe6538
--- /dev/null
+++ b/uploader/expression_data/views.py
@@ -0,0 +1,384 @@
+"""Views for expression data"""
+import os
+import uuid
+import mimetypes
+from typing import Tuple
+from zipfile import ZipFile, is_zipfile
+
+import jsonpickle
+from redis import Redis
+from werkzeug.utils import secure_filename
+from flask import (flash,
+ request,
+ url_for,
+ redirect,
+ Blueprint,
+ current_app as app)
+
+from quality_control.errors import InvalidValue, DuplicateHeading
+
+from uploader import jobs
+from uploader.datautils import order_by_family
+from uploader.ui import make_template_renderer
+from uploader.authorisation import require_login
+from uploader.species.models import all_species, species_by_id
+from uploader.db_utils import with_db_connection, database_connection
+from uploader.population.models import (populations_by_species,
+ population_by_species_and_id)
+
+exprdatabp = Blueprint("expression-data", __name__)
+render_template = make_template_renderer("expression-data")
+
+def isinvalidvalue(item):
+ """Check whether item is of type InvalidValue"""
+ return isinstance(item, InvalidValue)
+
+
+def isduplicateheading(item):
+ """Check whether item is of type DuplicateHeading"""
+ return isinstance(item, DuplicateHeading)
+
+
+def errors(rqst) -> Tuple[str, ...]:
+ """Return a tuple of the errors found in the request `rqst`. If no error is
+ found, then an empty tuple is returned."""
+ def __filetype_error__():
+ return (
+ ("Invalid file type provided.",)
+ if rqst.form.get("filetype") not in ("average", "standard-error")
+ else tuple())
+
+ def __file_missing_error__():
+ return (
+ ("No file was uploaded.",)
+ if ("qc_text_file" not in rqst.files or
+ rqst.files["qc_text_file"].filename == "")
+ else tuple())
+
+ def __file_mimetype_error__():
+ text_file = rqst.files["qc_text_file"]
+ return (
+ (
+ ("Invalid file! Expected a tab-separated-values file, or a zip "
+             "file containing a tab-separated-values file."),)
+ if text_file.mimetype not in (
+ "text/plain", "text/tab-separated-values",
+ "application/zip")
+ else tuple())
+
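+    # The mimetype check is deliberately short-circuited behind the
+    # missing-file check: __file_mimetype_error__ dereferences
+    # rqst.files["qc_text_file"], so it must only run when a file was uploaded.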
+ return (
+ __filetype_error__() +
+ (__file_missing_error__() or __file_mimetype_error__()))
+
+
+def zip_file_errors(filepath, upload_dir) -> Tuple[str, ...]:
+ """Check the uploaded zip file for errors."""
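+    # Non-zip uploads pass through untouched; a zip upload must contain
+    # exactly one member, and that member must be a tab-separated text file
+    # (the member is extracted into `upload_dir` so its type can be checked).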
+ zfile_errors: Tuple[str, ...] = tuple()
+ if is_zipfile(filepath):
+ with ZipFile(filepath, "r") as zfile:
+ infolist = zfile.infolist()
+ if len(infolist) != 1:
+ zfile_errors = zfile_errors + (
+ ("Expected exactly one (1) member file within the uploaded zip "
+ f"file. Got {len(infolist)} member files."),)
+ if len(infolist) == 1 and infolist[0].is_dir():
+ zfile_errors = zfile_errors + (
+ ("Expected a member text file in the uploaded zip file. Got a "
+ "directory/folder."),)
+
+ if len(infolist) == 1 and not infolist[0].is_dir():
+ zfile.extract(infolist[0], path=upload_dir)
+ mime = mimetypes.guess_type(f"{upload_dir}/{infolist[0].filename}")
+ if mime[0] != "text/tab-separated-values":
+ zfile_errors = zfile_errors + (
+ ("Expected the member text file in the uploaded zip file to"
+ " be a tab-separated file."),)
+
+ return zfile_errors
+
+
+@exprdatabp.route("populations/expression-data", methods=["GET"])
+@require_login
+def index():
+ """Display the expression data index page."""
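+    # With no species selected, render the species picker; otherwise hand the
+    # user off to population selection for the chosen species.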
+ with database_connection(app.config["SQL_URI"]) as conn:
+ if not bool(request.args.get("species_id")):
+ return render_template("expression-data/index.html",
+ species=order_by_family(all_species(conn)),
+ activelink="expression-data")
+ species = species_by_id(conn, request.args.get("species_id"))
+ if not bool(species):
+ flash("Could not find species selected!", "alert-danger")
+ return redirect(url_for("species.populations.expression-data.index"))
+ return redirect(url_for(
+ "species.populations.expression-data.select_population",
+ species_id=species["SpeciesId"]))
+
+
+@exprdatabp.route("<int:species_id>/populations/expression-data/select-population",
+ methods=["GET"])
+@require_login
+def select_population(species_id: int):
+ """Select the expression data's population."""
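+    # Mirror of index(): without a population_id, render the population
+    # picker; with one, redirect straight to the file-upload step.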
+ with database_connection(app.config["SQL_URI"]) as conn:
+ species = species_by_id(conn, species_id)
+ if not bool(species):
+ flash("No such species!", "alert-danger")
+ return redirect(url_for("species.populations.expression-data.index"))
+
+ if not bool(request.args.get("population_id")):
+ return render_template("expression-data/select-population.html",
+ species=species,
+ populations=order_by_family(
+ populations_by_species(conn, species_id),
+ order_key="FamilyOrder"),
+ activelink="expression-data")
+
+ population = population_by_species_and_id(
+ conn, species_id, request.args.get("population_id"))
+ if not bool(population):
+ flash("No such population!", "alert-danger")
+ return redirect(url_for(
+ "species.populations.expression-data.select_population",
+ species_id=species_id))
+
+ return redirect(url_for("species.populations.expression-data.upload_file",
+ species_id=species_id,
+ population_id=population["Id"]))
+
+
+@exprdatabp.route("<int:species_id>/populations/<int:population_id>/"
+ "expression-data/upload",
+ methods=["GET", "POST"])
+@require_login
+def upload_file(species_id: int, population_id: int):
+    """Handle the upload of the expression-data file."""
+ with database_connection(app.config["SQL_URI"]) as conn:
+ species = species_by_id(conn, species_id)
+ population = population_by_species_and_id(conn, species_id, population_id)
+ if request.method == "GET":
+ return render_template("expression-data/select-file.html",
+ species=species,
+ population=population)
+
+ upload_dir = app.config["UPLOAD_FOLDER"]
+ request_errors = errors(request)
+ if request_errors:
+ for error in request_errors:
+ flash(error, "alert-danger error-expr-data")
+            return redirect(url_for(
+                "species.populations.expression-data.upload_file",
+                species_id=species_id,
+                population_id=population_id))
+
+ filename = secure_filename(
+ request.files["qc_text_file"].filename)# type: ignore[arg-type]
+ if not os.path.exists(upload_dir):
+ os.mkdir(upload_dir)
+
+ filepath = os.path.join(upload_dir, filename)
+        request.files["qc_text_file"].save(filepath)
+
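+        # If the upload is a zip bundle, verify it holds exactly one
+        # tab-separated member before handing the file off for parsing.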
+ zip_errors = zip_file_errors(filepath, upload_dir)
+ if zip_errors:
+ for error in zip_errors:
+ flash(error, "alert-danger error-expr-data")
+            return redirect(url_for(
+                "species.populations.expression-data.upload_file",
+                species_id=species_id,
+                population_id=population_id))
+
+ return redirect(url_for("species.populations.expression-data.parse_file",
+ species_id=species_id,
+ population_id=population_id,
+ filename=filename,
+ filetype=request.form["filetype"]))
+
+
+@exprdatabp.route("/data-review", methods=["GET"])
+@require_login
+def data_review():
+ """Provide some help on data expectations to the user."""
+ return render_template("expression-data/data-review.html")
+
+
+@exprdatabp.route(
+ "<int:species_id>/populations/<int:population_id>/expression-data/parse",
+ methods=["GET"])
+@require_login
+def parse_file(species_id: int, population_id: int):
+ """Trigger file parsing"""
+ _errors = False
+ filename = request.args.get("filename")
+ filetype = request.args.get("filetype")
+
+ species = with_db_connection(lambda con: species_by_id(con, species_id))
+ if not bool(species):
+ flash("No such species.", "alert-danger")
+ _errors = True
+
+ if filename is None:
+ flash("No file provided", "alert-danger")
+ _errors = True
+
+ if filetype is None:
+ flash("No filetype provided", "alert-danger")
+ _errors = True
+
+ if filetype not in ("average", "standard-error"):
+ flash("Invalid filetype provided", "alert-danger")
+ _errors = True
+
+ if filename:
+ filepath = os.path.join(app.config["UPLOAD_FOLDER"], filename)
+ if not os.path.exists(filepath):
+ flash("Selected file does not exist (any longer)", "alert-danger")
+ _errors = True
+
+ if _errors:
+        return redirect(url_for(
+            "species.populations.expression-data.upload_file",
+            species_id=species_id,
+            population_id=population_id))
+
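+    # Queue a background file-verification job: its progress and status are
+    # tracked in Redis, while any stderr output from the worker is written
+    # under UPLOAD_FOLDER/job_errors for the failure page to pick up.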
+ redisurl = app.config["REDIS_URL"]
+ with Redis.from_url(redisurl, decode_responses=True) as rconn:
+ job = jobs.launch_job(
+ jobs.build_file_verification_job(
+ rconn, app.config["SQL_URI"], redisurl,
+ species_id, filepath, filetype,# type: ignore[arg-type]
+ app.config["JOBS_TTL_SECONDS"]),
+ redisurl,
+ f"{app.config['UPLOAD_FOLDER']}/job_errors")
+
+ return redirect(url_for("species.populations.expression-data.parse_status",
+ species_id=species_id,
+ population_id=population_id,
+ job_id=job["jobid"]))
+
+
+@exprdatabp.route(
+ "<int:species_id>/populations/<int:population_id>/expression-data/parse/"
+ "status/<uuid:job_id>",
+ methods=["GET"])
+@require_login
+def parse_status(species_id: int, population_id: int, job_id: uuid.UUID):
+    """Retrieve the status of the file-parsing job."""
+ with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+ try:
+ job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+ except jobs.JobNotFound as _exc:
+ return render_template("no_such_job.html", job_id=job_id), 400
+
+ error_filename = jobs.error_filename(
+ job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors")
+ if os.path.exists(error_filename):
+ stat = os.stat(error_filename)
+ if stat.st_size > 0:
+                return redirect(url_for(
+                    "species.populations.expression-data.fail",
+                    species_id=species_id,
+                    population_id=population_id,
+                    job_id=job_id))
+
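+        # Dispatch on the job's reported status: finished or aborted jobs go
+        # to the results page, parse errors to the failure page, and anything
+        # else re-renders the progress page.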
+ job_id = job["jobid"]
+ progress = float(job["percent"])
+ status = job["status"]
+ filename = job.get("filename", "uploaded file")
+ _errors = jsonpickle.decode(
+ job.get("errors", jsonpickle.encode(tuple())))
+ if status in ("success", "aborted"):
+ return redirect(url_for("species.populations.expression-data.results",
+ species_id=species_id,
+ population_id=population_id,
+ job_id=job_id))
+
+ if status == "parse-error":
+            return redirect(url_for(
+                "species.populations.expression-data.fail",
+                species_id=species_id,
+                population_id=population_id,
+                job_id=job_id))
+
+ app.jinja_env.globals.update(
+ isinvalidvalue=isinvalidvalue,
+ isduplicateheading=isduplicateheading)
+ return render_template(
+ "expression-data/job-progress.html",
+            job_id=job_id,
+            job_status=status,
+            progress=progress,
+            message=job.get("message", ""),
+            job_name=f"Parsing '{filename}'",
+ errors=_errors,
+ species=with_db_connection(
+ lambda conn: species_by_id(conn, species_id)),
+ population=with_db_connection(
+ lambda conn: population_by_species_and_id(
+ conn, species_id, population_id)))
+
+
+@exprdatabp.route(
+ "<int:species_id>/populations/<int:population_id>/expression-data/parse/"
+ "<uuid:job_id>/results",
+ methods=["GET"])
+@require_login
+def results(species_id: int, population_id: int, job_id: uuid.UUID):
+ """Show results of parsing..."""
+ with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+ job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+
+ if job:
+ filename = job["filename"]
+ _errors = jsonpickle.decode(job.get("errors", jsonpickle.encode(tuple())))
+ app.jinja_env.globals.update(
+ isinvalidvalue=isinvalidvalue,
+ isduplicateheading=isduplicateheading)
+ return render_template(
+ "expression-data/parse-results.html",
+ errors=_errors,
+                job_name=f"Parsing '{filename}'",
+                user_aborted=job.get("user_aborted"),
+ job_id=job["jobid"],
+ species=with_db_connection(
+ lambda conn: species_by_id(conn, species_id)),
+ population=with_db_connection(
+ lambda conn: population_by_species_and_id(
+ conn, species_id, population_id)))
+
+ return render_template("expression-data/no-such-job.html", job_id=job_id)
+
+
+@exprdatabp.route(
+ "<int:species_id>/populations/<int:population_id>/expression-data/parse/"
+ "<uuid:job_id>/fail",
+ methods=["GET"])
+@require_login
+def fail(species_id: int, population_id: int, job_id: uuid.UUID):
+    """Handle parsing failure."""
+ with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+ job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+
+ if job:
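+            # A non-empty error file means the worker process itself crashed;
+            # otherwise this was an ordinary parse failure.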
+ error_filename = jobs.error_filename(
+ job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors")
+ if os.path.exists(error_filename):
+ stat = os.stat(error_filename)
+ if stat.st_size > 0:
+ return render_template(
+ "worker_failure.html", job_id=job_id)
+
+ return render_template("parse_failure.html", job=job)
+
+ return render_template("expression-data/no-such-job.html",
+ **with_db_connection(lambda conn: {
+ "species_id": species_by_id(conn, species_id),
+ "population_id": population_by_species_and_id(
+ conn, species_id, population_id)}),
+ job_id=job_id)
+
+
+@exprdatabp.route(
+ "<int:species_id>/populations/<int:population_id>/expression-data/parse/"
+ "abort",
+ methods=["POST"])
+@require_login
+def abort(species_id: int, population_id: int):
+ """Handle user request to abort file processing"""
+ job_id = request.form["job_id"]
+
+ with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+ job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+
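+        # Record the abort request on the job's Redis hash; the results page
+        # surfaces this through the job's "user_aborted" field.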
+ if job:
+ rconn.hset(name=jobs.job_key(jobs.jobsnamespace(), job_id),
+ key="user_aborted",
+ value=int(True))
+
+ return redirect(url_for("species.populations.expression-data.parse_status",
+ species_id=species_id,
+ population_id=population_id,
+ job_id=job_id))