Diffstat (limited to 'uploader/expression_data')
 -rw-r--r--  uploader/expression_data/__init__.py     2
 -rw-r--r--  uploader/expression_data/dbinsert.py   400
 -rw-r--r--  uploader/expression_data/views.py      385
 3 files changed, 787 insertions, 0 deletions
diff --git a/uploader/expression_data/__init__.py b/uploader/expression_data/__init__.py
new file mode 100644
index 0000000..fc8bd41
--- /dev/null
+++ b/uploader/expression_data/__init__.py
@@ -0,0 +1,2 @@
+"""Package handling upload of files."""
+from .views import exprdatabp
diff --git a/uploader/expression_data/dbinsert.py b/uploader/expression_data/dbinsert.py
new file mode 100644
index 0000000..7040698
--- /dev/null
+++ b/uploader/expression_data/dbinsert.py
@@ -0,0 +1,400 @@
+"Handle inserting data into the database"
+import os
+import json
+from typing import Union
+from functools import reduce
+from datetime import datetime
+
+from redis import Redis
+from MySQLdb.cursors import DictCursor
+from gn_libs.mysqldb import database_connection
+from flask import (
+    flash, request, url_for, Blueprint, redirect, render_template,
+    current_app as app)
+
+from uploader import jobs
+from uploader.authorisation import require_login
+from uploader.db_utils import with_db_connection
+from uploader.population.models import populations_by_species
+from uploader.species.models import all_species, species_by_id
+from uploader.platforms.models import platform_by_species_and_id
+
+dbinsertbp = Blueprint("dbinsert", __name__)
+
+def render_error(error_msg):
+    "Render the generic error page"
+    return render_template("dbupdate_error.html", error_message=error_msg), 400
+
+def make_menu_items_grouper(grouping_fn=lambda item: item):
+    "Build function to be used to group menu items."
+    def __grouper__(acc, row):
+        grouping = grouping_fn(row[2])
+        row_values = (row[0].strip(), row[1].strip())
+        if acc.get(grouping) is None:
+            return {**acc, grouping: (row_values,)}
+        return {**acc, grouping: (acc[grouping] + (row_values,))}
+    return __grouper__
+
+def genechips():
+    "Retrieve the genechip information from the database"
+    def __organise_by_species__(acc, chip):
+        speciesid = chip["SpeciesId"]
+        if acc.get(speciesid) is None:
+            return {**acc, speciesid: (chip,)}
+        return {**acc, speciesid: acc[speciesid] + (chip,)}
+
+    with database_connection(app.config["SQL_URI"]) as conn:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            cursor.execute("SELECT * FROM GeneChip ORDER BY GeneChipName ASC")
+            return reduce(__organise_by_species__, cursor.fetchall(), {})
+
+
+def studies_by_species_and_platform(speciesid:int, genechipid:int) -> tuple:
+    "Retrieve the studies by the related species and gene platform"
+    with database_connection(app.config["SQL_URI"]) as conn:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            query = (
+                "SELECT Species.SpeciesId, ProbeFreeze.* "
+                "FROM Species INNER JOIN InbredSet "
+                "ON Species.SpeciesId=InbredSet.SpeciesId "
+                "INNER JOIN ProbeFreeze "
+                "ON InbredSet.InbredSetId=ProbeFreeze.InbredSetId "
+                "WHERE Species.SpeciesId = %s "
+                "AND ProbeFreeze.ChipId = %s")
+            cursor.execute(query, (speciesid, genechipid))
+            return tuple(cursor.fetchall())
+
+def organise_groups_by_family(acc:dict, group:dict) -> dict:
+    "Organise the group (InbredSet) information by the group field"
+    family = group["Family"]
+    if acc.get(family):
+        return {**acc, family: acc[family] + (group,)}
+    return {**acc, family: (group,)}
+
+def tissues() -> tuple:
+    "Retrieve type (Tissue) information from the database."
+    with database_connection(app.config["SQL_URI"]) as conn:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            cursor.execute("SELECT * FROM Tissue ORDER BY Name")
+            return tuple(cursor.fetchall())
+
+@dbinsertbp.route("/platform", methods=["POST"])
+@require_login
+def select_platform():
+    "Select the platform (GeneChipId) used for the data."
+    job_id = request.form["job_id"]
+    with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn,
+          database_connection(app.config["SQL_URI"]) as conn):
+        job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+        if job:
+            filename = job["filename"]
+            filepath = f"{app.config['UPLOADS_DIRECTORY']}/{filename}"
+            if os.path.exists(filepath):
+                default_species = 1
+                gchips = genechips()
+                return render_template(
+                    "select_platform.html", filename=filename,
+                    filetype=job["filetype"], totallines=int(job["currentline"]),
+                    default_species=default_species, species=all_species(conn),
+                    genechips=gchips[default_species],
+                    genechips_data=json.dumps(gchips))
+            return render_error(f"File '{filename}' no longer exists.")
+        return render_error(f"Job '{job_id}' no longer exists.")
+
+@dbinsertbp.route("/study", methods=["POST"])
+@require_login
+def select_study():
+    "View to select/create the study (ProbeFreeze) associated with the data."
+    form = request.form
+    try:
+        assert form.get("filename"), "filename"
+        assert form.get("filetype"), "filetype"
+        assert form.get("species"), "species"
+        assert form.get("genechipid"), "platform"
+
+        speciesid = form["species"]
+        genechipid = form["genechipid"]
+
+        the_studies = studies_by_species_and_platform(speciesid, genechipid)
+        the_groups = reduce(
+            organise_groups_by_family,
+            with_db_connection(
+                lambda conn: populations_by_species(conn, speciesid)),
+            {})
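+        # -13 acts as a "nothing selected yet" sentinel for the group and
+        # tissue dropdowns (assumption: no real Id is negative).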
+        return render_template(
+            "select_study.html", filename=form["filename"],
+            filetype=form["filetype"], totallines=form["totallines"],
+            species=speciesid, genechipid=genechipid, studies=the_studies,
+            groups=the_groups, tissues=tissues(),
+            selected_group=int(form.get("inbredsetid", -13)),
+            selected_tissue=int(form.get("tissueid", -13)))
+    except AssertionError as aserr:
+        return render_error(f"Missing data: {aserr.args[0]}")
+
+@dbinsertbp.route("/create-study", methods=["POST"])
+@require_login
+def create_study():
+    "Create a new study (ProbeFreeze)."
+    form = request.form
+    try:
+        assert form.get("filename"), "filename"
+        assert form.get("filetype"), "filetype"
+        assert form.get("species"), "species"
+        assert form.get("genechipid"), "platform"
+        assert form.get("studyname"), "study name"
+        assert form.get("inbredsetid"), "group"
+        assert form.get("tissueid"), "type/tissue"
+
+        with database_connection(app.config["SQL_URI"]) as conn:
+            with conn.cursor(cursorclass=DictCursor) as cursor:
+                values = (
+                    form["genechipid"],
+                    form["tissueid"],
+                    form["studyname"],
+                    form.get("studyfullname", ""),
+                    form.get("studyshortname", ""),
+                    datetime.now().date().strftime("%Y-%m-%d"),
+                    form["inbredsetid"])
+                query = (
+                    "INSERT INTO ProbeFreeze("
+                    "ChipId, TissueId, Name, FullName, ShortName, CreateTime, "
+                    "InbredSetId"
+                    ") VALUES (%s, %s, %s, %s, %s, %s, %s)")
+                cursor.execute(query, values)
+                new_studyid = cursor.lastrowid
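+                # Mirror the auto-generated Id into ProbeFreezeId; the legacy
+                # schema appears to expect both columns to hold the same value.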
+                cursor.execute(
+                    "UPDATE ProbeFreeze SET ProbeFreezeId=%s WHERE Id=%s",
+                    (new_studyid, new_studyid))
+                flash("Study created successfully", "alert-success")
+                return render_template(
+                    "continue_from_create_study.html",
+                    filename=form["filename"], filetype=form["filetype"],
+                    totallines=form["totallines"], species=form["species"],
+                    genechipid=form["genechipid"], studyid=new_studyid)
+    except AssertionError as aserr:
+        flash(f"Missing data: {aserr.args[0]}", "alert-error")
+        return redirect(url_for("dbinsert.select_study"), code=307)
+
+def datasets_by_study(studyid:int) -> tuple:
+    "Retrieve datasets associated with a study with the ID `studyid`."
+    with database_connection(app.config["SQL_URI"]) as conn:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            query = "SELECT * FROM ProbeSetFreeze WHERE ProbeFreezeId=%s"
+            cursor.execute(query, (studyid,))
+            return tuple(cursor.fetchall())
+
+def averaging_methods() -> tuple:
+    "Retrieve averaging methods from database"
+    with database_connection(app.config["SQL_URI"]) as conn:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            cursor.execute("SELECT * FROM AvgMethod")
+            return tuple(cursor.fetchall())
+
+def dataset_datascales() -> tuple:
+    "Retrieve datascales from database"
+    with database_connection(app.config["SQL_URI"]) as conn:
+        with conn.cursor() as cursor:
+            cursor.execute(
+                'SELECT DISTINCT DataScale FROM ProbeSetFreeze '
+                'WHERE DataScale IS NOT NULL AND DataScale != ""')
+            return tuple(
+                item for item in
+                (res[0].strip() for res in cursor.fetchall())
+                if (item is not None and item != ""))
+
+@dbinsertbp.route("/dataset", methods=["POST"])
+@require_login
+def select_dataset():
+    "Select the dataset to add the file contents against"
+    form = request.form
+    try:
+        assert form.get("filename"), "filename"
+        assert form.get("filetype"), "filetype"
+        assert form.get("species"), "species"
+        assert form.get("genechipid"), "platform"
+        assert form.get("studyid"), "study"
+
+        studyid = form["studyid"]
+        datasets = datasets_by_study(studyid)
+        return render_template(
+            "select_dataset.html", **{**form, "studyid": studyid},
+            datasets=datasets, avgmethods=averaging_methods(),
+            datascales=dataset_datascales())
+    except AssertionError as aserr:
+        return render_error(f"Missing data: {aserr.args[0]}")
+
+@dbinsertbp.route("/create-dataset", methods=["POST"])
+@require_login
+def create_dataset():
+    "Select the dataset to add the file contents against"
+    form = request.form
+    try:
+        assert form.get("filename"), "filename"
+        assert form.get("filetype"), "filetype"
+        assert form.get("species"), "species"
+        assert form.get("genechipid"), "platform"
+        assert form.get("studyid"), "study"
+        assert form.get("avgid"), "averaging method"
+        assert form.get("datasetname2"), "Dataset Name 2"
+        assert form.get("datasetfullname"), "Dataset Full Name"
+        assert form.get("datasetshortname"), "Dataset Short Name"
+        assert form.get("datasetpublic"), "Dataset public specification"
+        assert form.get("datasetconfidentiality"), "Dataset confidentiality"
+        assert form.get("datasetdatascale"), "Dataset Datascale"
+
+        with database_connection(app.config["SQL_URI"]) as conn:
+            with conn.cursor(cursorclass=DictCursor) as cursor:
+                datasetname = form["datasetname"]
+                cursor.execute("SELECT * FROM ProbeSetFreeze WHERE Name=%s",
+                               (datasetname,))
+                results = cursor.fetchall()
+                if bool(results):
+                    flash("A dataset with that name already exists.",
+                          "alert-error")
+                    return redirect(url_for("dbinsert.select_dataset"), code=307)
+                values = (
+                    form["studyid"], form["avgid"],
+                    datasetname, form["datasetname2"],
+                    form["datasetfullname"], form["datasetshortname"],
+                    datetime.now().date().strftime("%Y-%m-%d"),
+                    form["datasetpublic"], form["datasetconfidentiality"],
+                    "williamslab", form["datasetdatascale"])
+                query = (
+                    "INSERT INTO ProbeSetFreeze("
+                    "ProbeFreezeId, AvgID, Name, Name2, FullName, "
+                    "ShortName, CreateTime, OrderList, public, "
+                    "confidentiality, AuthorisedUsers, DataScale) "
+                    "VALUES"
+                    "(%s, %s, %s, %s, %s, %s, %s, NULL, %s, %s, %s, %s)")
+                cursor.execute(query, values)
+                new_datasetid = cursor.lastrowid
+                return render_template(
+                    "continue_from_create_dataset.html",
+                    filename=form["filename"], filetype=form["filetype"],
+                    species=form["species"], genechipid=form["genechipid"],
+                    studyid=form["studyid"], datasetid=new_datasetid,
+                    totallines=form["totallines"])
+    except AssertionError as aserr:
+        flash(f"Missing data {aserr.args[0]}", "alert-error")
+        return redirect(url_for("dbinsert.select_dataset"), code=307)
+
+def study_by_id(studyid:int) -> Union[dict, None]:
+    "Get a study by its Id"
+    with database_connection(app.config["SQL_URI"]) as conn:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            cursor.execute(
+                "SELECT * FROM ProbeFreeze WHERE Id=%s",
+                (studyid,))
+            return cursor.fetchone()
+
+def dataset_by_id(datasetid:int) -> Union[dict, None]:
+    "Retrieve a dataset by its id"
+    with database_connection(app.config["SQL_URI"]) as conn:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            cursor.execute(
+                ("SELECT AvgMethod.Name AS AvgMethodName, ProbeSetFreeze.* "
+                 "FROM ProbeSetFreeze INNER JOIN AvgMethod "
+                 "ON ProbeSetFreeze.AvgId=AvgMethod.AvgMethodId "
+                 "WHERE ProbeSetFreeze.Id=%s"),
+                (datasetid,))
+            return cursor.fetchone()
+
+def selected_keys(original: dict, keys: tuple) -> dict:
+    "Return a new dict from the `original` dict with only `keys` present."
+    return {key: value for key,value in original.items() if key in keys}
+
+@dbinsertbp.route("/final-confirmation", methods=["POST"])
+@require_login
+def final_confirmation():
+    "Preview the data before triggering entry into the database"
+    with database_connection(app.config["SQL_URI"]) as conn:
+        form = request.form
+        try:
+            assert form.get("filename"), "filename"
+            assert form.get("filetype"), "filetype"
+            assert form.get("species"), "species"
+            assert form.get("genechipid"), "platform"
+            assert form.get("studyid"), "study"
+            assert form.get("datasetid"), "dataset"
+
+            speciesid = form["species"]
+            genechipid = form["genechipid"]
+            studyid = form["studyid"]
+            datasetid=form["datasetid"]
+            return render_template(
+                "final_confirmation.html", filename=form["filename"],
+                filetype=form["filetype"], totallines=form["totallines"],
+                species=speciesid, genechipid=genechipid, studyid=studyid,
+                datasetid=datasetid, the_species=selected_keys(
+                    with_db_connection(lambda conn: species_by_id(conn, speciesid)),
+                    ("SpeciesName", "Name", "MenuName")),
+                platform=selected_keys(
+                    platform_by_species_and_id(conn, speciesid, genechipid),
+                    ("GeneChipName", "Name", "GeoPlatform", "Title", "GO_tree_value")),
+                study=selected_keys(
+                    study_by_id(studyid), ("Name", "FullName", "ShortName")),
+                dataset=selected_keys(
+                    dataset_by_id(datasetid),
+                    ("AvgMethodName", "Name", "Name2", "FullName", "ShortName",
+                     "DataScale")))
+        except AssertionError as aserr:
+            return render_error(f"Missing data: {aserr.args[0]}")
+
+@dbinsertbp.route("/insert-data", methods=["POST"])
+@require_login
+def insert_data():
+    "Trigger data insertion"
+    form = request.form
+    try:
+        assert form.get("filename"), "filename"
+        assert form.get("filetype"), "filetype"
+        assert form.get("species"), "species"
+        assert form.get("genechipid"), "platform"
+        assert form.get("studyid"), "study"
+        assert form.get("datasetid"), "dataset"
+
+        filename = form["filename"]
+        filepath = f"{app.config['UPLOADS_DIRECTORY']}/{filename}"
+        redisurl = app.config["REDIS_URL"]
+        if os.path.exists(filepath):
+            with Redis.from_url(redisurl, decode_responses=True) as rconn:
+                job = jobs.launch_job(
+                    jobs.data_insertion_job(
+                        rconn, filepath, form["filetype"], form["totallines"],
+                        form["species"], form["genechipid"], form["datasetid"],
+                        app.config["SQL_URI"], redisurl,
+                        app.config["JOBS_TTL_SECONDS"]),
+                    redisurl, f"{app.config['UPLOADS_DIRECTORY']}/job_errors")
+
+            return redirect(url_for("dbinsert.insert_status", job_id=job["jobid"]))
+        return render_error(f"File '{filename}' no longer exists.")
+    except AssertionError as aserr:
+        return render_error(f"Missing data: {aserr.args[0]}")
+
+@dbinsertbp.route("/status/<job_id>", methods=["GET"])
+def insert_status(job_id: str):
+    "Retrieve status of data insertion."
+    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+        job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+
+    if job:
+        job_status = job["status"]
+        if job_status == "success":
+            return render_template("insert_success.html", job=job)
+        if job["status"] == "error":
+            return render_template("insert_error.html", job=job)
+        return render_template("insert_progress.html", job=job)
+    return render_template("no_such_job.html", job_id=job_id), 400
diff --git a/uploader/expression_data/views.py b/uploader/expression_data/views.py
new file mode 100644
index 0000000..0e9b072
--- /dev/null
+++ b/uploader/expression_data/views.py
@@ -0,0 +1,385 @@
+"""Views for expression data"""
+import os
+import uuid
+import mimetypes
+from typing import Tuple
+from zipfile import ZipFile, is_zipfile
+
+import jsonpickle
+from redis import Redis
+from werkzeug.utils import secure_filename
+from gn_libs.mysqldb import database_connection
+from flask import (flash,
+                   request,
+                   redirect,
+                   Blueprint,
+                   current_app as app)
+
+from quality_control.errors import InvalidValue, DuplicateHeading
+
+from uploader import jobs
+from uploader.flask_extensions import url_for
+from uploader.datautils import order_by_family
+from uploader.ui import make_template_renderer
+from uploader.authorisation import require_login
+from uploader.db_utils import with_db_connection
+from uploader.species.models import all_species, species_by_id
+from uploader.population.models import (populations_by_species,
+                                        population_by_species_and_id)
+
+exprdatabp = Blueprint("expression-data", __name__)
+render_template = make_template_renderer("expression-data")
+
+def isinvalidvalue(item):
+    """Check whether item is of type InvalidValue"""
+    return isinstance(item, InvalidValue)
+
+
+def isduplicateheading(item):
+    """Check whether item is of type DuplicateHeading"""
+    return isinstance(item, DuplicateHeading)
+
+
+def errors(rqst) -> Tuple[str, ...]:
+    """Return a tuple of the errors found in the request `rqst`. If no error is
+    found, then an empty tuple is returned."""
+    def __filetype_error__():
+        return (
+            ("Invalid file type provided.",)
+            if rqst.form.get("filetype") not in ("average", "standard-error")
+            else tuple())
+
+    def __file_missing_error__():
+        return (
+            ("No file was uploaded.",)
+            if ("qc_text_file" not in rqst.files or
+                rqst.files["qc_text_file"].filename == "")
+            else tuple())
+
+    def __file_mimetype_error__():
+        text_file = rqst.files["qc_text_file"]
+        return (
+            (
+                ("Invalid file! Expected a tab-separated-values file, or a zip "
+                 "file of the a tab-separated-values file."),)
+            if text_file.mimetype not in (
+                    "text/plain", "text/tab-separated-values",
+                    "application/zip")
+            else tuple())
+
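+    # The `or` short-circuits: the mimetype check only runs when a file was
+    # actually uploaded, so rqst.files["qc_text_file"] is safe to access.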
+    return (
+        __filetype_error__() +
+        (__file_missing_error__() or __file_mimetype_error__()))
+
+
+def zip_file_errors(filepath, upload_dir) -> Tuple[str, ...]:
+    """Check the uploaded zip file for errors."""
+    zfile_errors: Tuple[str, ...] = tuple()
+    if is_zipfile(filepath):
+        with ZipFile(filepath, "r") as zfile:
+            infolist = zfile.infolist()
+            if len(infolist) != 1:
+                zfile_errors = zfile_errors + (
+                    ("Expected exactly one (1) member file within the uploaded zip "
+                     f"file. Got {len(infolist)} member files."),)
+            if len(infolist) == 1 and infolist[0].is_dir():
+                zfile_errors = zfile_errors + (
+                    ("Expected a member text file in the uploaded zip file. Got a "
+                     "directory/folder."),)
+
+            if len(infolist) == 1 and not infolist[0].is_dir():
+                zfile.extract(infolist[0], path=upload_dir)
+                mime = mimetypes.guess_type(f"{upload_dir}/{infolist[0].filename}")
+                if mime[0] != "text/tab-separated-values":
+                    zfile_errors = zfile_errors + (
+                        ("Expected the member text file in the uploaded zip file to"
+                         " be a tab-separated file."),)
+
+    return zfile_errors
+
+
+@exprdatabp.route("populations/expression-data", methods=["GET"])
+@require_login
+def index():
+    """Display the expression data index page."""
+    with database_connection(app.config["SQL_URI"]) as conn:
+        if not bool(request.args.get("species_id")):
+            return render_template("expression-data/index.html",
+                                   species=order_by_family(all_species(conn)),
+                                   activelink="expression-data")
+        species = species_by_id(conn, request.args.get("species_id"))
+        if not bool(species):
+            flash("Could not find species selected!", "alert-danger")
+            return redirect(url_for("species.populations.expression-data.index"))
+        return redirect(url_for(
+            "species.populations.expression-data.select_population",
+            species_id=species["SpeciesId"]))
+
+
+@exprdatabp.route("<int:species_id>/populations/expression-data/select-population",
+                  methods=["GET"])
+@require_login
+def select_population(species_id: int):
+    """Select the expression data's population."""
+    with database_connection(app.config["SQL_URI"]) as conn:
+        species = species_by_id(conn, species_id)
+        if not bool(species):
+            flash("No such species!", "alert-danger")
+            return redirect(url_for("species.populations.expression-data.index"))
+
+        if not bool(request.args.get("population_id")):
+            return render_template("expression-data/select-population.html",
+                                   species=species,
+                                   populations=order_by_family(
+                                       populations_by_species(conn, species_id),
+                                       order_key="FamilyOrder"),
+                                   activelink="expression-data")
+
+        population = population_by_species_and_id(
+            conn, species_id, request.args.get("population_id"))
+        if not bool(population):
+            flash("No such population!", "alert-danger")
+            return redirect(url_for(
+                "species.populations.expression-data.select_population",
+                species_id=species_id))
+
+        return redirect(url_for("species.populations.expression-data.upload_file",
+                                species_id=species_id,
+                                population_id=population["Id"]))
+
+
+@exprdatabp.route("<int:species_id>/populations/<int:population_id>/"
+                  "expression-data/upload",
+                  methods=["GET", "POST"])
+@require_login
+def upload_file(species_id: int, population_id: int):
+    """Enables uploading the files"""
+    with database_connection(app.config["SQL_URI"]) as conn:
+        species = species_by_id(conn, species_id)
+        population = population_by_species_and_id(conn, species_id, population_id)
+        if request.method == "GET":
+            return render_template("expression-data/select-file.html",
+                                   species=species,
+                                   population=population)
+
+        upload_dir = app.config["UPLOADS_DIRECTORY"]
+        request_errors = errors(request)
+        if request_errors:
+            for error in request_errors:
+                flash(error, "alert-danger error-expr-data")
+            return redirect(url_for("species.populations.expression-data.upload_file"))
+
+        filename = secure_filename(
+            request.files["qc_text_file"].filename)  # type: ignore[arg-type]
+        if not os.path.exists(upload_dir):
+            os.mkdir(upload_dir)
+
+        filepath = os.path.join(upload_dir, filename)
+        request.files["qc_text_file"].save(os.path.join(upload_dir, filename))
+
+        zip_errors = zip_file_errors(filepath, upload_dir)
+        if zip_errors:
+            for error in zip_errors:
+                flash(error, "alert-danger error-expr-data")
+            return redirect(url_for("species.populations.expression-data.index.upload_file"))
+
+        return redirect(url_for("species.populations.expression-data.parse_file",
+                                species_id=species_id,
+                                population_id=population_id,
+                                filename=filename,
+                                filetype=request.form["filetype"]))
+
+
+@exprdatabp.route("/data-review", methods=["GET"])
+@require_login
+def data_review():
+    """Provide some help on data expectations to the user."""
+    return render_template("expression-data/data-review.html")
+
+
+@exprdatabp.route(
+    "<int:species_id>/populations/<int:population_id>/expression-data/parse",
+    methods=["GET"])
+@require_login
+def parse_file(species_id: int, population_id: int):
+    """Trigger file parsing"""
+    _errors = False
+    filename = request.args.get("filename")
+    filetype = request.args.get("filetype")
+
+    species = with_db_connection(lambda con: species_by_id(con, species_id))
+    if not bool(species):
+        flash("No such species.", "alert-danger")
+        _errors = True
+
+    if filename is None:
+        flash("No file provided", "alert-danger")
+        _errors = True
+
+    if filetype is None:
+        flash("No filetype provided", "alert-danger")
+        _errors = True
+
+    if filetype not in ("average", "standard-error"):
+        flash("Invalid filetype provided", "alert-danger")
+        _errors = True
+
+    if filename:
+        filepath = os.path.join(app.config["UPLOADS_DIRECTORY"], filename)
+        if not os.path.exists(filepath):
+            flash("Selected file does not exist (any longer)", "alert-danger")
+            _errors = True
+
+    if _errors:
+        return redirect(url_for("species.populations.expression-data.upload_file"))
+
+    redisurl = app.config["REDIS_URL"]
+    with Redis.from_url(redisurl, decode_responses=True) as rconn:
+        job = jobs.launch_job(
+            jobs.build_file_verification_job(
+                rconn, app.config["SQL_URI"], redisurl,
+                species_id, filepath, filetype,  # type: ignore[arg-type]
+                app.config["JOBS_TTL_SECONDS"]),
+            redisurl,
+            f"{app.config['UPLOADS_DIRECTORY']}/job_errors")
+
+    return redirect(url_for("species.populations.expression-data.parse_status",
+                            species_id=species_id,
+                            population_id=population_id,
+                            job_id=job["jobid"]))
+
+
+@exprdatabp.route(
+    "<int:species_id>/populations/<int:population_id>/expression-data/parse/"
+    "status/<uuid:job_id>",
+    methods=["GET"])
+@require_login
+def parse_status(species_id: int, population_id: int, job_id: str):
+    "Retrieve the status of the job"
+    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+        try:
+            job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+        except jobs.JobNotFound as _exc:
+            return render_template("no_such_job.html", job_id=job_id), 400
+
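+    # A non-empty per-job error file is treated as a worker failure.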
+    error_filename = jobs.error_filename(
+        job_id, f"{app.config['UPLOADS_DIRECTORY']}/job_errors")
+    if os.path.exists(error_filename):
+        stat = os.stat(error_filename)
+        if stat.st_size > 0:
+            return redirect(url_for("parse.fail", job_id=job_id))
+
+    job_id = job["jobid"]
+    progress = float(job["percent"])
+    status = job["status"]
+    filename = job.get("filename", "uploaded file")
+    _errors = jsonpickle.decode(
+        job.get("errors", jsonpickle.encode(tuple())))
+    if status in ("success", "aborted"):
+        return redirect(url_for("species.populations.expression-data.results",
+                                species_id=species_id,
+                                population_id=population_id,
+                                job_id=job_id))
+
+    if status == "parse-error":
+        return redirect(url_for("species.populations.expression-data.fail", job_id=job_id))
+
+    app.jinja_env.globals.update(
+        isinvalidvalue=isinvalidvalue,
+        isduplicateheading=isduplicateheading)
+    return render_template(
+        "expression-data/job-progress.html",
+        job_id=job_id,
+        job_status=status,
+        progress=progress,
+        message=job.get("message", ""),
+        job_name=f"Parsing '{filename}'",
+        errors=_errors,
+        species=with_db_connection(
+            lambda conn: species_by_id(conn, species_id)),
+        population=with_db_connection(
+            lambda conn: population_by_species_and_id(
+                conn, species_id, population_id)))
+
+
+@exprdatabp.route(
+    "<int:species_id>/populations/<int:population_id>/expression-data/parse/"
+    "<uuid:job_id>/results",
+    methods=["GET"])
+@require_login
+def results(species_id: int, population_id: int, job_id: uuid.UUID):
+    """Show results of parsing..."""
+    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+        job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+
+    if job:
+        filename = job["filename"]
+        _errors = jsonpickle.decode(job.get("errors", jsonpickle.encode(tuple())))
+        app.jinja_env.globals.update(
+            isinvalidvalue=isinvalidvalue,
+            isduplicateheading=isduplicateheading)
+        return render_template(
+            "expression-data/parse-results.html",
+            errors=_errors,
+            job_name = f"Parsing '{filename}'",
+            user_aborted = job.get("user_aborted"),
+            job_id=job["jobid"],
+            species=with_db_connection(
+                lambda conn: species_by_id(conn, species_id)),
+            population=with_db_connection(
+                lambda conn: population_by_species_and_id(
+                    conn, species_id, population_id)))
+
+    return render_template("expression-data/no-such-job.html", job_id=job_id)
+
+
+@exprdatabp.route(
+    "<int:species_id>/populations/<int:population_id>/expression-data/parse/"
+    "<uuid:job_id>/fail",
+    methods=["GET"])
+@require_login
+def fail(species_id: int, population_id: int, job_id: str):
+    """Handle parsing failure"""
+    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+        job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+
+    if job:
+        error_filename = jobs.error_filename(
+            job_id, f"{app.config['UPLOADS_DIRECTORY']}/job_errors")
+        if os.path.exists(error_filename):
+            stat = os.stat(error_filename)
+            if stat.st_size > 0:
+                return render_template(
+                    "worker_failure.html", job_id=job_id)
+
+        return render_template("parse_failure.html", job=job)
+
+    return render_template("expression-data/no-such-job.html",
+                           **with_db_connection(lambda conn: {
+                               "species_id": species_by_id(conn, species_id),
+                               "population_id": population_by_species_and_id(
+                                   conn, species_id, population_id)}),
+                           job_id=job_id)
+
+
+@exprdatabp.route(
+    "<int:species_id>/populations/<int:population_id>/expression-data/parse/"
+    "abort",
+    methods=["POST"])
+@require_login
+def abort(species_id: int, population_id: int):
+    """Handle user request to abort file processing"""
+    job_id = request.form["job_id"]
+
+    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+        job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+
+        if job:
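+            # Flag the job as aborted by the user; the worker is expected to
+            # poll this field and stop processing (assumption about the worker).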
+            rconn.hset(name=jobs.job_key(jobs.jobsnamespace(), job_id),
+                       key="user_aborted",
+                       value=int(True))
+
+    return redirect(url_for("species.populations.expression-data.parse_status",
+                            species_id=species_id,
+                            population_id=population_id,
+                            job_id=job_id))