about summary refs log tree commit diff
path: root/uploader/expression_data
diff options
context:
space:
mode:
author	Frederick Muriuki Muriithi	2024-09-23 14:28:49 -0500
committer	Frederick Muriuki Muriithi	2024-09-23 16:35:38 -0500
commit	0b37b9b3fa4fead86787a705713645fa14530a54 (patch)
tree	83bba12d52f340ec39e16cae9547f325a01743a2	/uploader/expression_data
parent	3bec3b312a1e235247f1431d4351db5efe7a785d (diff)
download	gn-uploader-0b37b9b3fa4fead86787a705713645fa14530a54.tar.gz
Initialise the "Expression Data" section.
Diffstat (limited to 'uploader/expression_data')
-rw-r--r--	uploader/expression_data/__init__.py	11
-rw-r--r--	uploader/expression_data/dbinsert.py	406
-rw-r--r--	uploader/expression_data/index.py	125
-rw-r--r--	uploader/expression_data/parse.py	178
-rw-r--r--	uploader/expression_data/views.py	384
5 files changed, 791 insertions, 313 deletions
diff --git a/uploader/expression_data/__init__.py b/uploader/expression_data/__init__.py
index 206a764..fc8bd41 100644
--- a/uploader/expression_data/__init__.py
+++ b/uploader/expression_data/__init__.py
@@ -1,11 +1,2 @@
 """Package handling upload of files."""
-from flask import Blueprint
-
-from .rqtl2 import rqtl2
-from .index import indexbp
-from .parse import parsebp
-
-exprdatabp = Blueprint("expression-data", __name__)
-exprdatabp.register_blueprint(indexbp, url_prefix="/")
-exprdatabp.register_blueprint(rqtl2, url_prefix="/rqtl2")
-exprdatabp.register_blueprint(parsebp, url_prefix="/parse")
+from .views import exprdatabp
diff --git a/uploader/expression_data/dbinsert.py b/uploader/expression_data/dbinsert.py
new file mode 100644
index 0000000..2116031
--- /dev/null
+++ b/uploader/expression_data/dbinsert.py
@@ -0,0 +1,406 @@
+"Handle inserting data into the database"
+import os
+import json
+from typing import Union
+from functools import reduce
+from datetime import datetime
+
+from redis import Redis
+from MySQLdb.cursors import DictCursor
+from flask import (
+    flash, request, url_for, Blueprint, redirect, render_template,
+    current_app as app)
+
+from uploader.authorisation import require_login
+from uploader.population.models import populations_by_species
+from uploader.db_utils import with_db_connection, database_connection
+from uploader.species.models import species_by_id, all_species as species
+
+from . import jobs
+
+dbinsertbp = Blueprint("dbinsert", __name__)
+
+def render_error(error_msg):
+    "Render the generic error page"
+    return render_template("dbupdate_error.html", error_message=error_msg), 400
+
+def make_menu_items_grouper(grouping_fn=lambda item: item):
+    "Build function to be used to group menu items."
+    def __grouper__(acc, row):
+        grouping = grouping_fn(row[2])
+        row_values = (row[0].strip(), row[1].strip())
+        if acc.get(grouping) is None:
+            return {**acc, grouping: (row_values,)}
+        return {**acc, grouping: (acc[grouping] + (row_values,))}
+    return __grouper__
+
+def genechips():
+    "Retrieve the genechip information from the database"
+    def __organise_by_species__(acc, chip):
+        speciesid = chip["SpeciesId"]
+        if acc.get(speciesid) is None:
+            return {**acc, speciesid: (chip,)}
+        return {**acc, speciesid: acc[speciesid] + (chip,)}
+
+    with database_connection(app.config["SQL_URI"]) as conn:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            cursor.execute("SELECT * FROM GeneChip ORDER BY GeneChipName ASC")
+            return reduce(__organise_by_species__, cursor.fetchall(), {})
+
+    return {}
+
+def platform_by_id(genechipid:int) -> Union[dict, None]:
+    "Retrieve the gene platform by id"
+    with database_connection(app.config["SQL_URI"]) as conn:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            cursor.execute(
+                "SELECT * FROM GeneChip WHERE GeneChipId=%s",
+                (genechipid,))
+            return cursor.fetchone()
+
+def studies_by_species_and_platform(speciesid:int, genechipid:int) -> tuple:
+    "Retrieve the studies by the related species and gene platform"
+    with database_connection(app.config["SQL_URI"]) as conn:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            query = (
+                "SELECT Species.SpeciesId, ProbeFreeze.* "
+                "FROM Species INNER JOIN InbredSet "
+                "ON Species.SpeciesId=InbredSet.SpeciesId "
+                "INNER JOIN ProbeFreeze "
+                "ON InbredSet.InbredSetId=ProbeFreeze.InbredSetId "
+                "WHERE Species.SpeciesId = %s "
+                "AND ProbeFreeze.ChipId = %s")
+            cursor.execute(query, (speciesid, genechipid))
+            return tuple(cursor.fetchall())
+
+    return tuple()
+
+def organise_groups_by_family(acc:dict, group:dict) -> dict:
+    "Organise the group (InbredSet) information by the group field"
+    family = group["Family"]
+    if acc.get(family):
+        return {**acc, family: acc[family] + (group,)}
+    return {**acc, family: (group,)}
+
+def tissues() -> tuple:
+    "Retrieve type (Tissue) information from the database."
+    with database_connection(app.config["SQL_URI"]) as conn:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            cursor.execute("SELECT * FROM Tissue ORDER BY Name")
+            return tuple(cursor.fetchall())
+
+    return tuple()
+
+@dbinsertbp.route("/platform", methods=["POST"])
+@require_login
+def select_platform():
+    "Select the platform (GeneChipId) used for the data."
+    job_id = request.form["job_id"]
+    with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn,
+          database_connection(app.config["SQL_URI"]) as conn):
+        job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+        if job:
+            filename = job["filename"]
+            filepath = f"{app.config['UPLOAD_FOLDER']}/{filename}"
+            if os.path.exists(filepath):
+                default_species = 1
+                gchips = genechips()
+                return render_template(
+                    "select_platform.html", filename=filename,
+                    filetype=job["filetype"], totallines=int(job["currentline"]),
+                    default_species=default_species, species=species(conn),
+                    genechips=gchips[default_species],
+                    genechips_data=json.dumps(gchips))
+            return render_error(f"File '{filename}' no longer exists.")
+        return render_error(f"Job '{job_id}' no longer exists.")
+    return render_error("Unknown error")
+
+@dbinsertbp.route("/study", methods=["POST"])
+@require_login
+def select_study():
+    "View to select/create the study (ProbeFreeze) associated with the data."
+    form = request.form
+    try:
+        assert form.get("filename"), "filename"
+        assert form.get("filetype"), "filetype"
+        assert form.get("species"), "species"
+        assert form.get("genechipid"), "platform"
+
+        speciesid = form["species"]
+        genechipid = form["genechipid"]
+
+        the_studies = studies_by_species_and_platform(speciesid, genechipid)
+        the_groups = reduce(
+            organise_groups_by_family,
+            with_db_connection(
+                lambda conn: populations_by_species(conn, speciesid)),
+            {})
+        return render_template(
+            "select_study.html", filename=form["filename"],
+            filetype=form["filetype"], totallines=form["totallines"],
+            species=speciesid, genechipid=genechipid, studies=the_studies,
+            groups=the_groups, tissues = tissues(),
+            selected_group=int(form.get("inbredsetid", -13)),
+            selected_tissue=int(form.get("tissueid", -13)))
+    except AssertionError as aserr:
+        return render_error(f"Missing data: {aserr.args[0]}")
+
+@dbinsertbp.route("/create-study", methods=["POST"])
+@require_login
+def create_study():
+    "Create a new study (ProbeFreeze)."
+    form = request.form
+    try:
+        assert form.get("filename"), "filename"
+        assert form.get("filetype"), "filetype"
+        assert form.get("species"), "species"
+        assert form.get("genechipid"), "platform"
+        assert form.get("studyname"), "study name"
+        assert form.get("inbredsetid"), "group"
+        assert form.get("tissueid"), "type/tissue"
+
+        with database_connection(app.config["SQL_URI"]) as conn:
+            with conn.cursor(cursorclass=DictCursor) as cursor:
+                values = (
+                    form["genechipid"],
+                    form["tissueid"],
+                    form["studyname"],
+                    form.get("studyfullname", ""),
+                    form.get("studyshortname", ""),
+                    datetime.now().date().strftime("%Y-%m-%d"),
+                    form["inbredsetid"])
+                query = (
+                    "INSERT INTO ProbeFreeze("
+                    "ChipId, TissueId, Name, FullName, ShortName, CreateTime, "
+                    "InbredSetId"
+                    ") VALUES (%s, %s, %s, %s, %s, %s, %s)")
+                cursor.execute(query, values)
+                new_studyid = cursor.lastrowid
+                cursor.execute(
+                    "UPDATE ProbeFreeze SET ProbeFreezeId=%s WHERE Id=%s",
+                    (new_studyid, new_studyid))
+                flash("Study created successfully", "alert-success")
+                return render_template(
+                    "continue_from_create_study.html",
+                    filename=form["filename"], filetype=form["filetype"],
+                    totallines=form["totallines"], species=form["species"],
+                    genechipid=form["genechipid"], studyid=new_studyid)
+    except AssertionError as aserr:
+        flash(f"Missing data: {aserr.args[0]}", "alert-error")
+        return redirect(url_for("dbinsert.select_study"), code=307)
+
+def datasets_by_study(studyid:int) -> tuple:
+    "Retrieve datasets associated with a study with the ID `studyid`."
+    with database_connection(app.config["SQL_URI"]) as conn:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            query = "SELECT * FROM ProbeSetFreeze WHERE ProbeFreezeId=%s"
+            cursor.execute(query, (studyid,))
+            return tuple(cursor.fetchall())
+
+    return tuple()
+
+def averaging_methods() -> tuple:
+    "Retrieve averaging methods from database"
+    with database_connection(app.config["SQL_URI"]) as conn:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            cursor.execute("SELECT * FROM AvgMethod")
+            return tuple(cursor.fetchall())
+
+    return tuple()
+
+def dataset_datascales() -> tuple:
+    "Retrieve datascales from database"
+    with database_connection(app.config["SQL_URI"]) as conn:
+        with conn.cursor() as cursor:
+            cursor.execute(
+                'SELECT DISTINCT DataScale FROM ProbeSetFreeze '
+                'WHERE DataScale IS NOT NULL AND DataScale != ""')
+            return tuple(
+                item for item in
+                (res[0].strip() for res in cursor.fetchall())
+                if (item is not None and item != ""))
+
+    return tuple()
+
+@dbinsertbp.route("/dataset", methods=["POST"])
+@require_login
+def select_dataset():
+    "Select the dataset to add the file contents against"
+    form = request.form
+    try:
+        assert form.get("filename"), "filename"
+        assert form.get("filetype"), "filetype"
+        assert form.get("species"), "species"
+        assert form.get("genechipid"), "platform"
+        assert form.get("studyid"), "study"
+
+        studyid = form["studyid"]
+        datasets = datasets_by_study(studyid)
+        return render_template(
+            "select_dataset.html", **{**form, "studyid": studyid},
+            datasets=datasets, avgmethods=averaging_methods(),
+            datascales=dataset_datascales())
+    except AssertionError as aserr:
+        return render_error(f"Missing data: {aserr.args[0]}")
+
+@dbinsertbp.route("/create-dataset", methods=["POST"])
+@require_login
+def create_dataset():
+    "Select the dataset to add the file contents against"
+    form = request.form
+    try:
+        assert form.get("filename"), "filename"
+        assert form.get("filetype"), "filetype"
+        assert form.get("species"), "species"
+        assert form.get("genechipid"), "platform"
+        assert form.get("studyid"), "study"
+        assert form.get("avgid"), "averaging method"
+        assert form.get("datasetname2"), "Dataset Name 2"
+        assert form.get("datasetfullname"), "Dataset Full Name"
+        assert form.get("datasetshortname"), "Dataset Short Name"
+        assert form.get("datasetpublic"), "Dataset public specification"
+        assert form.get("datasetconfidentiality"), "Dataset confidentiality"
+        assert form.get("datasetdatascale"), "Dataset Datascale"
+
+        with database_connection(app.config["SQL_URI"]) as conn:
+            with conn.cursor(cursorclass=DictCursor) as cursor:
+                datasetname = form["datasetname"]
+                cursor.execute("SELECT * FROM ProbeSetFreeze WHERE Name=%s",
+                               (datasetname,))
+                results = cursor.fetchall()
+                if bool(results):
+                    flash("A dataset with that name already exists.",
+                          "alert-error")
+                    return redirect(url_for("dbinsert.select_dataset"), code=307)
+                values = (
+                    form["studyid"], form["avgid"],
+                    datasetname, form["datasetname2"],
+                    form["datasetfullname"], form["datasetshortname"],
+                    datetime.now().date().strftime("%Y-%m-%d"),
+                    form["datasetpublic"], form["datasetconfidentiality"],
+                    "williamslab", form["datasetdatascale"])
+                query = (
+                    "INSERT INTO ProbeSetFreeze("
+                    "ProbeFreezeId, AvgID, Name, Name2, FullName, "
+                    "ShortName, CreateTime, OrderList, public, "
+                    "confidentiality, AuthorisedUsers, DataScale) "
+                    "VALUES"
+                    "(%s, %s, %s, %s, %s, %s, %s, NULL, %s, %s, %s, %s)")
+                cursor.execute(query, values)
+                new_datasetid = cursor.lastrowid
+                return render_template(
+                    "continue_from_create_dataset.html",
+                    filename=form["filename"], filetype=form["filetype"],
+                    species=form["species"], genechipid=form["genechipid"],
+                    studyid=form["studyid"], datasetid=new_datasetid,
+                    totallines=form["totallines"])
+    except AssertionError as aserr:
+        flash(f"Missing data {aserr.args[0]}", "alert-error")
+        return redirect(url_for("dbinsert.select_dataset"), code=307)
+
+def study_by_id(studyid:int) -> Union[dict, None]:
+    "Get a study by its Id"
+    with database_connection(app.config["SQL_URI"]) as conn:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            cursor.execute(
+                "SELECT * FROM ProbeFreeze WHERE Id=%s",
+                (studyid,))
+            return cursor.fetchone()
+
+def dataset_by_id(datasetid:int) -> Union[dict, None]:
+    "Retrieve a dataset by its id"
+    with database_connection(app.config["SQL_URI"]) as conn:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            cursor.execute(
+                ("SELECT AvgMethod.Name AS AvgMethodName, ProbeSetFreeze.* "
+                 "FROM ProbeSetFreeze INNER JOIN AvgMethod "
+                 "ON ProbeSetFreeze.AvgId=AvgMethod.AvgMethodId "
+                 "WHERE ProbeSetFreeze.Id=%s"),
+                (datasetid,))
+            return cursor.fetchone()
+
+def selected_keys(original: dict, keys: tuple) -> dict:
+    "Return a new dict from the `original` dict with only `keys` present."
+    return {key: value for key,value in original.items() if key in keys}
+
+@dbinsertbp.route("/final-confirmation", methods=["POST"])
+@require_login
+def final_confirmation():
+    "Preview the data before triggering entry into the database"
+    form = request.form
+    try:
+        assert form.get("filename"), "filename"
+        assert form.get("filetype"), "filetype"
+        assert form.get("species"), "species"
+        assert form.get("genechipid"), "platform"
+        assert form.get("studyid"), "study"
+        assert form.get("datasetid"), "dataset"
+
+        speciesid = form["species"]
+        genechipid = form["genechipid"]
+        studyid = form["studyid"]
+        datasetid=form["datasetid"]
+        return render_template(
+            "final_confirmation.html", filename=form["filename"],
+            filetype=form["filetype"], totallines=form["totallines"],
+            species=speciesid, genechipid=genechipid, studyid=studyid,
+            datasetid=datasetid, the_species=selected_keys(
+                with_db_connection(lambda conn: species_by_id(conn, speciesid)),
+                ("SpeciesName", "Name", "MenuName")),
+            platform=selected_keys(
+                platform_by_id(genechipid),
+                ("GeneChipName", "Name", "GeoPlatform", "Title", "GO_tree_value")),
+            study=selected_keys(
+                study_by_id(studyid), ("Name", "FullName", "ShortName")),
+            dataset=selected_keys(
+                dataset_by_id(datasetid),
+                ("AvgMethodName", "Name", "Name2", "FullName", "ShortName",
+                 "DataScale")))
+    except AssertionError as aserr:
+        return render_error(f"Missing data: {aserr.args[0]}")
+
+@dbinsertbp.route("/insert-data", methods=["POST"])
+@require_login
+def insert_data():
+    "Trigger data insertion"
+    form = request.form
+    try:
+        assert form.get("filename"), "filename"
+        assert form.get("filetype"), "filetype"
+        assert form.get("species"), "species"
+        assert form.get("genechipid"), "platform"
+        assert form.get("studyid"), "study"
+        assert form.get("datasetid"), "dataset"
+
+        filename = form["filename"]
+        filepath = f"{app.config['UPLOAD_FOLDER']}/{filename}"
+        redisurl = app.config["REDIS_URL"]
+        if os.path.exists(filepath):
+            with Redis.from_url(redisurl, decode_responses=True) as rconn:
+                job = jobs.launch_job(
+                    jobs.data_insertion_job(
+                        rconn, filepath, form["filetype"], form["totallines"],
+                        form["species"], form["genechipid"], form["datasetid"],
+                        app.config["SQL_URI"], redisurl,
+                        app.config["JOBS_TTL_SECONDS"]),
+                    redisurl, f"{app.config['UPLOAD_FOLDER']}/job_errors")
+
+            return redirect(url_for("dbinsert.insert_status", job_id=job["jobid"]))
+        return render_error(f"File '{filename}' no longer exists.")
+    except AssertionError as aserr:
+        return render_error(f"Missing data: {aserr.args[0]}")
+
+@dbinsertbp.route("/status/<job_id>", methods=["GET"])
+def insert_status(job_id: str):
+    "Retrieve status of data insertion."
+    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+        job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+
+    if job:
+        job_status = job["status"]
+        if job_status == "success":
+            return render_template("insert_success.html", job=job)
+        if job["status"] == "error":
+            return render_template("insert_error.html", job=job)
+        return render_template("insert_progress.html", job=job)
+    return render_template("no_such_job.html", job_id=job_id), 400
diff --git a/uploader/expression_data/index.py b/uploader/expression_data/index.py
deleted file mode 100644
index db23136..0000000
--- a/uploader/expression_data/index.py
+++ /dev/null
@@ -1,125 +0,0 @@
-"""Entry-point module"""
-import os
-import mimetypes
-from typing import Tuple
-from zipfile import ZipFile, is_zipfile
-
-from werkzeug.utils import secure_filename
-from flask import (
-    flash,
-    request,
-    url_for,
-    redirect,
-    Blueprint,
-    render_template,
-    current_app as app)
-
-from uploader.species.models import all_species as species
-from uploader.authorisation import require_login
-from uploader.db_utils import with_db_connection
-
-indexbp = Blueprint("index", __name__)
-
-
-def errors(rqst) -> Tuple[str, ...]:
-    """Return a tuple of the errors found in the request `rqst`. If no error is
-    found, then an empty tuple is returned."""
-    def __filetype_error__():
-        return (
-            ("Invalid file type provided.",)
-            if rqst.form.get("filetype") not in ("average", "standard-error")
-            else tuple())
-
-    def __file_missing_error__():
-        return (
-            ("No file was uploaded.",)
-            if ("qc_text_file" not in rqst.files or
-                rqst.files["qc_text_file"].filename == "")
-            else tuple())
-
-    def __file_mimetype_error__():
-        text_file = rqst.files["qc_text_file"]
-        return (
-            (
-                ("Invalid file! Expected a tab-separated-values file, or a zip "
-                 "file of the a tab-separated-values file."),)
-            if text_file.mimetype not in (
-                    "text/plain", "text/tab-separated-values",
-                    "application/zip")
-            else tuple())
-
-    return (
-        __filetype_error__() +
-        (__file_missing_error__() or __file_mimetype_error__()))
-
-def zip_file_errors(filepath, upload_dir) -> Tuple[str, ...]:
-    """Check the uploaded zip file for errors."""
-    zfile_errors: Tuple[str, ...] = tuple()
-    if is_zipfile(filepath):
-        with ZipFile(filepath, "r") as zfile:
-            infolist = zfile.infolist()
-            if len(infolist) != 1:
-                zfile_errors = zfile_errors + (
-                    ("Expected exactly one (1) member file within the uploaded zip "
-                     f"file. Got {len(infolist)} member files."),)
-            if len(infolist) == 1 and infolist[0].is_dir():
-                zfile_errors = zfile_errors + (
-                    ("Expected a member text file in the uploaded zip file. Got a "
-                     "directory/folder."),)
-
-            if len(infolist) == 1 and not infolist[0].is_dir():
-                zfile.extract(infolist[0], path=upload_dir)
-                mime = mimetypes.guess_type(f"{upload_dir}/{infolist[0].filename}")
-                if mime[0] != "text/tab-separated-values":
-                    zfile_errors = zfile_errors + (
-                        ("Expected the member text file in the uploaded zip file to"
-                         " be a tab-separated file."),)
-
-    return zfile_errors
-
-
-@indexbp.route("/", methods=["GET"])
-@require_login
-def index():
-    """Display the expression data index page."""
-    return render_template("expression-data/index.html")
-
-
-@indexbp.route("/upload", methods=["GET", "POST"])
-@require_login
-def upload_file():
-    """Enables uploading the files"""
-    if request.method == "GET":
-        return render_template(
-            "select_species.html", species=with_db_connection(species))
-
-    upload_dir = app.config["UPLOAD_FOLDER"]
-    request_errors = errors(request)
-    if request_errors:
-        for error in request_errors:
-            flash(error, "alert-danger error-expr-data")
-        return redirect(url_for("expression-data.index.upload_file"))
-
-    filename = secure_filename(request.files["qc_text_file"].filename)
-    if not os.path.exists(upload_dir):
-        os.mkdir(upload_dir)
-
-    filepath = os.path.join(upload_dir, filename)
-    request.files["qc_text_file"].save(os.path.join(upload_dir, filename))
-
-    zip_errors = zip_file_errors(filepath, upload_dir)
-    if zip_errors:
-        for error in zip_errors:
-            flash(error, "alert-danger error-expr-data")
-        return redirect(url_for("expression-data.index.upload_file"))
-
-    return redirect(url_for("expression-data.parse.parse",
-                            speciesid=request.form["speciesid"],
-                            filename=filename,
-                            filetype=request.form["filetype"]))
-
-@indexbp.route("/data-review", methods=["GET"])
-@require_login
-def data_review():
-    """Provide some help on data expectations to the user."""
-    return render_template("data_review.html")
diff --git a/uploader/expression_data/parse.py b/uploader/expression_data/parse.py
deleted file mode 100644
index fc1c3f0..0000000
--- a/uploader/expression_data/parse.py
+++ /dev/null
@@ -1,178 +0,0 @@
-"""File parsing module"""
-import os
-
-import jsonpickle
-from redis import Redis
-from flask import flash, request, url_for, redirect, Blueprint, render_template
-from flask import current_app as app
-
-from quality_control.errors import InvalidValue, DuplicateHeading
-
-from uploader import jobs
-from uploader.dbinsert import species_by_id
-from uploader.db_utils import with_db_connection
-from uploader.authorisation import require_login
-
-parsebp = Blueprint("parse", __name__)
-
-def isinvalidvalue(item):
-    """Check whether item is of type InvalidValue"""
-    return isinstance(item, InvalidValue)
-
-def isduplicateheading(item):
-    """Check whether item is of type DuplicateHeading"""
-    return isinstance(item, DuplicateHeading)
-
-@parsebp.route("/parse", methods=["GET"])
-@require_login
-def parse():
-    """Trigger file parsing"""
-    errors = False
-    speciesid = request.args.get("speciesid")
-    filename = request.args.get("filename")
-    filetype = request.args.get("filetype")
-    if speciesid is None:
-        flash("No species selected", "alert-error error-expr-data")
-        errors = True
-    else:
-        try:
-            speciesid = int(speciesid)
-            species = with_db_connection(
-                lambda con: species_by_id(con, speciesid))
-            if not bool(species):
-                flash("No such species.", "alert-error error-expr-data")
-                errors = True
-        except ValueError:
-            flash("Invalid speciesid provided. Expected an integer.",
-                  "alert-error error-expr-data")
-            errors = True
-
-    if filename is None:
-        flash("No file provided", "alert-error error-expr-data")
-        errors = True
-
-    if filetype is None:
-        flash("No filetype provided", "alert-error error-expr-data")
-        errors = True
-
-    if filetype not in ("average", "standard-error"):
-        flash("Invalid filetype provided", "alert-error error-expr-data")
-        errors = True
-
-    if filename:
-        filepath = os.path.join(app.config["UPLOAD_FOLDER"], filename)
-        if not os.path.exists(filepath):
-            flash("Selected file does not exist (any longer)",
-                  "alert-error error-expr-data")
-            errors = True
-
-    if errors:
-        return redirect(url_for("expression-data.index.upload_file"))
-
-    redisurl = app.config["REDIS_URL"]
-    with Redis.from_url(redisurl, decode_responses=True) as rconn:
-        job = jobs.launch_job(
-            jobs.build_file_verification_job(
-                rconn, app.config["SQL_URI"], redisurl,
-                speciesid, filepath, filetype,
-                app.config["JOBS_TTL_SECONDS"]),
-            redisurl,
-            f"{app.config['UPLOAD_FOLDER']}/job_errors")
-
-    return redirect(url_for("expression-data.parse.parse_status", job_id=job["jobid"]))
-
-@parsebp.route("/status/<job_id>", methods=["GET"])
-def parse_status(job_id: str):
-    "Retrieve the status of the job"
-    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
-        try:
-            job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
-        except jobs.JobNotFound as _exc:
-            return render_template("no_such_job.html", job_id=job_id), 400
-
-    error_filename = jobs.error_filename(
-        job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors")
-    if os.path.exists(error_filename):
-        stat = os.stat(error_filename)
-        if stat.st_size > 0:
-            return redirect(url_for("parse.fail", job_id=job_id))
-
-    job_id = job["jobid"]
-    progress = float(job["percent"])
-    status = job["status"]
-    filename = job.get("filename", "uploaded file")
-    errors = jsonpickle.decode(
-        job.get("errors", jsonpickle.encode(tuple())))
-    if status in ("success", "aborted"):
-        return redirect(url_for("expression-data.parse.results", job_id=job_id))
-
-    if status == "parse-error":
-        return redirect(url_for("parse.fail", job_id=job_id))
-
-    app.jinja_env.globals.update(
-        isinvalidvalue=isinvalidvalue,
-        isduplicateheading=isduplicateheading)
-    return render_template(
-        "job_progress.html",
-        job_id = job_id,
-        job_status = status,
-        progress = progress,
-        message = job.get("message", ""),
-        job_name = f"Parsing '{filename}'",
-        errors=errors)
-
-@parsebp.route("/results/<job_id>", methods=["GET"])
-def results(job_id: str):
-    """Show results of parsing..."""
-    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
-        job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
-
-    if job:
-        filename = job["filename"]
-        errors = jsonpickle.decode(job.get("errors", jsonpickle.encode(tuple())))
-        app.jinja_env.globals.update(
-            isinvalidvalue=isinvalidvalue,
-            isduplicateheading=isduplicateheading)
-        return render_template(
-            "parse_results.html",
-            errors=errors,
-            job_name = f"Parsing '{filename}'",
-            user_aborted = job.get("user_aborted"),
-            job_id=job["jobid"])
-
-    return render_template("no_such_job.html", job_id=job_id)
-
-@parsebp.route("/fail/<job_id>", methods=["GET"])
-def fail(job_id: str):
-    """Handle parsing failure"""
-    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
-        job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
-
-    if job:
-        error_filename = jobs.error_filename(
-            job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors")
-        if os.path.exists(error_filename):
-            stat = os.stat(error_filename)
-            if stat.st_size > 0:
-                return render_template(
-                    "worker_failure.html", job_id=job_id)
-
-        return render_template("parse_failure.html", job=job)
-
-    return render_template("no_such_job.html", job_id=job_id)
-
-@parsebp.route("/abort", methods=["POST"])
-@require_login
-def abort():
-    """Handle user request to abort file processing"""
-    job_id = request.form["job_id"]
-
-    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
-        job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
-
-        if job:
-            rconn.hset(name=jobs.job_key(jobs.jobsnamespace(), job_id),
-                       key="user_aborted",
-                       value=int(True))
-
-    return redirect(url_for("expression-data.parse.parse_status", job_id=job_id))
diff --git a/uploader/expression_data/views.py b/uploader/expression_data/views.py
new file mode 100644
index 0000000..6900c51
--- /dev/null
+++ b/uploader/expression_data/views.py
@@ -0,0 +1,384 @@
+"""Views for expression data"""
+import os
+import uuid
+import mimetypes
+from typing import Tuple
+from zipfile import ZipFile, is_zipfile
+
+import jsonpickle
+from redis import Redis
+from werkzeug.utils import secure_filename
+from flask import (flash,
+                   request,
+                   url_for,
+                   redirect,
+                   Blueprint,
+                   current_app as app)
+
+from quality_control.errors import InvalidValue, DuplicateHeading
+
+from uploader import jobs
+from uploader.ui import make_template_renderer
+from uploader.authorisation import require_login
+from uploader.species.models import all_species, species_by_id
+from uploader.db_utils import with_db_connection, database_connection
+from uploader.datautils import safe_int, order_by_family, enumerate_sequence
+from uploader.population.models import (populations_by_species,
+                                        population_by_species_and_id)
+
# Blueprint carrying every expression-data endpoint; presumably registered
# under the species/populations blueprint tree, giving endpoints names like
# "species.populations.expression-data.index" — TODO confirm in uploader/__init__.py.
exprdatabp = Blueprint("expression-data", __name__)
# Template renderer pre-configured for this section (project helper).
render_template = make_template_renderer("expression-data")
+
def isinvalidvalue(item):
    """Template predicate: is `item` an `InvalidValue` error object?"""
    return isinstance(item, InvalidValue)
+
+
def isduplicateheading(item):
    """Template predicate: is `item` a `DuplicateHeading` error object?"""
    return isinstance(item, DuplicateHeading)
+
+
def errors(rqst) -> Tuple[str, ...]:
    """Validate the upload request `rqst`.

    Returns a tuple of human-readable error messages; an empty tuple
    means the request passed all checks. The mimetype check only runs
    when a file was actually uploaded.
    """
    found: Tuple[str, ...] = tuple()

    if rqst.form.get("filetype") not in ("average", "standard-error"):
        found = found + ("Invalid file type provided.",)

    if ("qc_text_file" not in rqst.files
            or rqst.files["qc_text_file"].filename == ""):
        found = found + ("No file was uploaded.",)
    elif rqst.files["qc_text_file"].mimetype not in (
            "text/plain", "text/tab-separated-values", "application/zip"):
        found = found + (
            ("Invalid file! Expected a tab-separated-values file, or a zip "
             "file of the a tab-separated-values file."),)

    return found
+
+
def zip_file_errors(filepath, upload_dir) -> Tuple[str, ...]:
    """Validate an uploaded zip archive.

    Returns error messages unless the archive holds exactly one member
    which is a tab-separated text file. A non-zip `filepath` yields no
    errors. Side effect: a single (non-directory) member is extracted
    into `upload_dir` so its mimetype can be guessed from the filename.
    """
    if not is_zipfile(filepath):
        return tuple()

    problems = []
    with ZipFile(filepath, "r") as zfile:
        members = zfile.infolist()
        if len(members) != 1:
            problems.append(
                ("Expected exactly one (1) member file within the uploaded zip "
                 f"file. Got {len(members)} member files."))
        if len(members) == 1 and members[0].is_dir():
            problems.append(
                ("Expected a member text file in the uploaded zip file. Got a "
                 "directory/folder."))

        if len(members) == 1 and not members[0].is_dir():
            zfile.extract(members[0], path=upload_dir)
            mime = mimetypes.guess_type(f"{upload_dir}/{members[0].filename}")
            if mime[0] != "text/tab-separated-values":
                problems.append(
                    ("Expected the member text file in the uploaded zip file to"
                     " be a tab-separated file."))

    return tuple(problems)
+
+
@exprdatabp.route("populations/expression-data", methods=["GET"])
@require_login
def index():
    """Expression-data landing page.

    Without a `species_id` query argument, render the species-selection
    page. With a valid one, redirect to `select_population`; with an
    invalid one, flash an error and redirect back here.
    """
    with database_connection(app.config["SQL_URI"]) as conn:
        if not bool(request.args.get("species_id")):
            return render_template(
                "expression-data/index.html",
                species=order_by_family(all_species(conn)),
                # NOTE(review): "genotypes" looks copy-pasted from the
                # genotypes section — should this be "expression-data"?
                activelink="genotypes")
        species = species_by_id(conn, request.args.get("species_id"))
        if not bool(species):
            flash(f"Could not find species with ID '{request.args.get('species_id')}'!",
                  "alert-danger")
            return redirect(url_for("species.populations.expression-data.index"))
        return redirect(url_for("species.populations.expression-data.select_population",
                                species_id=species["SpeciesId"]))
    # BUGFIX: removed the unreachable trailing `return render_template()`:
    # every path inside the `with` block already returns, and calling the
    # renderer without a template name would raise TypeError anyway.
+
+
@exprdatabp.route("<int:species_id>/populations/expression-data/select-population",
                  methods=["GET"])
@require_login
def select_population(species_id: int):
    """Pick the population whose expression data is being uploaded.

    Without a `population_id` query argument, render the selection page;
    with a valid one, redirect to `upload_file`; invalid inputs flash an
    error and redirect back.
    """
    with database_connection(app.config["SQL_URI"]) as conn:
        species = species_by_id(conn, species_id)
        if not bool(species):
            flash("Invalid species provided!", "alert-danger")
            return redirect(url_for("species.populations.expression-data.index"))

        population_id = request.args.get("population_id")
        if not bool(population_id):
            species_populations = order_by_family(
                populations_by_species(conn, species_id),
                order_key="FamilyOrder")
            return render_template("expression-data/select-population.html",
                                   species=species,
                                   populations=species_populations,
                                   activelink="genotypes")

        population = population_by_species_and_id(conn, species_id, population_id)
        if not bool(population):
            flash("Invalid population selected!", "alert-danger")
            return redirect(url_for(
                "species.populations.expression-data.select_population",
                species_id=species_id))

        return redirect(url_for("species.populations.expression-data.upload_file",
                                species_id=species_id,
                                population_id=population["Id"]))
+
+
@exprdatabp.route("<int:species_id>/populations/<int:population_id>/"
                  "expression-data/upload",
                  methods=["GET", "POST"])
@require_login
def upload_file(species_id: int, population_id: int):
    """Accept an expression-data upload (TSV, or zip with one TSV member).

    GET renders the file-selection form; POST validates the request,
    saves the file under UPLOAD_FOLDER and redirects to `parse_file`.
    """
    with database_connection(app.config["SQL_URI"]) as conn:
        species = species_by_id(conn, species_id)
        population = population_by_species_and_id(conn, species_id, population_id)
        if request.method == "GET":
            return render_template("expression-data/select-file.html",
                                   species=species,
                                   population=population)

        upload_dir = app.config["UPLOAD_FOLDER"]
        request_errors = errors(request)
        if request_errors:
            for error in request_errors:
                flash(error, "alert-danger error-expr-data")
            # BUGFIX: this endpoint's URL requires species_id and
            # population_id — url_for without them raised a BuildError.
            return redirect(url_for(
                "species.populations.expression-data.upload_file",
                species_id=species_id,
                population_id=population_id))

        filename = secure_filename(request.files["qc_text_file"].filename)
        if not os.path.exists(upload_dir):
            os.mkdir(upload_dir)

        filepath = os.path.join(upload_dir, filename)
        request.files["qc_text_file"].save(filepath)

        zip_errors = zip_file_errors(filepath, upload_dir)
        if zip_errors:
            for error in zip_errors:
                flash(error, "alert-danger error-expr-data")
            # BUGFIX: the endpoint name had a stray ".index" segment
            # ("...expression-data.index.upload_file") and was missing the
            # required URL parameters.
            return redirect(url_for(
                "species.populations.expression-data.upload_file",
                species_id=species_id,
                population_id=population_id))

        return redirect(url_for("species.populations.expression-data.parse_file",
                                species_id=species_id,
                                population_id=population_id,
                                filename=filename,
                                filetype=request.form["filetype"]))
+
+
@exprdatabp.route("/data-review", methods=["GET"])
@require_login
def data_review():
    """Render the static page describing the expected data formats."""
    return render_template("expression-data/data-review.html")
+
+
@exprdatabp.route(
    "<int:species_id>/populations/<int:population_id>/expression-data/parse",
    methods=["GET"])
@require_login
def parse_file(species_id: int, population_id: int):
    """Validate the query arguments, then launch a background
    file-verification job and redirect to its status page."""
    # Renamed from `errors`: the old local shadowed the module-level
    # `errors()` request-validation helper.
    errors_found = False
    filename = request.args.get("filename")
    filetype = request.args.get("filetype")

    species = with_db_connection(lambda con: species_by_id(con, species_id))
    if not bool(species):
        flash("No such species.", "alert-danger")
        errors_found = True

    if filename is None:
        flash("No file provided", "alert-danger")
        errors_found = True

    if filetype is None:
        flash("No filetype provided", "alert-danger")
        errors_found = True

    # NOTE: a missing filetype flashes both messages (None is also not a
    # valid filetype) — preserved from the original behaviour.
    if filetype not in ("average", "standard-error"):
        flash("Invalid filetype provided", "alert-danger")
        errors_found = True

    if filename:
        filepath = os.path.join(app.config["UPLOAD_FOLDER"], filename)
        if not os.path.exists(filepath):
            flash("Selected file does not exist (any longer)", "alert-danger")
            errors_found = True

    if errors_found:
        # BUGFIX: upload_file's URL requires species_id and population_id
        # — url_for without them raised a BuildError.
        return redirect(url_for(
            "species.populations.expression-data.upload_file",
            species_id=species_id,
            population_id=population_id))

    redisurl = app.config["REDIS_URL"]
    with Redis.from_url(redisurl, decode_responses=True) as rconn:
        job = jobs.launch_job(
            jobs.build_file_verification_job(
                rconn, app.config["SQL_URI"], redisurl,
                species_id, filepath, filetype,
                app.config["JOBS_TTL_SECONDS"]),
            redisurl,
            f"{app.config['UPLOAD_FOLDER']}/job_errors")

    return redirect(url_for("species.populations.expression-data.parse_status",
                            species_id=species_id,
                            population_id=population_id,
                            job_id=job["jobid"]))
+
+
@exprdatabp.route(
    "<int:species_id>/populations/<int:population_id>/expression-data/parse/"
    "status/<uuid:job_id>",
    methods=["GET"])
@require_login
def parse_status(species_id: int, population_id: int, job_id: str):
    """Report the progress of a parse job.

    Redirects to `results` on success/abort, to `fail` on parse errors or
    a non-empty worker error file; otherwise renders the progress page.
    """
    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
        try:
            job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
        except jobs.JobNotFound as _exc:
            return render_template("no_such_job.html", job_id=job_id), 400

    error_filename = jobs.error_filename(
        job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors")
    if os.path.exists(error_filename):
        stat = os.stat(error_filename)
        if stat.st_size > 0:
            # BUGFIX: "parse.fail" referenced the old blueprint; `fail`
            # lives on this blueprint and its URL also requires the
            # species and population IDs.
            return redirect(url_for(
                "species.populations.expression-data.fail",
                species_id=species_id,
                population_id=population_id,
                job_id=job_id))

    job_id = job["jobid"]
    progress = float(job["percent"])
    status = job["status"]
    filename = job.get("filename", "uploaded file")
    errors = jsonpickle.decode(
        job.get("errors", jsonpickle.encode(tuple())))
    if status in ("success", "aborted"):
        return redirect(url_for("species.populations.expression-data.results",
                                species_id=species_id,
                                population_id=population_id,
                                job_id=job_id))

    if status == "parse-error":
        # BUGFIX: `fail` requires species_id and population_id too.
        return redirect(url_for("species.populations.expression-data.fail",
                                species_id=species_id,
                                population_id=population_id,
                                job_id=job_id))

    app.jinja_env.globals.update(
        isinvalidvalue=isinvalidvalue,
        isduplicateheading=isduplicateheading)
    return render_template(
        "expression-data/job-progress.html",
        job_id=job_id,
        job_status=status,
        progress=progress,
        message=job.get("message", ""),
        # BUGFIX: `filename` was computed but unused while job_name was an
        # f-string with no placeholders, always showing "(unknown)".
        job_name=f"Parsing '{filename}'",
        errors=errors,
        species=with_db_connection(
            lambda conn: species_by_id(conn, species_id)),
        population=with_db_connection(
            lambda conn: population_by_species_and_id(
                conn, species_id, population_id)))
+
+
@exprdatabp.route(
    "<int:species_id>/populations/<int:population_id>/expression-data/parse/"
    "<uuid:job_id>/results",
    methods=["GET"])
@require_login
def results(species_id: int, population_id: int, job_id: uuid.UUID):
    """Show the results of parsing the uploaded file for job `job_id`."""
    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
        job = jobs.job(rconn, jobs.jobsnamespace(), job_id)

    if job:
        # BUGFIX: `filename` was fetched but never used (and `job["filename"]`
        # could raise KeyError); use it for the job name instead of the
        # placeholder-less f-string "Parsing '(unknown)'".
        filename = job.get("filename", "uploaded file")
        errors = jsonpickle.decode(job.get("errors", jsonpickle.encode(tuple())))
        app.jinja_env.globals.update(
            isinvalidvalue=isinvalidvalue,
            isduplicateheading=isduplicateheading)
        return render_template(
            "expression-data/parse-results.html",
            errors=errors,
            job_name=f"Parsing '{filename}'",
            user_aborted=job.get("user_aborted"),
            job_id=job["jobid"],
            species=with_db_connection(
                lambda conn: species_by_id(conn, species_id)),
            population=with_db_connection(
                lambda conn: population_by_species_and_id(
                    conn, species_id, population_id)))

    return render_template("expression-data/no-such-job.html", job_id=job_id)
+
+
@exprdatabp.route(
    "<int:species_id>/populations/<int:population_id>/expression-data/parse/"
    "<uuid:job_id>/fail",
    methods=["GET"])
@require_login
def fail(species_id: int, population_id: int, job_id: str):
    """Handle parsing failure.

    Renders a worker-failure page when the job's on-disk error file is
    non-empty, a generic parse-failure page when the job exists without
    worker errors, and a no-such-job page when the job cannot be found.
    """
    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
        job = jobs.job(rconn, jobs.jobsnamespace(), job_id)

    if job:
        # A non-empty error file indicates the background worker itself
        # crashed, as opposed to the upload merely failing validation.
        error_filename = jobs.error_filename(
            job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors")
        if os.path.exists(error_filename):
            stat = os.stat(error_filename)
            if stat.st_size > 0:
                return render_template(
                    "worker_failure.html", job_id=job_id)

        return render_template("parse_failure.html", job=job)

    # NOTE(review): the "species_id"/"population_id" kwargs here carry full
    # species/population records, not IDs — confirm against what
    # expression-data/no-such-job.html actually expects.
    return render_template("expression-data/no-such-job.html",
                           **with_db_connection(lambda conn: {
                               "species_id": species_by_id(conn, species_id),
                               "population_id": population_by_species_and_id(
                                   conn, species_id, population_id)}),
                           job_id=job_id)
+
+
@exprdatabp.route(
    "<int:species_id>/populations/<int:population_id>/expression-data/parse/"
    "abort",
    methods=["POST"])
@require_login
def abort(species_id: int, population_id: int):
    """Mark the parse job named in the POSTed `job_id` as user-aborted,
    then send the user back to the job's status page."""
    job_id = request.form["job_id"]

    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
        # Only flag jobs that actually exist in the jobs namespace.
        if jobs.job(rconn, jobs.jobsnamespace(), job_id):
            rconn.hset(name=jobs.job_key(jobs.jobsnamespace(), job_id),
                       key="user_aborted",
                       value=int(True))

    return redirect(url_for("species.populations.expression-data.parse_status",
                            species_id=species_id,
                            population_id=population_id,
                            job_id=job_id))