about summary refs log tree commit diff
path: root/uploader/expression_data
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2024-08-28 17:12:26 -0500
committerFrederick Muriuki Muriithi2024-08-28 17:54:17 -0500
commit06c6a7f7f42e8ff2d33a934ff695efde24d26d65 (patch)
tree0ab6115fa7a8ee490cc6efc343c44549c1871281 /uploader/expression_data
parent05191fa146fac31fd079c50bf6bcc4983f2f0792 (diff)
downloadgn-uploader-06c6a7f7f42e8ff2d33a934ff695efde24d26d65.tar.gz
Move code handling expression data upload into new module.
Diffstat (limited to 'uploader/expression_data')
-rw-r--r--uploader/expression_data/__init__.py13
-rw-r--r--uploader/expression_data/index.py126
-rw-r--r--uploader/expression_data/parse.py178
-rw-r--r--uploader/expression_data/rqtl2.py1176
-rw-r--r--uploader/expression_data/samples.py359
5 files changed, 1852 insertions, 0 deletions
diff --git a/uploader/expression_data/__init__.py b/uploader/expression_data/__init__.py
new file mode 100644
index 0000000..b773bce
--- /dev/null
+++ b/uploader/expression_data/__init__.py
@@ -0,0 +1,13 @@
+"""Package handling upload of files."""
+from flask import Blueprint
+
+from .rqtl2 import rqtl2
+from .index import indexbp
+from .parse import parsebp
+from .samples import samples
+
+# Parent blueprint for the expression-data package; each child blueprint
+# imported above is registered on it under its own URL prefix.
+exprdatabp = Blueprint("expression-data", __name__)
+exprdatabp.register_blueprint(indexbp, url_prefix="/")
+exprdatabp.register_blueprint(rqtl2, url_prefix="/rqtl2")
+exprdatabp.register_blueprint(parsebp, url_prefix="/parse")
+exprdatabp.register_blueprint(samples, url_prefix="/sample")
diff --git a/uploader/expression_data/index.py b/uploader/expression_data/index.py
new file mode 100644
index 0000000..a334c51
--- /dev/null
+++ b/uploader/expression_data/index.py
@@ -0,0 +1,126 @@
+"""Entry-point module"""
+import os
+import mimetypes
+from typing import Tuple
+from zipfile import ZipFile, is_zipfile
+
+from werkzeug.utils import secure_filename
+from flask import (
+    flash,
+    request,
+    url_for,
+    redirect,
+    Blueprint,
+    render_template,
+    current_app as app,
+    send_from_directory)
+
+from uploader.db import species
+from uploader.authorisation import require_login
+from uploader.db_utils import with_db_connection
+
+# Blueprint holding the index, upload and data-review endpoints of this module.
+indexbp = Blueprint("index", __name__)
+
+
+def errors(rqst) -> Tuple[str, ...]:
+    """Return a tuple of the errors found in the request `rqst`. If no error is
+    found, then an empty tuple is returned."""
+    def __filetype_error__():
+        return (
+            ("Invalid file type provided.",)
+            if rqst.form.get("filetype") not in ("average", "standard-error")
+            else tuple())
+
+    def __file_missing_error__():
+        return (
+            ("No file was uploaded.",)
+            if ("qc_text_file" not in rqst.files or
+                rqst.files["qc_text_file"].filename == "")
+            else tuple())
+
+    def __file_mimetype_error__():
+        text_file = rqst.files["qc_text_file"]
+        return (
+            (
+                ("Invalid file! Expected a tab-separated-values file, or a zip "
+                 "file of the a tab-separated-values file."),)
+            if text_file.mimetype not in (
+                    "text/plain", "text/tab-separated-values",
+                    "application/zip")
+            else tuple())
+
+    return (
+        __filetype_error__() +
+        (__file_missing_error__() or __file_mimetype_error__()))
+
+def zip_file_errors(filepath, upload_dir) -> Tuple[str, ...]:
+    """Check the uploaded zip file for errors."""
+    zfile_errors: Tuple[str, ...] = tuple()
+    if is_zipfile(filepath):
+        with ZipFile(filepath, "r") as zfile:
+            infolist = zfile.infolist()
+            if len(infolist) != 1:
+                zfile_errors = zfile_errors + (
+                    ("Expected exactly one (1) member file within the uploaded zip "
+                     f"file. Got {len(infolist)} member files."),)
+            if len(infolist) == 1 and infolist[0].is_dir():
+                zfile_errors = zfile_errors + (
+                    ("Expected a member text file in the uploaded zip file. Got a "
+                     "directory/folder."),)
+
+            if len(infolist) == 1 and not infolist[0].is_dir():
+                zfile.extract(infolist[0], path=upload_dir)
+                mime = mimetypes.guess_type(f"{upload_dir}/{infolist[0].filename}")
+                if mime[0] != "text/tab-separated-values":
+                    zfile_errors = zfile_errors + (
+                        ("Expected the member text file in the uploaded zip file to"
+                         " be a tab-separated file."),)
+
+    return zfile_errors
+
+
+@indexbp.route("/", methods=["GET"])
+@require_login
+def index():
+    """Display the expression data index page."""
+    return render_template("expression-data/index.html")
+
+
+@indexbp.route("/upload", methods=["GET", "POST"])
+@require_login
+def upload_file():
+    """Enables uploading the files"""
+    if request.method == "GET":
+        return render_template(
+            "select_species.html", species=with_db_connection(species))
+
+    upload_dir = app.config["UPLOAD_FOLDER"]
+    request_errors = errors(request)
+    if request_errors:
+        for error in request_errors:
+            flash(error, "alert-danger error-expr-data")
+        return redirect(url_for("expression-data.index.upload_file"))
+
+    filename = secure_filename(request.files["qc_text_file"].filename)
+    if not os.path.exists(upload_dir):
+        os.mkdir(upload_dir)
+
+    filepath = os.path.join(upload_dir, filename)
+    request.files["qc_text_file"].save(os.path.join(upload_dir, filename))
+
+    zip_errors = zip_file_errors(filepath, upload_dir)
+    if zip_errors:
+        for error in zip_errors:
+            flash(error, "alert-danger error-expr-data")
+        return redirect(url_for("expression-data.index.upload_file"))
+
+    return redirect(url_for("expression-data.parse.parse",
+                            speciesid=request.form["speciesid"],
+                            filename=filename,
+                            filetype=request.form["filetype"]))
+
+@indexbp.route("/data-review", methods=["GET"])
+@require_login
+def data_review():
+    """Provide some help on data expectations to the user."""
+    return render_template("data_review.html")
diff --git a/uploader/expression_data/parse.py b/uploader/expression_data/parse.py
new file mode 100644
index 0000000..fc1c3f0
--- /dev/null
+++ b/uploader/expression_data/parse.py
@@ -0,0 +1,178 @@
+"""File parsing module"""
+import os
+
+import jsonpickle
+from redis import Redis
+from flask import flash, request, url_for, redirect, Blueprint, render_template
+from flask import current_app as app
+
+from quality_control.errors import InvalidValue, DuplicateHeading
+
+from uploader import jobs
+from uploader.dbinsert import species_by_id
+from uploader.db_utils import with_db_connection
+from uploader.authorisation import require_login
+
+# Blueprint for the endpoints that launch file-parsing jobs and report on them.
+parsebp = Blueprint("parse", __name__)
+
+def isinvalidvalue(item):
+    """Check whether item is of type InvalidValue"""
+    return isinstance(item, InvalidValue)
+
+def isduplicateheading(item):
+    """Check whether item is of type DuplicateHeading"""
+    return isinstance(item, DuplicateHeading)
+
+@parsebp.route("/parse", methods=["GET"])
+@require_login
+def parse():
+    """Trigger file parsing"""
+    errors = False
+    speciesid = request.args.get("speciesid")
+    filename = request.args.get("filename")
+    filetype = request.args.get("filetype")
+    if speciesid is None:
+        flash("No species selected", "alert-error error-expr-data")
+        errors = True
+    else:
+        try:
+            speciesid = int(speciesid)
+            species = with_db_connection(
+                lambda con: species_by_id(con, speciesid))
+            if not bool(species):
+                flash("No such species.", "alert-error error-expr-data")
+                errors = True
+        except ValueError:
+            flash("Invalid speciesid provided. Expected an integer.",
+                  "alert-error error-expr-data")
+            errors = True
+
+    if filename is None:
+        flash("No file provided", "alert-error error-expr-data")
+        errors = True
+
+    if filetype is None:
+        flash("No filetype provided", "alert-error error-expr-data")
+        errors = True
+
+    if filetype not in ("average", "standard-error"):
+        flash("Invalid filetype provided", "alert-error error-expr-data")
+        errors = True
+
+    if filename:
+        filepath = os.path.join(app.config["UPLOAD_FOLDER"], filename)
+        if not os.path.exists(filepath):
+            flash("Selected file does not exist (any longer)",
+                  "alert-error error-expr-data")
+            errors = True
+
+    if errors:
+        return redirect(url_for("expression-data.index.upload_file"))
+
+    redisurl = app.config["REDIS_URL"]
+    with Redis.from_url(redisurl, decode_responses=True) as rconn:
+        job = jobs.launch_job(
+            jobs.build_file_verification_job(
+                rconn, app.config["SQL_URI"], redisurl,
+                speciesid, filepath, filetype,
+                app.config["JOBS_TTL_SECONDS"]),
+            redisurl,
+            f"{app.config['UPLOAD_FOLDER']}/job_errors")
+
+    return redirect(url_for("expression-data.parse.parse_status", job_id=job["jobid"]))
+
+@parsebp.route("/status/<job_id>", methods=["GET"])
+def parse_status(job_id: str):
+    "Retrieve the status of the job"
+    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+        try:
+            job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+        except jobs.JobNotFound as _exc:
+            return render_template("no_such_job.html", job_id=job_id), 400
+
+    error_filename = jobs.error_filename(
+        job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors")
+    if os.path.exists(error_filename):
+        stat = os.stat(error_filename)
+        if stat.st_size > 0:
+            return redirect(url_for("parse.fail", job_id=job_id))
+
+    job_id = job["jobid"]
+    progress = float(job["percent"])
+    status = job["status"]
+    filename = job.get("filename", "uploaded file")
+    errors = jsonpickle.decode(
+        job.get("errors", jsonpickle.encode(tuple())))
+    if status in ("success", "aborted"):
+        return redirect(url_for("expression-data.parse.results", job_id=job_id))
+
+    if status == "parse-error":
+        return redirect(url_for("parse.fail", job_id=job_id))
+
+    app.jinja_env.globals.update(
+        isinvalidvalue=isinvalidvalue,
+        isduplicateheading=isduplicateheading)
+    return render_template(
+        "job_progress.html",
+        job_id = job_id,
+        job_status = status,
+        progress = progress,
+        message = job.get("message", ""),
+        job_name = f"Parsing '{filename}'",
+        errors=errors)
+
+@parsebp.route("/results/<job_id>", methods=["GET"])
+def results(job_id: str):
+    """Show results of parsing..."""
+    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+        job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+
+    if job:
+        filename = job["filename"]
+        errors = jsonpickle.decode(job.get("errors", jsonpickle.encode(tuple())))
+        app.jinja_env.globals.update(
+            isinvalidvalue=isinvalidvalue,
+            isduplicateheading=isduplicateheading)
+        return render_template(
+            "parse_results.html",
+            errors=errors,
+            job_name = f"Parsing '{filename}'",
+            user_aborted = job.get("user_aborted"),
+            job_id=job["jobid"])
+
+    return render_template("no_such_job.html", job_id=job_id)
+
+@parsebp.route("/fail/<job_id>", methods=["GET"])
+def fail(job_id: str):
+    """Handle parsing failure"""
+    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+        job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+
+    if job:
+        error_filename = jobs.error_filename(
+            job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors")
+        if os.path.exists(error_filename):
+            stat = os.stat(error_filename)
+            if stat.st_size > 0:
+                return render_template(
+                    "worker_failure.html", job_id=job_id)
+
+        return render_template("parse_failure.html", job=job)
+
+    return render_template("no_such_job.html", job_id=job_id)
+
+@parsebp.route("/abort", methods=["POST"])
+@require_login
+def abort():
+    """Handle user request to abort file processing"""
+    job_id = request.form["job_id"]
+
+    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+        job = jobs.job(rconn, jobs.jobsnamespace(), job_id)
+
+        if job:
+            rconn.hset(name=jobs.job_key(jobs.jobsnamespace(), job_id),
+                       key="user_aborted",
+                       value=int(True))
+
+    return redirect(url_for("expression-data.parse.parse_status", job_id=job_id))
diff --git a/uploader/expression_data/rqtl2.py b/uploader/expression_data/rqtl2.py
new file mode 100644
index 0000000..48df66c
--- /dev/null
+++ b/uploader/expression_data/rqtl2.py
@@ -0,0 +1,1176 @@
+"""Module to handle uploading of R/qtl2 bundles."""#pylint: disable=[too-many-lines]
+import sys
+import json
+import traceback
+from pathlib import Path
+from datetime import date
+from uuid import UUID, uuid4
+from functools import partial
+from zipfile import ZipFile, is_zipfile
+from typing import Union, Callable, Optional
+
+import MySQLdb as mdb
+from redis import Redis
+from MySQLdb.cursors import DictCursor
+from werkzeug.utils import secure_filename
+from flask import (
+    flash,
+    escape,
+    request,
+    jsonify,
+    url_for,
+    redirect,
+    Response,
+    Blueprint,
+    render_template,
+    current_app as app)
+
+from r_qtl import r_qtl2
+
+from uploader import jobs
+from uploader.files import save_file, fullpath
+from uploader.dbinsert import species as all_species
+from uploader.db_utils import with_db_connection, database_connection
+
+from uploader.authorisation import require_login
+from uploader.db.platforms import platform_by_id, platforms_by_species
+from uploader.db.averaging import averaging_methods, averaging_method_by_id
+from uploader.db.tissues import all_tissues, tissue_by_id, create_new_tissue
+from uploader.db import (
+    species_by_id,
+    save_population,
+    populations_by_species,
+    population_by_species_and_id,)
+from uploader.db.datasets import (
+    geno_dataset_by_id,
+    geno_datasets_by_species_and_population,
+
+    probeset_study_by_id,
+    probeset_create_study,
+    probeset_dataset_by_id,
+    probeset_create_dataset,
+    probeset_datasets_by_study,
+    probeset_studies_by_species_and_population)
+
+# Blueprint for the R/qtl2 bundle upload endpoints defined in this module.
+rqtl2 = Blueprint("rqtl2", __name__)
+
+
+@rqtl2.route("/", methods=["GET", "POST"])
+@rqtl2.route("/select-species", methods=["GET", "POST"])
+@require_login
+def select_species():
+    """Select the species."""
+    if request.method == "GET":
+        return render_template("rqtl2/index.html", species=with_db_connection(all_species))
+
+    species_id = request.form.get("species_id")
+    species = with_db_connection(
+        lambda conn: species_by_id(conn, species_id))
+    if bool(species):
+        return redirect(url_for(
+            "expression-data.rqtl2.select_population", species_id=species_id))
+    flash("Invalid species or no species selected!", "alert-error error-rqtl2")
+    return redirect(url_for("expression-data.rqtl2.select_species"))
+
+
+@rqtl2.route("/upload/species/<int:species_id>/select-population",
+             methods=["GET", "POST"])
+@require_login
+def select_population(species_id: int):
+    """Select/Create the population to organise data under."""
+    with database_connection(app.config["SQL_URI"]) as conn:
+        species = species_by_id(conn, species_id)
+        if not bool(species):
+            flash("Invalid species selected!", "alert-error error-rqtl2")
+            return redirect(url_for("expression-data.rqtl2.select_species"))
+
+        if request.method == "GET":
+            return render_template(
+                "rqtl2/select-population.html",
+                species=species,
+                populations=populations_by_species(conn, species_id))
+
+        population = population_by_species_and_id(
+            conn, species["SpeciesId"], request.form.get("inbredset_id"))
+        if not bool(population):
+            flash("Invalid Population!", "alert-error error-rqtl2")
+            return redirect(
+                url_for("expression-data.rqtl2.select_population", pgsrc="error"),
+                code=307)
+
+        return redirect(url_for("expression-data.rqtl2.upload_rqtl2_bundle",
+                                species_id=species["SpeciesId"],
+                                population_id=population["InbredSetId"]))
+
+
+@rqtl2.route("/upload/species/<int:species_id>/create-population",
+             methods=["POST"])
+@require_login
+def create_population(species_id: int):
+    """Create a new population for the given species."""
+    population_page = redirect(url_for("expression-data.rqtl2.select_population",
+                                       species_id=species_id))
+    with database_connection(app.config["SQL_URI"]) as conn:
+        species = species_by_id(conn, species_id)
+        population_name = request.form.get("inbredset_name", "").strip()
+        population_fullname = request.form.get("inbredset_fullname", "").strip()
+        if not bool(species):
+            flash("Invalid species!", "alert-error error-rqtl2")
+            return redirect(url_for("expression-data.rqtl2.select_species"))
+        if not bool(population_name):
+            flash("Invalid Population Name!", "alert-error error-rqtl2")
+            return population_page
+        if not bool(population_fullname):
+            flash("Invalid Population Full Name!", "alert-error error-rqtl2")
+            return population_page
+        new_population = save_population(conn, {
+            "SpeciesId": species["SpeciesId"],
+            "Name": population_name,
+            "InbredSetName": population_fullname,
+            "FullName": population_fullname,
+            "Family": request.form.get("inbredset_family") or None,
+            "Description": request.form.get("description") or None
+        })
+
+    flash("Population created successfully.", "alert-success")
+    return redirect(
+        url_for("expression-data.rqtl2.upload_rqtl2_bundle",
+                species_id=species_id,
+                population_id=new_population["population_id"],
+                pgsrc="create-population"),
+        code=307)
+
+
+class __RequestError__(Exception): #pylint: disable=[invalid-name]
+    """Internal class to avoid pylint's `too-many-return-statements` error."""
+
+
+@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
+              "/rqtl2-bundle"),
+    methods=["GET", "POST"])
+@require_login
+def upload_rqtl2_bundle(species_id: int, population_id: int):
+    """Allow upload of R/qtl2 bundle."""
+    with database_connection(app.config["SQL_URI"]) as conn:
+        species = species_by_id(conn, species_id)
+        population = population_by_species_and_id(
+            conn, species["SpeciesId"], population_id)
+        if not bool(species):
+            flash("Invalid species!", "alert-error error-rqtl2")
+            return redirect(url_for("expression-data.rqtl2.select_species"))
+        if not bool(population):
+            flash("Invalid Population!", "alert-error error-rqtl2")
+            return redirect(
+                url_for("expression-data.rqtl2.select_population", pgsrc="error"),
+                code=307)
+        if request.method == "GET" or (
+                request.method == "POST"
+                and bool(request.args.get("pgsrc"))):
+            return render_template("rqtl2/upload-rqtl2-bundle-step-01.html",
+                                   species=species,
+                                   population=population)
+
+        try:
+            app.logger.debug("Files in the form: %s", request.files)
+            the_file = save_file(request.files["rqtl2_bundle_file"],
+                                 Path(app.config["UPLOAD_FOLDER"]))
+        except AssertionError:
+            app.logger.debug(traceback.format_exc())
+            flash("Please provide a valid R/qtl2 zip bundle.",
+                  "alert-error error-rqtl2")
+            return redirect(url_for("expression-data.rqtl2.upload_rqtl2_bundle",
+                                    species_id=species_id,
+                                    population_id=population_id))
+
+        if not is_zipfile(str(the_file)):
+            app.logger.debug("The file is not a zip file.")
+            raise __RequestError__("Invalid file! Expected a zip file.")
+
+        jobid = trigger_rqtl2_bundle_qc(
+            species_id,
+            population_id,
+            the_file,
+            request.files["rqtl2_bundle_file"].filename)#type: ignore[arg-type]
+        return redirect(url_for(
+            "expression-data.rqtl2.rqtl2_bundle_qc_status", jobid=jobid))
+
+
+def trigger_rqtl2_bundle_qc(
+        species_id: int,
+        population_id: int,
+        rqtl2bundle: Path,
+        originalfilename: str
+) -> UUID:
+    """Trigger QC on the R/qtl2 bundle."""
+    redisuri = app.config["REDIS_URL"]
+    with Redis.from_url(redisuri, decode_responses=True) as rconn:
+        jobid = uuid4()
+        redis_ttl_seconds = app.config["JOBS_TTL_SECONDS"]
+        jobs.launch_job(
+            jobs.initialise_job(
+                rconn,
+                jobs.jobsnamespace(),
+                str(jobid),
+                [sys.executable, "-m", "scripts.qc_on_rqtl2_bundle",
+                 app.config["SQL_URI"], app.config["REDIS_URL"],
+                 jobs.jobsnamespace(), str(jobid), str(species_id),
+                 str(population_id), "--redisexpiry",
+                 str(redis_ttl_seconds)],
+                "rqtl2-bundle-qc-job",
+                redis_ttl_seconds,
+                {"job-metadata": json.dumps({
+                    "speciesid": species_id,
+                    "populationid": population_id,
+                    "rqtl2-bundle-file": str(rqtl2bundle.absolute()),
+                    "original-filename": originalfilename})}),
+            redisuri,
+            f"{app.config['UPLOAD_FOLDER']}/job_errors")
+        return jobid
+
+
+def chunk_name(uploadfilename: str, chunkno: int) -> str:
+    """Generate chunk name from original filename and chunk number"""
+    if uploadfilename == "":
+        raise ValueError("Name cannot be empty!")
+    if chunkno < 1:
+        raise ValueError("Chunk number must be greater than zero")
+    return f"{secure_filename(uploadfilename)}_part_{chunkno:05d}"
+
+
+def chunks_directory(uniqueidentifier: str) -> Path:
+    """Compute the directory where chunks are temporarily stored."""
+    if uniqueidentifier == "":
+        raise ValueError("Unique identifier cannot be empty!")
+    return Path(app.config["UPLOAD_FOLDER"], f"tempdir_{uniqueidentifier}")
+
+
+@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
+              "/rqtl2-bundle-chunked"),
+             methods=["GET"])
+@require_login
+def upload_rqtl2_bundle_chunked_get(# pylint: disable=["unused-argument"]
+        species_id: int,
+        population_id: int
+):
+    """
+    Extension to the `upload_rqtl2_bundle` endpoint above that provides a way
+    for testing whether all the chunks have been uploaded and to assist with
+    resuming a failed expression-data.
+    """
+    fileid = request.args.get("resumableIdentifier", type=str) or ""
+    filename = request.args.get("resumableFilename", type=str) or ""
+    chunk = request.args.get("resumableChunkNumber", type=int) or 0
+    if not(fileid or filename or chunk):
+        return jsonify({
+            "message": "At least one required query parameter is missing.",
+            "error": "BadRequest",
+            "statuscode": 400
+        }), 400
+
+    if Path(chunks_directory(fileid),
+            chunk_name(filename, chunk)).exists():
+        return "OK"
+
+    return jsonify({
+            "message": f"Chunk {chunk} was not found.",
+            "error": "NotFound",
+            "statuscode": 404
+        }), 404
+
+
+def __merge_chunks__(targetfile: Path, chunkpaths: tuple[Path, ...]) -> Path:
+    """Merge the chunks into a single file."""
+    with open(targetfile, "ab") as _target:
+        for chunkfile in chunkpaths:
+            with open(chunkfile, "rb") as _chunkdata:
+                _target.write(_chunkdata.read())
+
+            chunkfile.unlink()
+    return targetfile
+
+
+@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
+              "/rqtl2-bundle-chunked"),
+             methods=["POST"])
+@require_login
+def upload_rqtl2_bundle_chunked_post(species_id: int, population_id: int):
+    """
+    Extension to the `upload_rqtl2_bundle` endpoint above that allows large
+    files to be uploaded in chunks.
+
+    This should hopefully speed up uploads, and if done right, even enable
+    resumable uploads
+    """
+    _totalchunks = request.form.get("resumableTotalChunks", type=int) or 0
+    _chunk = request.form.get("resumableChunkNumber", default=1, type=int)
+    _uploadfilename = request.form.get(
+        "resumableFilename", default="", type=str) or ""
+    _fileid = request.form.get(
+        "resumableIdentifier", default="", type=str) or ""
+    _targetfile = Path(app.config["UPLOAD_FOLDER"], _fileid)
+
+    if _targetfile.exists():
+        return jsonify({
+            "message": (
+                "A file with a similar unique identifier has previously been "
+                "uploaded and possibly is/has being/been processed."),
+            "error": "BadRequest",
+            "statuscode": 400
+        }), 400
+
+    try:
+        # save chunk data
+        chunks_directory(_fileid).mkdir(exist_ok=True, parents=True)
+        request.files["file"].save(Path(chunks_directory(_fileid),
+                                        chunk_name(_uploadfilename, _chunk)))
+
+        # Check whether upload is complete
+        chunkpaths = tuple(
+            Path(chunks_directory(_fileid), chunk_name(_uploadfilename, _achunk))
+            for _achunk in range(1, _totalchunks+1))
+        if all(_file.exists() for _file in chunkpaths):
+            # merge_files and clean up chunks
+            __merge_chunks__(_targetfile, chunkpaths)
+            chunks_directory(_fileid).rmdir()
+            jobid = trigger_rqtl2_bundle_qc(
+                species_id, population_id, _targetfile, _uploadfilename)
+            return url_for(
+                "expression-data.rqtl2.rqtl2_bundle_qc_status", jobid=jobid)
+    except Exception as exc:# pylint: disable=[broad-except]
+        msg = "Error processing uploaded file chunks."
+        app.logger.error(msg, exc_info=True, stack_info=True)
+        return jsonify({
+            "message": msg,
+            "error": type(exc).__name__,
+            "error-description": " ".join(str(arg) for arg in exc.args),
+            "error-trace": traceback.format_exception(exc)
+        }), 500
+
+    return "OK"
+
+
+@rqtl2.route("/upload/species/rqtl2-bundle/qc-status/<uuid:jobid>",
+             methods=["GET", "POST"])
+@require_login
+def rqtl2_bundle_qc_status(jobid: UUID):
+    """Check the status of the QC jobs."""
+    with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn,
+          database_connection(app.config["SQL_URI"]) as dbconn):
+        try:
+            thejob = jobs.job(rconn, jobs.jobsnamespace(), jobid)
+            messagelistname = thejob.get("log-messagelist")
+            logmessages = (rconn.lrange(messagelistname, 0, -1)
+                           if bool(messagelistname) else [])
+            jobstatus = thejob["status"]
+            if jobstatus == "error":
+                return render_template("rqtl2/rqtl2-qc-job-error.html",
+                                       job=thejob,
+                                       errorsgeneric=json.loads(
+                                           thejob.get("errors-generic", "[]")),
+                                       errorsgeno=json.loads(
+                                           thejob.get("errors-geno", "[]")),
+                                       errorspheno=json.loads(
+                                           thejob.get("errors-pheno", "[]")),
+                                       errorsphenose=json.loads(
+                                           thejob.get("errors-phenose", "[]")),
+                                       errorsphenocovar=json.loads(
+                                           thejob.get("errors-phenocovar", "[]")),
+                                       messages=logmessages)
+            if jobstatus == "success":
+                jobmeta = json.loads(thejob["job-metadata"])
+                species = species_by_id(dbconn, jobmeta["speciesid"])
+                return render_template(
+                    "rqtl2/rqtl2-qc-job-results.html",
+                    species=species,
+                    population=population_by_species_and_id(
+                        dbconn, species["SpeciesId"], jobmeta["populationid"]),
+                    rqtl2bundle=Path(jobmeta["rqtl2-bundle-file"]).name,
+                    rqtl2bundleorig=jobmeta["original-filename"])
+
+            def compute_percentage(thejob, filetype) -> Union[str, None]:
+                if f"{filetype}-linecount" in thejob:
+                    return "100"
+                if f"{filetype}-filesize" in thejob:
+                    percent = ((int(thejob.get(f"{filetype}-checked", 0))
+                                /
+                                int(thejob.get(f"{filetype}-filesize", 1)))
+                               * 100)
+                    return f"{percent:.2f}"
+                return None
+
+            return render_template(
+                "rqtl2/rqtl2-qc-job-status.html",
+                job=thejob,
+                geno_percent=compute_percentage(thejob, "geno"),
+                pheno_percent=compute_percentage(thejob, "pheno"),
+                phenose_percent=compute_percentage(thejob, "phenose"),
+                messages=logmessages)
+        except jobs.JobNotFound:
+            return render_template("rqtl2/no-such-job.html", jobid=jobid)
+
+
+def redirect_on_error(flaskroute, **kwargs):
+    """Utility to redirect on error"""
+    return redirect(url_for(flaskroute, **kwargs, pgsrc="error"),
+                    code=(307 if request.method == "POST" else 302))
+
+
+def check_species(conn: mdb.Connection, formargs: dict) -> Optional[
+        tuple[str, Response]]:
+    """
+    Check whether the 'species_id' value is provided, and whether a
+    corresponding species exists in the database.
+
+    Maybe give the function a better name..."""
+    speciespage = redirect_on_error("expression-data.rqtl2.select_species")
+    if "species_id" not in formargs:
+        return "You MUST provide the Species identifier.", speciespage
+
+    if not bool(species_by_id(conn, formargs["species_id"])):
+        return "No species with the provided identifier exists.", speciespage
+
+    return None
+
+
+def check_population(conn: mdb.Connection,
+                     formargs: dict,
+                     species_id) -> Optional[tuple[str, Response]]:
+    """
+    Verify that 'population_id' is present in `formargs` and that
+    `population_by_species_and_id` finds a matching population for
+    `species_id`.
+
+    Returns `None` when the check passes; otherwise a tuple of the error
+    message and an error redirect built by `redirect_on_error`.
+    """
+    poppage = redirect_on_error(
+        "expression-data.rqtl2.select_species", species_id=species_id)
+    if "population_id" in formargs:
+        found = population_by_species_and_id(
+            conn, species_id, formargs["population_id"])
+        if bool(found):
+            return None
+        return "No population with the provided identifier exists.", poppage
+
+    return "You MUST provide the Population identifier.", poppage
+
+
+def check_r_qtl2_bundle(formargs: dict,
+                        species_id,
+                        population_id) -> Optional[tuple[str, Response]]:
+    """
+    Verify that 'rqtl2_bundle_file' is present in `formargs` and that the
+    path `fullpath` resolves it to exists on disk.
+
+    Returns `None` when the check passes; otherwise a tuple of the error
+    message and an error redirect to the
+    `expression-data.rqtl2.upload_rqtl2_bundle` endpoint.
+    """
+    fileuploadpage = redirect_on_error(
+        "expression-data.rqtl2.upload_rqtl2_bundle",
+        species_id=species_id,
+        population_id=population_id)
+    if "rqtl2_bundle_file" not in formargs:
+        return (
+            "You MUST provide a R/qtl2 zip bundle for expression-data.", fileuploadpage)
+
+    bundlepath = Path(fullpath(formargs["rqtl2_bundle_file"]))
+    if bundlepath.exists():
+        return None
+
+    return "No R/qtl2 bundle with the given name exists.", fileuploadpage
+
+
+def check_geno_dataset(conn: mdb.Connection,
+                       formargs: dict,
+                       species_id,
+                       population_id) -> Optional[tuple[str, Response]]:
+    """
+    Verify that 'geno-dataset-id' is provided in `formargs` and matches
+    exactly one row in the `GenoFreeze` table.
+
+    Returns `None` when the check passes; otherwise a tuple of the error
+    message and an error redirect to the
+    `expression-data.rqtl2.select_dataset_info` endpoint.
+    """
+    genodsetpg = redirect_on_error("expression-data.rqtl2.select_dataset_info",
+                                   species_id=species_id,
+                                   population_id=population_id)
+    if not bool(formargs.get("geno-dataset-id")):
+        return (
+            "You MUST provide a valid Genotype dataset identifier", genodsetpg)
+
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        cursor.execute("SELECT * FROM GenoFreeze WHERE Id=%s",
+                       (formargs["geno-dataset-id"],))
+        rows = cursor.fetchall()
+        if not bool(rows):
+            message = "No genotype dataset with the provided identifier exists."
+        elif len(rows) > 1:
+            message = (
+                "Data corruption: More than one genotype dataset with the same "
+                "identifier.")
+        else:
+            # Exactly one matching row: the dataset is valid.
+            message = None
+
+    return None if message is None else (message, genodsetpg)
+
+def check_tissue(
+        conn: mdb.Connection, formargs: dict) -> Optional[tuple[str, Response]]:
+    """
+    Verify that 'tissueid' is provided in `formargs` and matches exactly one
+    row in the `Tissue` table.
+
+    Reads 'species_id' and 'population_id' directly from `formargs` to build
+    the error redirect, so both keys must be present.
+
+    Returns `None` when the check passes; otherwise a tuple of the error
+    message and an error redirect to the
+    `expression-data.rqtl2.select_dataset_info` endpoint.
+    """
+    selectdsetpg = redirect_on_error(
+        "expression-data.rqtl2.select_dataset_info",
+        species_id=formargs["species_id"],
+        population_id=formargs["population_id"])
+    tissueid = formargs.get("tissueid", "")
+    if not bool(tissueid.strip()):
+        return ("No tissue/organ/biological material provided.", selectdsetpg)
+
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        # The value is passed to the query exactly as submitted (unstripped).
+        cursor.execute("SELECT * FROM Tissue WHERE Id=%s", (tissueid,))
+        rows = cursor.fetchall()
+        if not bool(rows):
+            message = "No tissue/organ with the provided identifier exists."
+        elif len(rows) > 1:
+            message = (
+                "Data corruption: More than one tissue/organ with the same "
+                "identifier.")
+        else:
+            # Exactly one matching row: the tissue is valid.
+            message = None
+
+    return None if message is None else (message, selectdsetpg)
+
+
+def check_probe_study(conn: mdb.Connection,
+                      formargs: dict,
+                      species_id,
+                      population_id) -> Optional[tuple[str, Response]]:
+    """
+    Verify that 'probe-study-id' is provided in `formargs` and that
+    `probeset_study_by_id` finds a matching study.
+
+    Returns `None` when the check passes; otherwise a tuple of the error
+    message and an error redirect to the
+    `expression-data.rqtl2.select_dataset_info` endpoint.
+    """
+    dsetinfopg = redirect_on_error("expression-data.rqtl2.select_dataset_info",
+                                   species_id=species_id,
+                                   population_id=population_id)
+    studyid = formargs.get("probe-study-id")
+    if not bool(studyid):
+        return "No probeset study was selected!", dsetinfopg
+
+    if bool(probeset_study_by_id(conn, studyid)):
+        return None
+
+    return ("No probeset study with the provided identifier exists",
+            dsetinfopg)
+
+
+def check_probe_dataset(conn: mdb.Connection,
+                        formargs: dict,
+                        species_id,
+                        population_id) -> Optional[tuple[str, Response]]:
+    """
+    Verify that 'probe-dataset-id' is provided in `formargs` and that
+    `probeset_dataset_by_id` finds a matching dataset.
+
+    Returns `None` when the check passes; otherwise a tuple of the error
+    message and an error redirect to the
+    `expression-data.rqtl2.select_dataset_info` endpoint.
+    """
+    dsetinfopg = redirect_on_error("expression-data.rqtl2.select_dataset_info",
+                                   species_id=species_id,
+                                   population_id=population_id)
+    datasetid = formargs.get("probe-dataset-id")
+    if not bool(datasetid):
+        return "No probeset dataset was selected!", dsetinfopg
+
+    if bool(probeset_dataset_by_id(conn, datasetid)):
+        return None
+
+    return ("No probeset dataset with the provided identifier exists",
+            dsetinfopg)
+
+
+def with_errors(endpointthunk: Callable, *checkfns):
+    """
+    Run 'endpointthunk' only if every check in 'checkfns' passes.
+
+    Each check is called as `check(formargs=...)`, where `formargs` merges
+    the request's query arguments and form data (form values win on
+    duplicate keys). A check signals failure by returning a
+    `(message, response)` tuple, and success by returning `None`.
+
+    Checks run in the order given and evaluation stops at the first
+    failure: its message is flashed and its response returned. Later checks
+    are not run, so a check may rely on the ones listed before it having
+    passed (for example on 'species_id' being present in `formargs`).
+
+    If no check fails, the result of `endpointthunk()` is returned.
+    """
+    formargs = {**dict(request.args), **dict(request.form)}
+    # Lazily evaluate the checks; `next` stops at the first one that reports
+    # an error, so no later check (or database query) is executed.
+    first_error = next(
+        (result
+         for result in (_fn(formargs=formargs) for _fn in checkfns)
+         if result is not None),
+        None)
+    if first_error is not None:
+        flash(first_error[0], "alert-error error-rqtl2")
+        return first_error[1]
+
+    return endpointthunk()
+
+
+@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
+              "/rqtl2-bundle/select-geno-dataset"),
+             methods=["POST"])
+@require_login
+def select_geno_dataset(species_id: int, population_id: int):
+    """
+    Accept the selection of an existing genotype dataset.
+
+    Once the species, population, bundle and genotype dataset checks pass,
+    the request is redirected (status 307) to the dataset-info page, provided
+    genotype datasets exist for the species and population. Otherwise an
+    error is flashed and the request is redirected back to this endpoint.
+    """
+    with database_connection(app.config["SQL_URI"]) as conn:
+        def __thunk__():
+            datasets = geno_datasets_by_species_and_population(
+                conn, species_id, population_id)
+            if bool(datasets):
+                flash("Genotype accepted", "alert-success error-rqtl2")
+                return redirect(
+                    url_for("expression-data.rqtl2.select_dataset_info",
+                            species_id=species_id,
+                            population_id=population_id,
+                            pgsrc="expression-data.rqtl2.select_geno_dataset"),
+                    code=307)
+
+            flash("No genotype dataset was provided!",
+                  "alert-error error-rqtl2")
+            return redirect(
+                url_for("expression-data.rqtl2.select_geno_dataset",
+                        species_id=species_id,
+                        population_id=population_id,
+                        pgsrc="error"),
+                code=307)
+
+        checks = (
+            partial(check_species, conn=conn),
+            partial(check_population, conn=conn, species_id=species_id),
+            partial(check_r_qtl2_bundle,
+                    species_id=species_id,
+                    population_id=population_id),
+            partial(check_geno_dataset,
+                    conn=conn,
+                    species_id=species_id,
+                    population_id=population_id))
+        return with_errors(__thunk__, *checks)
+
+
+@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
+              "/rqtl2-bundle/create-geno-dataset"),
+             methods=["POST"])
+@require_login
+def create_geno_dataset(species_id: int, population_id: int):
+    """
+    Create a new genotype dataset (a `GenoFreeze` row) from the form data.
+
+    The form must carry 'dataset-name' and 'dataset-fullname'. The short name
+    falls back to the dataset name when 'dataset-shortname' is empty. If
+    validation fails, or a dataset with the same name already exists, an
+    error is flashed and the request is redirected (status 307) to the
+    dataset-info page. On success the "create-geno-dataset-success" template
+    is rendered with the new dataset.
+    """
+    with database_connection(app.config["SQL_URI"]) as conn:
+        def __thunk__():
+            errorclasses = "alert-error error-rqtl2 error-rqtl2-create-geno-dataset"
+            sgeno_page = redirect(
+                url_for("expression-data.rqtl2.select_dataset_info",
+                        species_id=species_id,
+                        population_id=population_id,
+                        pgsrc="error"),
+                code=307)
+
+            required = (
+                ("dataset-name", "You must provide the dataset name"),
+                ("dataset-fullname", "You must provide the dataset full name"))
+            for field, message in required:
+                if not bool(request.form.get(field)):
+                    flash(message, errorclasses)
+                    return sgeno_page
+
+            datasetname = request.form["dataset-name"]
+            with conn.cursor(cursorclass=DictCursor) as cursor:
+                # Refuse to create a second dataset with the same name.
+                cursor.execute("SELECT * FROM GenoFreeze WHERE Name=%s",
+                               (datasetname,))
+                if bool(cursor.fetchall()):
+                    flash(
+                        f"A genotype dataset with name '{escape(datasetname)}' "
+                        "already exists.",
+                        errorclasses)
+                    return sgeno_page
+
+                new_dataset = {
+                    "name": datasetname,
+                    "fname": request.form.get("dataset-fullname"),
+                    "sname": request.form.get("dataset-shortname") or datasetname,
+                    "today": date.today().isoformat(),
+                    "pub": 2 if request.form.get("dataset-public") == "on" else 0,
+                    "isetid": population_id
+                }
+                cursor.execute(
+                    "INSERT INTO GenoFreeze("
+                    "Name, FullName, ShortName, CreateTime, public, InbredSetId"
+                    ") "
+                    "VALUES("
+                    "%(name)s, %(fname)s, %(sname)s, %(today)s, %(pub)s, %(isetid)s"
+                    ")",
+                    new_dataset)
+                new_id = cursor.lastrowid
+                flash("Created dataset successfully.", "alert-success")
+                return render_template(
+                    "rqtl2/create-geno-dataset-success.html",
+                    species=species_by_id(conn, species_id),
+                    population=population_by_species_and_id(
+                        conn, species_id, population_id),
+                    rqtl2_bundle_file=request.form["rqtl2_bundle_file"],
+                    geno_dataset={**new_dataset, "id": new_id})
+
+        checks = (
+            partial(check_species, conn=conn),
+            partial(check_population, conn=conn, species_id=species_id),
+            partial(check_r_qtl2_bundle,
+                    species_id=species_id,
+                    population_id=population_id))
+        return with_errors(__thunk__, *checks)
+
+
+@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
+              "/rqtl2-bundle/select-tissue"),
+             methods=["POST"])
+@require_login
+def select_tissue(species_id: int, population_id: int):
+    """
+    Accept the selection of an existing tissue.
+
+    Once the species, population, bundle and genotype dataset checks pass,
+    the request is always redirected (status 307) to the dataset-info page.
+    An error is flashed first when the 'tissueid' form value is missing or
+    blank.
+    """
+    with database_connection(app.config["SQL_URI"]) as conn:
+        def __thunk__():
+            chosen = request.form.get("tissueid", "").strip()
+            if not bool(chosen):
+                flash("Invalid tissue selection!",
+                      "alert-error error-select-tissue error-rqtl2")
+
+            target = url_for(
+                "expression-data.rqtl2.select_dataset_info",
+                species_id=species_id,
+                population_id=population_id,
+                pgsrc="expression-data.rqtl2.select_geno_dataset")
+            return redirect(target, code=307)
+
+        checks = (
+            partial(check_species, conn=conn),
+            partial(check_population, conn=conn, species_id=species_id),
+            partial(check_r_qtl2_bundle,
+                    species_id=species_id,
+                    population_id=population_id),
+            partial(check_geno_dataset,
+                    conn=conn,
+                    species_id=species_id,
+                    population_id=population_id))
+        return with_errors(__thunk__, *checks)
+
+@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
+              "/rqtl2-bundle/create-tissue"),
+             methods=["POST"])
+@require_login
+def create_tissue(species_id: int, population_id: int):
+    """
+    Create a new tissue/organ/biological material from the form data.
+
+    Both 'tissuename' and 'tissueshortname' are required. If either is blank,
+    or `create_new_tissue` raises an integrity error, an error is flashed and
+    the request is redirected (status 307) to the dataset-info page. On
+    success the "create-tissue-success" template is rendered.
+    """
+    errorclasses = "alert-error error-create-tissue error-rqtl2"
+    form = request.form
+    datasetinfopage = redirect(
+        url_for("expression-data.rqtl2.select_dataset_info",
+                species_id=species_id,
+                population_id=population_id,
+                pgsrc="expression-data.rqtl2.select_geno_dataset"),
+        code=307)
+    with database_connection(app.config["SQL_URI"]) as conn:
+        tissuename = form.get("tissuename", "").strip()
+        tissueshortname = form.get("tissueshortname", "").strip()
+        required = (
+            (tissuename, "Organ/Tissue name MUST be provided."),
+            (tissueshortname, "Organ/Tissue short name MUST be provided."))
+        for value, message in required:
+            if not bool(value):
+                flash(message, errorclasses)
+                return datasetinfopage
+
+        try:
+            tissue = create_new_tissue(conn, tissuename, tissueshortname)
+            flash("Tissue created successfully!", "alert-success")
+            species = species_by_id(conn, species_id)
+            population = population_by_species_and_id(
+                conn, species_id, population_id)
+            bundlefile = request.form["rqtl2_bundle_file"]
+            geno_dataset = geno_dataset_by_id(
+                conn, int(request.form["geno-dataset-id"]))
+            return render_template(
+                "rqtl2/create-tissue-success.html",
+                species=species,
+                population=population,
+                rqtl2_bundle_file=bundlefile,
+                geno_dataset=geno_dataset,
+                tissue=tissue)
+        except mdb.IntegrityError as _ierr:
+            flash("Tissue/Organ with that short name already exists!",
+                  errorclasses)
+            return datasetinfopage
+
+
+@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
+              "/rqtl2-bundle/select-probeset-study"),
+             methods=["POST"])
+@require_login
+def select_probeset_study(species_id: int, population_id: int):
+    """
+    Accept the selection of an existing probeset study.
+
+    Once all the checks pass, the request is always redirected (status 307)
+    to the dataset-info page. An error is flashed first when
+    `probeset_study_by_id` finds no study for the 'probe-study-id' form
+    value.
+    """
+    with database_connection(app.config["SQL_URI"]) as conn:
+        def __thunk__():
+            summary_page = redirect(
+                url_for("expression-data.rqtl2.select_dataset_info",
+                        species_id=species_id,
+                        population_id=population_id),
+                code=307)
+            study = probeset_study_by_id(
+                conn, int(request.form["probe-study-id"]))
+            if not bool(study):
+                flash("Invalid study selected!", "alert-error error-rqtl2")
+
+            # Valid or not, the user is sent back to the dataset-info page.
+            return summary_page
+
+        checks = (
+            partial(check_species, conn=conn),
+            partial(check_population, conn=conn, species_id=species_id),
+            partial(check_r_qtl2_bundle,
+                    species_id=species_id,
+                    population_id=population_id),
+            partial(check_geno_dataset,
+                    conn=conn,
+                    species_id=species_id,
+                    population_id=population_id),
+            partial(check_tissue, conn=conn),
+            partial(check_probe_study,
+                    conn=conn,
+                    species_id=species_id,
+                    population_id=population_id))
+        return with_errors(__thunk__, *checks)
+
+
+@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
+              "/rqtl2-bundle/select-probeset-dataset"),
+             methods=["POST"])
+@require_login
+def select_probeset_dataset(species_id: int, population_id: int):
+    """
+    Accept the selection of an existing probeset dataset.
+
+    Once all the checks pass (including the probeset study and dataset
+    checks), the request is always redirected (status 307) to the
+    dataset-info page. An error is flashed first when
+    `probeset_study_by_id` finds no study for the 'probe-study-id' form
+    value.
+    """
+    with database_connection(app.config["SQL_URI"]) as conn:
+        def __thunk__():
+            summary_page = redirect(
+                url_for("expression-data.rqtl2.select_dataset_info",
+                        species_id=species_id,
+                        population_id=population_id),
+                code=307)
+            study = probeset_study_by_id(
+                conn, int(request.form["probe-study-id"]))
+            if not bool(study):
+                flash("Invalid study selected!", "alert-error error-rqtl2")
+
+            # Valid or not, the user is sent back to the dataset-info page.
+            return summary_page
+
+        checks = (
+            partial(check_species, conn=conn),
+            partial(check_population, conn=conn, species_id=species_id),
+            partial(check_r_qtl2_bundle,
+                    species_id=species_id,
+                    population_id=population_id),
+            partial(check_geno_dataset,
+                    conn=conn,
+                    species_id=species_id,
+                    population_id=population_id),
+            partial(check_tissue, conn=conn),
+            partial(check_probe_study,
+                    conn=conn,
+                    species_id=species_id,
+                    population_id=population_id),
+            partial(check_probe_dataset,
+                    conn=conn,
+                    species_id=species_id,
+                    population_id=population_id))
+        return with_errors(__thunk__, *checks)
+
+
+@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
+              "/rqtl2-bundle/create-probeset-study"),
+             methods=["POST"])
+@require_login
+def create_probeset_study(species_id: int, population_id: int):
+    """
+    Create a new probeset study from the form data.
+
+    The form must carry a 'platformid' and a 'tissueid' that both resolve to
+    existing records, plus a 'studyname'. If the platform or tissue is
+    invalid, or `probeset_create_study` raises an integrity error, an error
+    is flashed and the request is redirected (status 307) to the dataset-info
+    page. On success the "create-probe-study-success" template is rendered.
+    """
+    errorclasses = "alert-error error-rqtl2 error-rqtl2-create-probeset-study"
+    with database_connection(app.config["SQL_URI"]) as conn:
+        def __thunk__():
+            form = request.form
+            dataset_info_page = redirect(
+                url_for("expression-data.rqtl2.select_dataset_info",
+                        species_id=species_id,
+                        population_id=population_id),
+                code=307)
+
+            if (not bool(form.get("platformid"))
+                    or not bool(platform_by_id(conn, int(form["platformid"])))):
+                flash("Invalid platform selected.", errorclasses)
+                return dataset_info_page
+
+            if (not bool(form.get("tissueid"))
+                    or not bool(tissue_by_id(conn, int(form["tissueid"])))):
+                flash("Invalid tissue selected.", errorclasses)
+                return dataset_info_page
+
+            studyname = form["studyname"]
+            try:
+                study = probeset_create_study(
+                    conn,
+                    population_id,
+                    int(form["platformid"]),
+                    int(form["tissueid"]),
+                    studyname,
+                    form.get("studyfullname") or "",
+                    form.get("studyshortname") or "")
+            except mdb.IntegrityError as _ierr:
+                flash(f"ProbeSet study with name '{escape(studyname)}' already "
+                      "exists.",
+                      errorclasses)
+                return dataset_info_page
+
+            species = species_by_id(conn, species_id)
+            population = population_by_species_and_id(
+                conn, species_id, population_id)
+            bundlefile = request.form["rqtl2_bundle_file"]
+            geno_dataset = geno_dataset_by_id(
+                conn, int(request.form["geno-dataset-id"]))
+            return render_template(
+                "rqtl2/create-probe-study-success.html",
+                species=species,
+                population=population,
+                rqtl2_bundle_file=bundlefile,
+                geno_dataset=geno_dataset,
+                study=study)
+
+        checks = (
+            partial(check_species, conn=conn),
+            partial(check_population, conn=conn, species_id=species_id),
+            partial(check_r_qtl2_bundle,
+                    species_id=species_id,
+                    population_id=population_id),
+            partial(check_geno_dataset,
+                    conn=conn,
+                    species_id=species_id,
+                    population_id=population_id),
+            partial(check_tissue, conn=conn))
+        return with_errors(__thunk__, *checks)
+
+
+@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
+              "/rqtl2-bundle/create-probeset-dataset"),
+             methods=["POST"])
+@require_login
+def create_probeset_dataset(species_id: int, population_id: int):#pylint: disable=[too-many-return-statements]
+    """
+    Create a new probeset dataset from the form data.
+
+    The form must carry 'averageid', 'datasetname' and 'datasetfullname', and
+    the 'probe-study-id' and 'averageid' values must resolve to existing
+    records. On any validation failure or database error, an error is
+    flashed and the request is redirected (status 307) to the dataset-info
+    page. On success the "create-probe-dataset-success" template is
+    rendered.
+    """
+    errorclasses = "alert-error error-rqtl2 error-rqtl2-create-probeset-dataset"
+    with database_connection(app.config["SQL_URI"]) as conn:
+        def __thunk__():#pylint: disable=[too-many-return-statements]
+            form = request.form
+            summary_page = redirect(url_for("expression-data.rqtl2.select_dataset_info",
+                                            species_id=species_id,
+                                            population_id=population_id),
+                                    code=307)
+            if not bool(form.get("averageid")):
+                flash("Averaging method not selected!", errorclasses)
+                return summary_page
+            if not bool(form.get("datasetname")):
+                flash("Dataset name not provided!", errorclasses)
+                return summary_page
+            if not bool(form.get("datasetfullname")):
+                flash("Dataset full name not provided!", errorclasses)
+                return summary_page
+
+            tissue = tissue_by_id(conn, form.get("tissueid", "").strip())
+
+            study = probeset_study_by_id(conn, int(form["probe-study-id"]))
+            if not bool(study):
+                flash("Invalid ProbeSet study provided!", errorclasses)
+                return summary_page
+
+            avgmethod = averaging_method_by_id(conn, int(form["averageid"]))
+            if not bool(avgmethod):
+                flash("Invalid averaging method provided!", errorclasses)
+                return summary_page
+
+            try:
+                dset = probeset_create_dataset(conn,
+                                               int(form["probe-study-id"]),
+                                               int(form["averageid"]),
+                                               form["datasetname"],
+                                               form["datasetfullname"],
+                                               form["datasetshortname"],
+                                               # Browsers omit unchecked
+                                               # checkboxes from the submitted
+                                               # form, so the key may be
+                                               # absent: treat that as "not
+                                               # public" rather than raising.
+                                               form.get("datasetpublic") == "on",
+                                               form.get(
+                                                   "datasetdatascale", "log2"))
+            except mdb.IntegrityError as _ierr:
+                app.logger.debug("Possible integrity error: %s", traceback.format_exc())
+                flash(("IntegrityError: The data you provided has some errors: "
+                       f"{_ierr.args}"),
+                      errorclasses)
+                return summary_page
+            except Exception as _exc:# pylint: disable=[broad-except]
+                app.logger.debug("Error creating ProbeSet dataset: %s",
+                                 traceback.format_exc())
+                flash(("There was a problem creating your dataset. Please try "
+                       "again."),
+                      errorclasses)
+                return summary_page
+            return render_template(
+                "rqtl2/create-probe-dataset-success.html",
+                species=species_by_id(conn, species_id),
+                population=population_by_species_and_id(
+                    conn, species_id, population_id),
+                rqtl2_bundle_file=request.form["rqtl2_bundle_file"],
+                geno_dataset=geno_dataset_by_id(
+                    conn,
+                    int(request.form["geno-dataset-id"])),
+                tissue=tissue,
+                study=study,
+                avgmethod=avgmethod,
+                dataset=dset)
+
+        return with_errors(__thunk__,
+                           partial(check_species, conn=conn),
+                           partial(check_population,
+                                   conn=conn,
+                                   species_id=species_id),
+                           partial(check_r_qtl2_bundle,
+                                   species_id=species_id,
+                                   population_id=population_id),
+                           partial(check_geno_dataset,
+                                   conn=conn,
+                                   species_id=species_id,
+                                   population_id=population_id),
+                           partial(check_tissue, conn=conn),
+                           partial(check_probe_study,
+                                   conn=conn,
+                                   species_id=species_id,
+                                   population_id=population_id))
+
+
+@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
+              "/rqtl2-bundle/dataset-info"),
+             methods=["POST"])
+@require_login
+def select_dataset_info(species_id: int, population_id: int):
+    """
+    Step the user through providing the dataset details for the bundle.
+
+    The control data of the uploaded R/qtl2 bundle decides which details are
+    needed, and the first missing one gets its selection page rendered:
+
+    * If `geno` is in the control data and no 'geno-dataset-id' was
+      submitted, prompt for the genotype dataset.
+    * If `pheno` is in the control data, prompt in turn for the tissue, the
+      probeset study and the probeset dataset, until each resolves to an
+      existing record.
+
+    Once nothing is missing, the summary page is rendered.
+    """
+    form = request.form
+    with database_connection(app.config["SQL_URI"]) as conn:
+        def __thunk__():
+            species = species_by_id(conn, species_id)
+            population = population_by_species_and_id(
+                conn, species_id, population_id)
+            thefile = fullpath(form["rqtl2_bundle_file"])
+            with ZipFile(str(thefile), "r") as zfile:
+                cdata = r_qtl2.control_data(zfile)
+
+                geno_dataset = geno_dataset_by_id(
+                    conn,form.get("geno-dataset-id", "").strip())
+                if "geno" in cdata and not bool(form.get("geno-dataset-id")):
+                    return render_template(
+                        "rqtl2/select-geno-dataset.html",
+                        species=species,
+                        population=population,
+                        rqtl2_bundle_file=thefile.name,
+                        datasets=geno_datasets_by_species_and_population(
+                            conn, species_id, population_id))
+
+                tissue = tissue_by_id(conn, form.get("tissueid", "").strip())
+                if "pheno" in cdata and not bool(tissue):
+                    return render_template(
+                        "rqtl2/select-tissue.html",
+                        species=species,
+                        population=population,
+                        rqtl2_bundle_file=thefile.name,
+                        geno_dataset=geno_dataset,
+                        studies=probeset_studies_by_species_and_population(
+                            conn, species_id, population_id),
+                        platforms=platforms_by_species(conn, species_id),
+                        tissues=all_tissues(conn))
+
+                # This lookup is deliberately not repeated with
+                # `int(form["probe-study-id"])`: the form has no such key for
+                # bundles without `pheno` data, and indexing it would abort
+                # the request before the summary page is reached.
+                probeset_study = probeset_study_by_id(
+                    conn, form.get("probe-study-id", "").strip())
+                if "pheno" in cdata and not bool(probeset_study):
+                    return render_template(
+                        "rqtl2/select-probeset-study-id.html",
+                        species=species,
+                        population=population,
+                        rqtl2_bundle_file=thefile.name,
+                        geno_dataset=geno_dataset,
+                        studies=probeset_studies_by_species_and_population(
+                                conn, species_id, population_id),
+                        platforms=platforms_by_species(conn, species_id),
+                        tissue=tissue)
+
+                probeset_dataset = probeset_dataset_by_id(
+                    conn, form.get("probe-dataset-id", "").strip())
+                if "pheno" in cdata and not bool(probeset_dataset):
+                    # A study was found above, so 'probe-study-id' is present
+                    # in the form here.
+                    return render_template(
+                        "rqtl2/select-probeset-dataset.html",
+                        species=species,
+                        population=population,
+                        rqtl2_bundle_file=thefile.name,
+                        geno_dataset=geno_dataset,
+                        probe_study=probeset_study,
+                        tissue=tissue,
+                        datasets=probeset_datasets_by_study(
+                            conn, int(form["probe-study-id"])),
+                        avgmethods=averaging_methods(conn))
+
+            return render_template("rqtl2/summary-info.html",
+                                   species=species,
+                                   population=population,
+                                   rqtl2_bundle_file=thefile.name,
+                                   geno_dataset=geno_dataset,
+                                   tissue=tissue,
+                                   probe_study=probeset_study,
+                                   probe_dataset=probeset_dataset)
+
+        return with_errors(__thunk__,
+                           partial(check_species, conn=conn),
+                           partial(check_population,
+                                   conn=conn,
+                                   species_id=species_id),
+                           partial(check_r_qtl2_bundle,
+                                   species_id=species_id,
+                                   population_id=population_id))
+
+
+@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
+              "/rqtl2-bundle/confirm-bundle-details"),
+             methods=["POST"])
+@require_login
+def confirm_bundle_details(species_id: int, population_id: int):
+    """Queue the background job that processes a confirmed R/qtl2 bundle.
+
+    The job command and the bundle metadata (species, population, bundle
+    file and the optional genotype/ProbeSet selections from the submitted
+    form) are registered in redis, the job is launched, and the user is
+    redirected to the job's status page.
+
+    The actual work lives in `__thunk__`, which is handed to `with_errors`
+    together with the validation checks for the species, population, bundle
+    file and selected datasets.
+    """
+    redisuri = app.config["REDIS_URL"]
+    with (database_connection(app.config["SQL_URI"]) as conn,
+          Redis.from_url(redisuri, decode_responses=True) as rconn):
+        def __thunk__():
+            form = request.form
+            ttl_seconds = app.config["JOBS_TTL_SECONDS"]
+            jobid = str(uuid4())
+
+            # Command line for the worker script that does the processing.
+            command = [
+                sys.executable, "-m", "scripts.process_rqtl2_bundle",
+                app.config["SQL_URI"], app.config["REDIS_URL"],
+                jobs.jobsnamespace(), jobid, "--redisexpiry",
+                str(ttl_seconds)]
+
+            # Details the worker needs, stored on the job as a JSON string.
+            metadata = {
+                "speciesid": species_id,
+                "populationid": population_id,
+                "rqtl2-bundle-file": str(fullpath(form["rqtl2_bundle_file"])),
+                "geno-dataset-id": form.get("geno-dataset-id", ""),
+                "probe-study-id": form.get("probe-study-id", ""),
+                "probe-dataset-id": form.get("probe-dataset-id", "")
+            }
+            if bool(form.get("probe-study-id")):
+                # The platform is only known when a ProbeSet study was chosen.
+                metadata["platformid"] = probeset_study_by_id(
+                    conn, int(form["probe-study-id"]))["ChipId"]
+
+            thejob = jobs.initialise_job(
+                rconn,
+                jobs.jobsnamespace(),
+                jobid,
+                command,
+                "R/qtl2 Bundle Upload",
+                ttl_seconds,
+                {"bundle-metadata": json.dumps(metadata)})
+            jobs.launch_job(
+                thejob, redisuri, f"{app.config['UPLOAD_FOLDER']}/job_errors")
+
+            return redirect(url_for("expression-data.rqtl2.rqtl2_processing_status",
+                                    jobid=jobid))
+
+        checks = (
+            partial(check_species, conn=conn),
+            partial(check_population, conn=conn, species_id=species_id),
+            partial(check_r_qtl2_bundle,
+                    species_id=species_id,
+                    population_id=population_id),
+            partial(check_geno_dataset,
+                    conn=conn,
+                    species_id=species_id,
+                    population_id=population_id),
+            partial(check_probe_study,
+                    conn=conn,
+                    species_id=species_id,
+                    population_id=population_id),
+            partial(check_probe_dataset,
+                    conn=conn,
+                    species_id=species_id,
+                    population_id=population_id))
+        return with_errors(__thunk__, *checks)
+
+
+@rqtl2.route("/status/<uuid:jobid>")
+def rqtl2_processing_status(jobid: UUID):
+    """Show the current state of the job processing an R/qtl2 bundle.
+
+    Renders the error page, the results page or the in-progress page
+    depending on the job's `status` field, passing along any log messages
+    stored in the list named by the job's `log-messagelist` field. When the
+    job cannot be found, the "no such job" page is rendered instead.
+    """
+    # Any status other than these two is treated as "still running".
+    templates_by_status = {
+        "error": "rqtl2/rqtl2-job-error.html",
+        "success": "rqtl2/rqtl2-job-results.html"
+    }
+    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+        try:
+            thejob = jobs.job(rconn, jobs.jobsnamespace(), jobid)
+
+            listname = thejob.get("log-messagelist")
+            logmessages = []
+            if bool(listname):
+                logmessages = rconn.lrange(listname, 0, -1)
+
+            template = templates_by_status.get(
+                thejob["status"], "rqtl2/rqtl2-job-status.html")
+            return render_template(
+                template, job=thejob, messages=logmessages)
+        except jobs.JobNotFound:
+            return render_template("rqtl2/no-such-job.html", jobid=jobid)
diff --git a/uploader/expression_data/samples.py b/uploader/expression_data/samples.py
new file mode 100644
index 0000000..95b9b73
--- /dev/null
+++ b/uploader/expression_data/samples.py
@@ -0,0 +1,359 @@
+"""Code regarding samples"""
+import os
+import sys
+import csv
+import uuid
+from pathlib import Path
+from typing import Iterator
+
+import MySQLdb as mdb
+from redis import Redis
+from MySQLdb.cursors import DictCursor
+from flask import (
+    flash,
+    request,
+    url_for,
+    redirect,
+    Blueprint,
+    render_template,
+    current_app as app)
+
+from functional_tools import take
+
+from uploader import jobs
+from uploader.files import save_file
+from uploader.authorisation import require_login
+from uploader.input_validation import is_integer_input
+from uploader.db_utils import (
+    with_db_connection,
+    database_connection,
+    with_redis_connection)
+from uploader.db import (
+    species_by_id,
+    save_population,
+    population_by_id,
+    populations_by_species,
+    species as fetch_species)
+
+# Blueprint holding the sample-upload views defined in this module. The
+# package `__init__` registers it on the "expression-data" blueprint with the
+# "/sample" URL prefix.
+samples = Blueprint("samples", __name__)
+
+@samples.route("/upload/species", methods=["GET", "POST"])
+@require_login
+def select_species():
+    """Select the species the samples belong to.
+
+    GET renders the species-selection form. POST validates the submitted
+    `species_id` and, when it names an existing species, redirects to the
+    population-selection page for that species. On any validation failure a
+    single error message is flashed and the user is sent back to the index
+    page.
+    """
+    if request.method == "GET":
+        return render_template("samples/select-species.html",
+                               species=with_db_connection(fetch_species))
+
+    index_page = redirect(url_for("expression-data.index.upload_file"))
+    raw_species_id = (request.form.get("species_id") or "").strip()
+    if not raw_species_id:
+        flash("You need to select a species", "alert-error")
+        return index_page
+
+    try:
+        species_id = int(raw_species_id)
+    except ValueError:
+        # Non-numeric form input must not crash the view.
+        flash("Invalid species selected!", "alert-error")
+        return index_page
+
+    species = with_db_connection(
+        lambda conn: species_by_id(conn, species_id))
+    if not bool(species):
+        # Return here so that only this one message is flashed.
+        flash("Invalid species selected!", "alert-error")
+        return index_page
+
+    # Use the fully-qualified endpoint name, as the other redirects to this
+    # blueprint do.
+    return redirect(url_for("expression-data.samples.select_population",
+                            species_id=species_id))
+
+@samples.route("/upload/species/<int:species_id>/create-population",
+               methods=["POST"])
+@require_login
+def create_population(species_id: int):
+    """Create a new grouping/population for the given species.
+
+    Reads `inbredset_name`, `inbredset_fullname`, `inbredset_family` and
+    `description` from the submitted form, saves the population and then
+    redirects to the samples-upload page for the new population.
+
+    An unknown species redirects to the species-selection page; a missing
+    name or full name re-posts (HTTP 307) to the species-selection page
+    with an error message flashed.
+    """
+    if not is_integer_input(species_id):
+        flash("You did not provide a valid species. Please select one to "
+              "continue.",
+              "alert-danger")
+        return redirect(url_for("expression-data.samples.select_species"))
+
+    species_page = redirect(url_for("expression-data.samples.select_species"), code=307)
+    # A single connection serves both the species lookup and the insert.
+    with database_connection(app.config["SQL_URI"]) as conn:
+        species = species_by_id(conn, species_id)
+        if not bool(species):
+            flash("Species with given ID was not found.", "alert-danger")
+            return redirect(url_for("expression-data.samples.select_species"))
+
+        pop_name = request.form.get("inbredset_name", "").strip()
+        pop_fullname = request.form.get("inbredset_fullname", "").strip()
+        if (not bool(pop_name)) or (not bool(pop_fullname)):
+            flash("You *MUST* provide a grouping/population name",
+                  "alert-error error-create-population")
+            return species_page
+
+        pop = save_population(conn, {
+            "SpeciesId": species["SpeciesId"],
+            "Name": pop_name,
+            "InbredSetName": pop_fullname,
+            "FullName": pop_fullname,
+            # Empty optional fields are stored as NULL rather than "".
+            "Family": request.form.get("inbredset_family") or None,
+            "Description": request.form.get("description") or None
+        })
+
+        flash("Grouping/Population created successfully.", "alert-success")
+        # Use the fully-qualified endpoint name, as the other redirects to
+        # this blueprint do.
+        return redirect(url_for("expression-data.samples.upload_samples",
+                                species_id=species_id,
+                                population_id=pop["population_id"]))
+
+@samples.route("/upload/species/<int:species_id>/population",
+               methods=["GET", "POST"])
+@require_login
+def select_population(species_id: int):
+    """Select one of the existing groupings/populations of a species.
+
+    GET renders the selection form listing the species' populations. POST
+    validates the submitted `inbredset_id` and, when it names an existing
+    population, re-posts (HTTP 307) to the samples-upload page for it.
+    Validation failures flash an error and re-post to this page.
+    """
+    if not is_integer_input(species_id):
+        flash("You did not provide a valid species. Please select one to "
+              "continue.",
+              "alert-danger")
+        return redirect(url_for("expression-data.samples.select_species"))
+    species = with_db_connection(lambda conn: species_by_id(conn, species_id))
+    if not bool(species):
+        flash("Species with given ID was not found.", "alert-danger")
+        return redirect(url_for("expression-data.samples.select_species"))
+
+    if request.method == "GET":
+        return render_template(
+            "samples/select-population.html",
+            species=species,
+            populations=with_db_connection(
+                lambda conn: populations_by_species(conn, species_id)))
+
+    # Endpoints are fully qualified, matching the `select_species` redirects
+    # above.
+    population_page = redirect(
+        url_for("expression-data.samples.select_population",
+                species_id=species_id),
+        code=307)
+    _population_id = request.form.get("inbredset_id")
+    if not is_integer_input(_population_id):
+        flash("You did not provide a valid population. Please select one to "
+              "continue.",
+              "alert-danger")
+        return population_page
+    population = with_db_connection(
+        lambda conn: population_by_id(conn, _population_id))
+    if not bool(population):
+        flash("Invalid grouping/population!",
+              "alert-error error-select-population")
+        return population_page
+
+    return redirect(url_for("expression-data.samples.upload_samples",
+                            species_id=species_id,
+                            population_id=_population_id),
+                    code=307)
+
+def read_samples_file(filepath, separator: str, firstlineheading: bool, **kwargs) -> Iterator[dict]:
+    """Lazily read the samples file, yielding one dict per record.
+
+    Parameters:
+      filepath: path of the delimited text file to read (UTF-8).
+      separator: the field delimiter.
+      firstlineheading: when true, the first line provides the field names;
+        otherwise the columns are taken to be, in order, "Name", "Name2",
+        "Symbol" and "Alias".
+      quotechar (keyword, optional): the quoting character; defaults to '"'.
+    """
+    # `newline=""` hands line-ending handling over to the csv module, as its
+    # documentation requires; without it, line breaks embedded in quoted
+    # fields get rewritten by universal-newline translation.
+    with open(filepath, "r", encoding="utf-8", newline="") as inputfile:
+        yield from csv.DictReader(
+            inputfile,
+            fieldnames=(
+                None if firstlineheading
+                else ("Name", "Name2", "Symbol", "Alias")),
+            delimiter=separator,
+            quotechar=kwargs.get("quotechar", '"'))
+
+def save_samples_data(conn: mdb.Connection,
+                      speciesid: int,
+                      file_data: Iterator[dict]):
+    """Insert the samples into the `Strain` table, 5000 rows at a time.
+
+    Every row from `file_data` is tagged with `speciesid` before being
+    inserted. Rows that hit an existing unique key are left as they are
+    (the `ON DUPLICATE KEY UPDATE Name=Name` clause is a no-op update).
+    A running total is printed after each batch.
+    """
+    insert_query = (
+        "INSERT INTO Strain(Name, Name2, SpeciesId, Symbol, Alias) "
+        "VALUES("
+        "    %(Name)s, %(Name2)s, %(SpeciesId)s, %(Symbol)s, %(Alias)s"
+        ") ON DUPLICATE KEY UPDATE Name=Name")
+    rows = ({**row, "SpeciesId": speciesid} for row in file_data)
+    saved = 0
+    with conn.cursor() as cursor:
+        # `rows` is consumed lazily; an empty batch means it is exhausted.
+        batch = take(rows, 5000)
+        while len(batch) > 0:
+            cursor.executemany(insert_query, batch)
+            saved += len(batch)
+            print(f"\tSaved {saved} samples total so far.")
+            batch = take(rows, 5000)
+
+def cross_reference_samples(conn: mdb.Connection,
+                            species_id: int,
+                            population_id: int,
+                            strain_names: Iterator[str]):
+    """Attach samples (strains) to a population via `StrainXRef`.
+
+    The names are processed 5000 at a time. For each batch, the strains of
+    the given species that carry one of the names and have no `StrainXRef`
+    row yet are inserted with `OrderId` values continuing, in steps of 10,
+    from the population's current maximum (or from 10 when it has none).
+    A running total is printed after each batch.
+    """
+    insert_query = (
+        "INSERT INTO StrainXRef( "
+        "  InbredSetId, StrainId, OrderId, Used_for_mapping, PedigreeStatus"
+        ")"
+        "VALUES ("
+        "  %(pop_id)s, %(strain_id)s, %(order_id)s, %(mapping)s, "
+        "  %(pedigree)s"
+        ")")
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        cursor.execute(
+            "SELECT MAX(OrderId) AS loid FROM StrainXRef WHERE InbredSetId=%s",
+            (population_id,))
+        last_order_id = (cursor.fetchone()["loid"] or 10)
+        total = 0
+        batch = take(strain_names, 5000)
+        while len(batch) > 0:
+            placeholders = ", ".join(["%s"] * len(batch))
+            ## This query is slow -- investigate.
+            # NOTE(review): the join is not restricted to this population,
+            # so a strain already linked to any population is skipped --
+            # confirm that is intended.
+            cursor.execute(
+                "SELECT s.Id FROM Strain AS s LEFT JOIN StrainXRef AS sx "
+                "ON s.Id = sx.StrainId WHERE s.SpeciesId=%s AND s.Name IN "
+                f"({placeholders}) AND sx.StrainId IS NULL",
+                (species_id,) + tuple(batch))
+            unlinked = cursor.fetchall()
+            params = tuple(
+                {
+                    "pop_id": population_id,
+                    "strain_id": row["Id"],
+                    "order_id": last_order_id + (position * 10),
+                    "mapping": "N",
+                    "pedigree": None
+                }
+                for position, row in enumerate(unlinked, start=1))
+            cursor.executemany(insert_query, params)
+            last_order_id += (len(params) * 10)
+            # The total counts names processed, not rows actually inserted.
+            total += len(batch)
+            print(f"\t{total} total samples cross-referenced to the population "
+                  "so far.")
+            batch = take(strain_names, 5000)
+
+def build_sample_upload_job(# pylint: disable=[too-many-arguments]
+        speciesid: int,
+        populationid: int,
+        samplesfile: Path,
+        separator: str,
+        firstlineheading: bool,
+        quotechar: str):
+    """Build the command (argv list) that runs the samples upload script.
+
+    The command invokes `scripts.insert_samples` with the current Python
+    interpreter, passing the SQL and redis URIs from the app configuration,
+    the species and population IDs, the absolute path to the samples file
+    and the parsing options. `--firstlineheading` is appended only when
+    `firstlineheading` is true.
+    """
+    command = [
+        sys.executable, "-m", "scripts.insert_samples", app.config["SQL_URI"],
+        str(speciesid), str(populationid), str(samplesfile.absolute()),
+        separator, f"--redisuri={app.config['REDIS_URL']}",
+        f"--quotechar={quotechar}"
+    ]
+    if firstlineheading:
+        command.append("--firstlineheading")
+    return command
+
+@samples.route("/upload/species/<int:species_id>/populations/<int:population_id>/samples",
+               methods=["GET", "POST"])
+@require_login
+def upload_samples(species_id: int, population_id: int):#pylint: disable=[too-many-return-statements]
+    """Accept a samples file and queue the job that loads it.
+
+    GET requests, and POST requests without a `samples_file`, render the
+    upload form. Otherwise the uploaded file is saved to the upload folder,
+    the parsing options (`first_line_heading`, `separator` /
+    `other_separator`, `field_delimiter`) are read from the form, a
+    "samples_upload" job is registered in redis and launched, and the user
+    is redirected to that job's status page.
+
+    An invalid species or population redirects back to the corresponding
+    selection page with an error message flashed.
+    """
+    # All endpoints are fully qualified ("expression-data.samples.*"),
+    # matching the `select_species` redirects below.
+    samples_uploads_page = redirect(
+        url_for("expression-data.samples.upload_samples",
+                species_id=species_id,
+                population_id=population_id))
+    if not is_integer_input(species_id):
+        flash("You did not provide a valid species. Please select one to "
+              "continue.",
+              "alert-danger")
+        return redirect(url_for("expression-data.samples.select_species"))
+    species = with_db_connection(lambda conn: species_by_id(conn, species_id))
+    if not bool(species):
+        flash("Species with given ID was not found.", "alert-danger")
+        return redirect(url_for("expression-data.samples.select_species"))
+
+    if not is_integer_input(population_id):
+        flash("You did not provide a valid population. Please select one "
+              "to continue.",
+              "alert-danger")
+        return redirect(url_for("expression-data.samples.select_population",
+                                species_id=species_id),
+                        code=307)
+    population = with_db_connection(
+        lambda conn: population_by_id(conn, int(population_id)))
+    if not bool(population):
+        flash("Invalid grouping/population!", "alert-error")
+        return redirect(url_for("expression-data.samples.select_population",
+                                species_id=species_id),
+                        code=307)
+
+    if request.method == "GET" or request.files.get("samples_file") is None:
+        return render_template("samples/upload-samples.html",
+                               species=species,
+                               population=population)
+
+    try:
+        samples_file = save_file(request.files["samples_file"],
+                                 Path(app.config["UPLOAD_FOLDER"]))
+    except AssertionError:
+        flash("You need to provide a file with the samples data.",
+              "alert-error")
+        return samples_uploads_page
+
+    firstlineheading = (request.form.get("first_line_heading") == "on")
+
+    # "other" means the user typed a custom separator into a second field.
+    separator = request.form.get("separator", ",")
+    if separator == "other":
+        separator = request.form.get("other_separator", ",")
+    if not bool(separator):
+        flash("You need to provide a separator character.", "alert-error")
+        return samples_uploads_page
+
+    # Fall back to the double-quote when the field is missing or blank.
+    quotechar = (request.form.get("field_delimiter", '"') or '"')
+
+    redisuri = app.config["REDIS_URL"]
+    with Redis.from_url(redisuri, decode_responses=True) as rconn:
+        the_job = jobs.launch_job(
+            jobs.initialise_job(
+                rconn,
+                jobs.jobsnamespace(),
+                str(uuid.uuid4()),
+                build_sample_upload_job(
+                    species["SpeciesId"],
+                    population["InbredSetId"],
+                    samples_file,
+                    separator,
+                    firstlineheading,
+                    quotechar),
+                "samples_upload",
+                app.config["JOBS_TTL_SECONDS"],
+                {"job_name": f"Samples Upload: {samples_file.name}"}),
+            redisuri,
+            f"{app.config['UPLOAD_FOLDER']}/job_errors")
+        return redirect(url_for(
+            "expression-data.samples.upload_status", job_id=the_job["jobid"]))
+
+@samples.route("/upload/status/<uuid:job_id>", methods=["GET"])
+def upload_status(job_id: uuid.UUID):
+    """Check on the status of a samples upload job.
+
+    Renders the success page for a finished job and the progress page for a
+    job that is still running. Redirects to the failure page when the job's
+    status is "error" or when its error file is non-empty. An unknown job
+    renders the "no such job" page with HTTP status 400.
+    """
+    try:
+        job = with_redis_connection(lambda rconn: jobs.job(
+            rconn, jobs.jobsnamespace(), job_id))
+    except jobs.JobNotFound:
+        # A missing job must show the "no such job" page, not a server
+        # error.
+        job = None
+    if not job:
+        return render_template("no_such_job.html", job_id=job_id), 400
+
+    status = job["status"]
+    if status == "success":
+        return render_template("samples/upload-success.html", job=job)
+
+    # Fully-qualified endpoint name for the nested blueprint.
+    failure_page = redirect(url_for(
+        "expression-data.samples.upload_failure", job_id=job_id))
+    if status == "error":
+        return failure_page
+
+    # Anything written to the job's error file also counts as a failure,
+    # even when the recorded status has not been set to "error".
+    error_filename = Path(jobs.error_filename(
+        job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors"))
+    if error_filename.exists() and error_filename.stat().st_size > 0:
+        return failure_page
+
+    return render_template(
+        "samples/upload-progress.html",
+        job=job) # maybe also handle this?
+
+@samples.route("/upload/failure/<uuid:job_id>", methods=["GET"])
+def upload_failure(job_id: uuid.UUID):
+    """Display the errors of a failed samples upload job.
+
+    Renders the worker-failure page when the job's error file is non-empty,
+    and the upload-failure page with the job's details otherwise. An
+    unknown job renders the "no such job" page with HTTP status 400.
+    """
+    try:
+        job = with_redis_connection(lambda rconn: jobs.job(
+            rconn, jobs.jobsnamespace(), job_id))
+    except jobs.JobNotFound:
+        # A missing job must show the "no such job" page, not a server
+        # error.
+        job = None
+    if not bool(job):
+        return render_template("no_such_job.html", job_id=job_id), 400
+
+    error_filename = Path(jobs.error_filename(
+        job_id, f"{app.config['UPLOAD_FOLDER']}/job_errors"))
+    if error_filename.exists() and error_filename.stat().st_size > 0:
+        return render_template("worker_failure.html", job_id=job_id)
+
+    return render_template("samples/upload-failure.html", job=job)