aboutsummaryrefslogtreecommitdiff
path: root/uploader/population/rqtl2.py
diff options
context:
space:
mode:
Diffstat (limited to 'uploader/population/rqtl2.py')
-rw-r--r--uploader/population/rqtl2.py1075
1 files changed, 1075 insertions, 0 deletions
diff --git a/uploader/population/rqtl2.py b/uploader/population/rqtl2.py
new file mode 100644
index 0000000..9968bd6
--- /dev/null
+++ b/uploader/population/rqtl2.py
@@ -0,0 +1,1075 @@
+"""Module to handle uploading of R/qtl2 bundles."""#pylint: disable=[too-many-lines]
+import sys
+import json
+import traceback
+from pathlib import Path
+from uuid import UUID, uuid4
+from functools import partial
+from zipfile import ZipFile, is_zipfile
+from typing import Union, Callable, Optional
+
+import MySQLdb as mdb
+from redis import Redis
+from MySQLdb.cursors import DictCursor
+from werkzeug.utils import secure_filename
+from flask import (
+ flash,
+ escape,
+ request,
+ jsonify,
+ url_for,
+ redirect,
+ Response,
+ Blueprint,
+ render_template,
+ current_app as app)
+
+from r_qtl import r_qtl2
+
+from uploader import jobs
+from uploader.files import save_file, fullpath
+from uploader.species.models import all_species
+from uploader.db_utils import with_db_connection, database_connection
+
+from uploader.authorisation import require_login
+from uploader.platforms.models import platform_by_id, platforms_by_species
+from uploader.db.averaging import averaging_methods, averaging_method_by_id
+from uploader.db.tissues import all_tissues, tissue_by_id, create_new_tissue
+from uploader.population.models import (populations_by_species,
+ population_by_species_and_id)
+from uploader.species.models import species_by_id
+from uploader.db.datasets import (
+ geno_dataset_by_id,
+ geno_datasets_by_species_and_population,
+
+ probeset_study_by_id,
+ probeset_create_study,
+ probeset_dataset_by_id,
+ probeset_create_dataset,
+ probeset_datasets_by_study,
+ probeset_studies_by_species_and_population)
+
# Blueprint that every R/qtl2 upload endpoint in this module is registered on.
rqtl2 = Blueprint(name="rqtl2", import_name=__name__)
+
+
@rqtl2.route("/", methods=["GET", "POST"])
@rqtl2.route("/select-species", methods=["GET", "POST"])
@require_login
def select_species():
    """
    Show the species-selection page on GET; on POST, validate the submitted
    `species_id` and move on to population selection.

    An unknown or missing species flashes an error and redirects back to this
    page.
    """
    if request.method != "GET":
        chosen_id = request.form.get("species_id")
        found = with_db_connection(
            lambda conn: species_by_id(conn, chosen_id))
        if found:
            # NOTE(review): this endpoint name carries a
            # "species.populations." prefix that the other `url_for` calls in
            # this module do not use -- confirm which one is registered.
            next_page = url_for(
                "species.populations.expression-data.rqtl2.select_population",
                species_id=chosen_id)
            return redirect(next_page)

        flash("Invalid species or no species selected!",
              "alert-error error-rqtl2")
        return redirect(url_for("expression-data.rqtl2.select_species"))

    return render_template("expression-data/rqtl2/index.html",
                           species=with_db_connection(all_species))
+
+
@rqtl2.route("<int:species_id>/expression-data/rqtl2/select-population",
             methods=["GET", "POST"])
@require_login
def select_population(species_id: int):
    """
    Select/Create the population to organise data under.

    GET renders the population-selection page for the species. POST validates
    the submitted `inbredset_id` and redirects to the bundle-upload page.

    A request carrying a `pgsrc` query argument is a redirect coming back from
    an error, so the selection page is re-rendered rather than re-processing
    the same (failing) form data.
    """
    with database_connection(app.config["SQL_URI"]) as conn:
        species = species_by_id(conn, species_id)
        if not bool(species):
            flash("Invalid species selected!", "alert-error error-rqtl2")
            return redirect(url_for("expression-data.rqtl2.select_species"))

        # Same convention as `upload_rqtl2_bundle`: a `pgsrc` argument marks an
        # error redirect; without this check the 307 below would loop forever.
        if request.method == "GET" or bool(request.args.get("pgsrc")):
            return render_template(
                "expression-data/rqtl2/select-population.html",
                species=species,
                populations=populations_by_species(conn, species_id))

        population = population_by_species_and_id(
            conn, species["SpeciesId"], request.form.get("inbredset_id"))
        if not bool(population):
            flash("Invalid Population!", "alert-error error-rqtl2")
            # The route has a mandatory `species_id` segment; omitting it makes
            # `url_for` fail to build the URL.
            return redirect(
                url_for("expression-data.rqtl2.select_population",
                        species_id=species_id,
                        pgsrc="error"),
                code=307)

        return redirect(url_for("expression-data.rqtl2.upload_rqtl2_bundle",
                                species_id=species["SpeciesId"],
                                population_id=population["InbredSetId"]))
+
+
class __RequestError__(Exception):  # pylint: disable=[invalid-name]
    """
    Module-internal exception used to signal a bad request from inside an
    endpoint, so the endpoint does not need yet another `return` statement
    (which would trip pylint's `too-many-return-statements` check).
    """
+
+
@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
              "/rqtl2-bundle"),
             methods=["GET", "POST"])
@require_login
def upload_rqtl2_bundle(species_id: int, population_id: int):
    """
    Allow upload of R/qtl2 bundle.

    GET (or a POST that arrives with a `pgsrc` query argument) renders the
    upload form. A plain POST saves the uploaded `rqtl2_bundle_file`, verifies
    that it is a zip file, launches the QC job and redirects to the QC status
    page. Invalid uploads flash an error and redirect back to the upload form.
    """
    with database_connection(app.config["SQL_URI"]) as conn:
        species = species_by_id(conn, species_id)
        # Validate the species before it is subscripted for the population
        # lookup below.
        if not bool(species):
            flash("Invalid species!", "alert-error error-rqtl2")
            return redirect(url_for("expression-data.rqtl2.select_species"))

        population = population_by_species_and_id(
            conn, species["SpeciesId"], population_id)
        if not bool(population):
            flash("Invalid Population!", "alert-error error-rqtl2")
            # `select_population` has a mandatory `species_id` URL segment.
            return redirect(
                url_for("expression-data.rqtl2.select_population",
                        species_id=species_id,
                        pgsrc="error"),
                code=307)

        if request.method == "GET" or (
                request.method == "POST"
                and bool(request.args.get("pgsrc"))):
            return render_template(
                "expression-data/rqtl2/upload-rqtl2-bundle-step-01.html",
                species=species,
                population=population)

        try:
            app.logger.debug("Files in the form: %s", request.files)
            the_file = save_file(request.files["rqtl2_bundle_file"],
                                 Path(app.config["UPLOAD_FOLDER"]))
            if not is_zipfile(str(the_file)):
                app.logger.debug("The file is not a zip file.")
                raise __RequestError__("Invalid file! Expected a zip file.")
        except AssertionError:
            app.logger.debug(traceback.format_exc())
            flash("Please provide a valid R/qtl2 zip bundle.",
                  "alert-error error-rqtl2")
            return redirect(url_for("expression-data.rqtl2.upload_rqtl2_bundle",
                                    species_id=species_id,
                                    population_id=population_id))
        except __RequestError__ as rerr:
            # Report the problem to the user instead of letting the exception
            # escape as an internal server error.
            flash(" ".join(str(arg) for arg in rerr.args),
                  "alert-error error-rqtl2")
            return redirect(url_for("expression-data.rqtl2.upload_rqtl2_bundle",
                                    species_id=species_id,
                                    population_id=population_id))

        jobid = trigger_rqtl2_bundle_qc(
            species_id,
            population_id,
            the_file,
            request.files["rqtl2_bundle_file"].filename)#type: ignore[arg-type]
        return redirect(url_for(
            "expression-data.rqtl2.rqtl2_bundle_qc_status", jobid=jobid))
+
+
def trigger_rqtl2_bundle_qc(
        species_id: int,
        population_id: int,
        rqtl2bundle: Path,
        originalfilename: str
) -> UUID:
    """
    Register and launch a `scripts.qc_on_rqtl2_bundle` job for the bundle.

    The job is created under a fresh UUID, with the species, population,
    absolute bundle path and original filename stored as JSON in its
    `job-metadata` field. Returns the new job's UUID.
    """
    redisuri = app.config["REDIS_URL"]
    with Redis.from_url(redisuri, decode_responses=True) as rconn:
        jobid = uuid4()
        ttl = app.config["JOBS_TTL_SECONDS"]
        thejob = jobs.initialise_job(
            rconn,
            jobs.jobsnamespace(),
            str(jobid),
            [
                sys.executable, "-m", "scripts.qc_on_rqtl2_bundle",
                app.config["SQL_URI"], app.config["REDIS_URL"],
                jobs.jobsnamespace(), str(jobid), str(species_id),
                str(population_id), "--redisexpiry", str(ttl)
            ],
            "rqtl2-bundle-qc-job",
            ttl,
            {
                "job-metadata": json.dumps({
                    "speciesid": species_id,
                    "populationid": population_id,
                    "rqtl2-bundle-file": str(rqtl2bundle.absolute()),
                    "original-filename": originalfilename
                })
            })
        error_dir = f"{app.config['UPLOAD_FOLDER']}/job_errors"
        jobs.launch_job(thejob, redisuri, error_dir)

    return jobid
+
+
def chunk_name(uploadfilename: str, chunkno: int) -> str:
    """
    Build the name under which chunk number `chunkno` of `uploadfilename` is
    stored: the `secure_filename`-sanitised upload name, then `_part_`, then
    the chunk number zero-padded to five digits.

    Raises `ValueError` when the filename is empty or the chunk number is
    below one.
    """
    if uploadfilename == "":
        raise ValueError("Name cannot be empty!")

    if chunkno < 1:
        raise ValueError("Chunk number must be greater than zero")

    safename = secure_filename(uploadfilename)
    suffix = f"{chunkno:05d}"
    return f"{safename}_part_{suffix}"
+
+
def chunks_directory(uniqueidentifier: str) -> Path:
    """
    Compute the directory where chunks are temporarily stored.

    The identifier comes from the client, so it is passed through
    `secure_filename` before being used as a path component; this stops values
    containing path separators or `..` from escaping the upload folder.

    Raises `ValueError` if the identifier is empty, or is empty once
    sanitised.
    """
    if uniqueidentifier == "":
        raise ValueError("Unique identifier cannot be empty!")

    safe_identifier = secure_filename(uniqueidentifier)
    if safe_identifier == "":
        raise ValueError("Unique identifier has no usable characters!")

    return Path(app.config["UPLOAD_FOLDER"], f"tempdir_{safe_identifier}")
+
+
@rqtl2.route(("<int:species_id>/populations/<int:population_id>/rqtl2/"
              "/rqtl2-bundle-chunked"),
             methods=["GET"])
@require_login
def upload_rqtl2_bundle_chunked_get(# pylint: disable=["unused-argument"]
        species_id: int,
        population_id: int
):
    """
    Extension to the `upload_rqtl2_bundle` endpoint above that provides a way
    for testing whether all the chunks have been uploaded and to assist with
    resuming a failed upload.

    Responds "OK" when the chunk named by the `resumableIdentifier`,
    `resumableFilename` and `resumableChunkNumber` query parameters is already
    on disk, with a JSON 404 when it is not, and with a JSON 400 when any of
    the three parameters is missing or unusable.
    """
    fileid = request.args.get("resumableIdentifier", type=str) or ""
    filename = request.args.get("resumableFilename", type=str) or ""
    chunk = request.args.get("resumableChunkNumber", type=int) or 0
    badrequest = (jsonify({
        "message": "At least one required query parameter is missing.",
        "error": "BadRequest",
        "statuscode": 400
    }), 400)
    # ALL three values are needed to locate a chunk; reject when any one of
    # them is absent rather than only when all of them are.
    if not (fileid and filename and chunk >= 1):
        return badrequest

    try:
        chunkpath = Path(chunks_directory(fileid), chunk_name(filename, chunk))
    except ValueError:
        # The path helpers reject values they cannot turn into a path.
        return badrequest

    if chunkpath.exists():
        return "OK"

    return jsonify({
        "message": f"Chunk {chunk} was not found.",
        "error": "NotFound",
        "statuscode": 404
    }), 404
+
+
def __merge_chunks__(targetfile: Path, chunkpaths: tuple[Path, ...]) -> Path:
    """
    Merge the chunks into a single file.

    The chunks are appended to `targetfile` in the order given and are only
    deleted once every one of them has been written. If reading or writing
    fails part-way, a target file created by this call is removed, the chunks
    are left in place and the error is re-raised.

    Returns `targetfile`.
    """
    preexisting = targetfile.exists()
    try:
        with open(targetfile, "ab") as _target:
            for chunkfile in chunkpaths:
                with open(chunkfile, "rb") as _chunkdata:
                    # Copy in bounded blocks so that a large chunk is never
                    # held in memory all at once.
                    for block in iter(
                            partial(_chunkdata.read, 1024 * 1024), b""):
                        _target.write(block)
    except OSError:
        if not preexisting:
            # Do not leave a truncated bundle behind.
            targetfile.unlink(missing_ok=True)
        raise

    for chunkfile in chunkpaths:
        chunkfile.unlink()

    return targetfile
+
+
@rqtl2.route(("<int:species_id>/population/<int:population_id>/rqtl2/upload/"
              "/rqtl2-bundle-chunked"),
             methods=["POST"])
@require_login
def upload_rqtl2_bundle_chunked_post(species_id: int, population_id: int):
    """
    Extension to the `upload_rqtl2_bundle` endpoint above that allows large
    files to be uploaded in chunks.

    This should hopefully speed up uploads, and if done right, even enable
    resumable uploads

    Each request stores one chunk. Once every chunk from 1 up to
    `resumableTotalChunks` is on disk they are merged into a single file in
    the upload folder, the QC job is launched and the URL of the QC status
    page is returned. Otherwise the response is "OK".

    Responds with a JSON 400 when the form values are missing or inconsistent,
    or when a file for the same identifier already exists, and with a JSON 500
    when storing or merging the chunks fails.
    """
    _totalchunks = request.form.get("resumableTotalChunks", type=int) or 0
    _chunk = request.form.get("resumableChunkNumber", default=1, type=int)
    _uploadfilename = request.form.get(
        "resumableFilename", default="", type=str) or ""
    # The identifier is client-supplied and ends up in filesystem paths, so
    # reduce it to a safe single path component first.
    _fileid = secure_filename(request.form.get(
        "resumableIdentifier", default="", type=str) or "")

    # An empty identifier would resolve to the upload folder itself, and a
    # total of zero chunks would make the "all chunks present" test below
    # trivially true.
    if not (bool(_fileid)
            and bool(_uploadfilename)
            and _totalchunks >= 1
            and 1 <= _chunk <= _totalchunks
            and "file" in request.files):
        return jsonify({
            "message": (
                "The upload request is missing some required values, or some "
                "of the values provided are invalid."),
            "error": "BadRequest",
            "statuscode": 400
        }), 400

    _targetfile = Path(app.config["UPLOAD_FOLDER"], _fileid)

    if _targetfile.exists():
        return jsonify({
            "message": (
                "A file with a similar unique identifier has previously been "
                "uploaded and possibly is/has being/been processed."),
            "error": "BadRequest",
            "statuscode": 400
        }), 400

    try:
        # save chunk data
        chunks_directory(_fileid).mkdir(exist_ok=True, parents=True)
        request.files["file"].save(Path(chunks_directory(_fileid),
                                        chunk_name(_uploadfilename, _chunk)))

        # Check whether upload is complete
        chunkpaths = tuple(
            Path(chunks_directory(_fileid), chunk_name(_uploadfilename, _achunk))
            for _achunk in range(1, _totalchunks+1))
        if all(_file.exists() for _file in chunkpaths):
            # merge_files and clean up chunks
            __merge_chunks__(_targetfile, chunkpaths)
            chunks_directory(_fileid).rmdir()
            jobid = trigger_rqtl2_bundle_qc(
                species_id, population_id, _targetfile, _uploadfilename)
            return url_for(
                "expression-data.rqtl2.rqtl2_bundle_qc_status", jobid=jobid)
    except Exception as exc:# pylint: disable=[broad-except]
        msg = "Error processing uploaded file chunks."
        app.logger.error(msg, exc_info=True, stack_info=True)
        return jsonify({
            "message": msg,
            "error": type(exc).__name__,
            "error-description": " ".join(str(arg) for arg in exc.args),
            "error-trace": traceback.format_exception(exc)
        }), 500

    return "OK"
+
+
def __compute_percentage__(thejob: dict, filetype: str) -> Union[str, None]:
    """
    Compute how much of the `filetype` file the QC job has checked.

    Returns "100" once a `<filetype>-linecount` field exists, a two-decimal
    percentage string derived from `<filetype>-checked` and
    `<filetype>-filesize` while only the size is known, and `None` when the
    job has no information on that file type.
    """
    if f"{filetype}-linecount" in thejob:
        return "100"
    if f"{filetype}-filesize" in thejob:
        filesize = int(thejob[f"{filetype}-filesize"])
        if filesize <= 0:
            # Nothing measurable to report on; avoid dividing by zero.
            return "0.00"
        percent = (int(thejob.get(f"{filetype}-checked", 0)) / filesize) * 100
        return f"{percent:.2f}"
    return None


@rqtl2.route("/upload/species/rqtl2-bundle/qc-status/<uuid:jobid>",
             methods=["GET", "POST"])
@require_login
def rqtl2_bundle_qc_status(jobid: UUID):
    """
    Check the status of the QC jobs.

    Renders the error page when the job's status is "error", the results page
    when it is "success", and a progress page otherwise. A job that cannot be
    found renders the "no such job" page.
    """
    with (Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn,
          database_connection(app.config["SQL_URI"]) as dbconn):
        try:
            thejob = jobs.job(rconn, jobs.jobsnamespace(), jobid)
            messagelistname = thejob.get("log-messagelist")
            logmessages = (rconn.lrange(messagelistname, 0, -1)
                           if bool(messagelistname) else [])
            jobstatus = thejob["status"]
            if jobstatus == "error":
                return render_template(
                    "expression-data/rqtl2/rqtl2-qc-job-error.html",
                    job=thejob,
                    errorsgeneric=json.loads(
                        thejob.get("errors-generic", "[]")),
                    errorsgeno=json.loads(
                        thejob.get("errors-geno", "[]")),
                    errorspheno=json.loads(
                        thejob.get("errors-pheno", "[]")),
                    errorsphenose=json.loads(
                        thejob.get("errors-phenose", "[]")),
                    errorsphenocovar=json.loads(
                        thejob.get("errors-phenocovar", "[]")),
                    messages=logmessages)
            if jobstatus == "success":
                jobmeta = json.loads(thejob["job-metadata"])
                species = species_by_id(dbconn, jobmeta["speciesid"])
                return render_template(
                    "expression-data/rqtl2/rqtl2-qc-job-results.html",
                    species=species,
                    # Use the id recorded with the job, so that a species row
                    # that can no longer be found does not raise here.
                    population=population_by_species_and_id(
                        dbconn, jobmeta["speciesid"], jobmeta["populationid"]),
                    rqtl2bundle=Path(jobmeta["rqtl2-bundle-file"]).name,
                    rqtl2bundleorig=jobmeta["original-filename"])

            return render_template(
                "expression-data/rqtl2/rqtl2-qc-job-status.html",
                job=thejob,
                geno_percent=__compute_percentage__(thejob, "geno"),
                pheno_percent=__compute_percentage__(thejob, "pheno"),
                phenose_percent=__compute_percentage__(thejob, "phenose"),
                messages=logmessages)
        except jobs.JobNotFound:
            return render_template("expression-data/rqtl2/no-such-job.html", jobid=jobid)
+
+
def redirect_on_error(flaskroute, **kwargs):
    """
    Build a redirect to `flaskroute` with `pgsrc=error` added to the URL.

    A POST request is redirected with status 307; any other method with 302.
    """
    target = url_for(flaskroute, **kwargs, pgsrc="error")
    if request.method == "POST":
        status = 307
    else:
        status = 302
    return redirect(target, code=status)
+
+
def check_species(conn: mdb.Connection, formargs: dict) -> Optional[
        tuple[str, Response]]:
    """
    Verify that `formargs` carries a 'species_id' and that a species with
    that identifier exists in the database.

    Returns `None` when the check passes, otherwise an
    `(error message, redirect response)` pair pointing at the
    species-selection page.
    """
    speciespage = redirect_on_error("expression-data.rqtl2.select_species")
    problem = None
    if "species_id" not in formargs:
        problem = "You MUST provide the Species identifier."
    elif not species_by_id(conn, formargs["species_id"]):
        problem = "No species with the provided identifier exists."

    if problem is None:
        return None
    return problem, speciespage
+
+
def check_population(conn: mdb.Connection,
                     formargs: dict,
                     species_id) -> Optional[tuple[str, Response]]:
    """
    Verify that `formargs` carries a 'population_id' and that a population
    with that identifier exists for the species.

    Returns `None` when the check passes, otherwise an
    `(error message, redirect response)` pair.
    """
    # NOTE(review): the redirect targets the species-selection endpoint even
    # though the variable is named for the population page -- confirm intent.
    poppage = redirect_on_error(
        "expression-data.rqtl2.select_species", species_id=species_id)
    problem = None
    if "population_id" not in formargs:
        problem = "You MUST provide the Population identifier."
    elif not population_by_species_and_id(
            conn, species_id, formargs["population_id"]):
        problem = "No population with the provided identifier exists."

    if problem is None:
        return None
    return problem, poppage
+
+
def check_r_qtl2_bundle(formargs: dict,
                        species_id,
                        population_id) -> Optional[tuple[str, Response]]:
    """
    Verify that `formargs` names an R/qtl2 bundle and that the named file
    exists on disk.

    Returns `None` when the check passes, otherwise an
    `(error message, redirect response)` pair pointing at the bundle-upload
    page.
    """
    fileuploadpage = redirect_on_error(
        "expression-data.rqtl2.upload_rqtl2_bundle",
        species_id=species_id,
        population_id=population_id)
    if "rqtl2_bundle_file" in formargs:
        bundlepath = Path(fullpath(formargs["rqtl2_bundle_file"]))
        if bundlepath.exists():
            return None
        return "No R/qtl2 bundle with the given name exists.", fileuploadpage

    return (
        "You MUST provide a R/qtl2 zip bundle for expression-data.",
        fileuploadpage)
+
+
def check_geno_dataset(conn: mdb.Connection,
                       formargs: dict,
                       species_id,
                       population_id) -> Optional[tuple[str, Response]]:
    """
    Verify that `formargs` carries a 'geno-dataset-id' that matches exactly
    one row of the `GenoFreeze` table.

    Returns `None` when the check passes, otherwise an
    `(error message, redirect response)` pair pointing at the dataset-info
    page.
    """
    genodsetpg = redirect_on_error("expression-data.rqtl2.select_dataset_info",
                                   species_id=species_id,
                                   population_id=population_id)
    if not formargs.get("geno-dataset-id"):
        return (
            "You MUST provide a valid Genotype dataset identifier", genodsetpg)

    with conn.cursor(cursorclass=DictCursor) as cursor:
        cursor.execute("SELECT * FROM GenoFreeze WHERE Id=%s",
                       (formargs["geno-dataset-id"],))
        rows = cursor.fetchall()
        if not rows:
            return ("No genotype dataset with the provided identifier exists.",
                    genodsetpg)
        if len(rows) > 1:
            message = (
                "Data corruption: More than one genotype dataset with the same "
                "identifier.")
            return (message, genodsetpg)

    return None
+
def check_tissue(
        conn: mdb.Connection, formargs: dict) -> Optional[tuple[str, Response]]:
    """
    Verify that `formargs` carries a non-blank 'tissueid' that matches exactly
    one row of the `Tissue` table.

    Returns `None` when the check passes, otherwise an
    `(error message, redirect response)` pair pointing at the dataset-info
    page. The redirect is built from `formargs["species_id"]` and
    `formargs["population_id"]`, so both keys must be present.
    """
    selectdsetpg = redirect_on_error(
        "expression-data.rqtl2.select_dataset_info",
        species_id=formargs["species_id"],
        population_id=formargs["population_id"])
    tissueid = formargs.get("tissueid", "").strip()
    if not tissueid:
        return ("No tissue/organ/biological material provided.", selectdsetpg)

    with conn.cursor(cursorclass=DictCursor) as cursor:
        cursor.execute("SELECT * FROM Tissue WHERE Id=%s",
                       (formargs["tissueid"],))
        rows = cursor.fetchall()
        if not rows:
            return ("No tissue/organ with the provided identifier exists.",
                    selectdsetpg)

        if len(rows) > 1:
            message = (
                "Data corruption: More than one tissue/organ with the same "
                "identifier.")
            return (message, selectdsetpg)

    return None
+
+
def check_probe_study(conn: mdb.Connection,
                      formargs: dict,
                      species_id,
                      population_id) -> Optional[tuple[str, Response]]:
    """
    Verify that `formargs` carries a 'probe-study-id' for which
    `probeset_study_by_id` finds a study.

    Returns `None` when the check passes, otherwise an
    `(error message, redirect response)` pair pointing at the dataset-info
    page.
    """
    dsetinfopg = redirect_on_error("expression-data.rqtl2.select_dataset_info",
                                   species_id=species_id,
                                   population_id=population_id)
    problem = None
    if not formargs.get("probe-study-id"):
        problem = "No probeset study was selected!"
    elif not probeset_study_by_id(conn, formargs["probe-study-id"]):
        problem = "No probeset study with the provided identifier exists"

    if problem is None:
        return None
    return problem, dsetinfopg
+
+
def check_probe_dataset(conn: mdb.Connection,
                        formargs: dict,
                        species_id,
                        population_id) -> Optional[tuple[str, Response]]:
    """
    Verify that `formargs` carries a 'probe-dataset-id' for which
    `probeset_dataset_by_id` finds a dataset.

    Returns `None` when the check passes, otherwise an
    `(error message, redirect response)` pair pointing at the dataset-info
    page.
    """
    dsetinfopg = redirect_on_error("expression-data.rqtl2.select_dataset_info",
                                   species_id=species_id,
                                   population_id=population_id)
    problem = None
    if not formargs.get("probe-dataset-id"):
        problem = "No probeset dataset was selected!"
    elif not probeset_dataset_by_id(conn, formargs["probe-dataset-id"]):
        problem = "No probeset dataset with the provided identifier exists"

    if problem is None:
        return None
    return problem, dsetinfopg
+
+
def with_errors(endpointthunk: Callable, *checkfns):
    """
    Run 'endpointthunk' with error checking.

    Each function in `checkfns` is called, in order, with the merged query and
    form arguments passed as the `formargs` keyword. A check reports failure
    by returning an `(error message, response)` pair and success by returning
    `None`.

    Checking stops at the first failure: its message is flashed and its
    response is returned. Later checks are not run, since they may rely on the
    values the earlier checks validate. `endpointthunk()` is only called, and
    its result returned, when every check passes.
    """
    formargs = {**dict(request.args), **dict(request.form)}
    for _fn in checkfns:
        error = _fn(formargs=formargs)
        if error is not None:
            message, response = error[0], error[1]
            flash(message, "alert-error error-rqtl2")
            return response

    return endpointthunk()
+
+
@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
              "/rqtl2-bundle/select-geno-dataset"),
             methods=["POST"])
@require_login
def select_geno_dataset(species_id: int, population_id: int):
    """
    Accept the genotype dataset chosen for the bundle and hand over to the
    dataset-info step.
    """
    with database_connection(app.config["SQL_URI"]) as conn:
        def _proceed():
            datasets = geno_datasets_by_species_and_population(
                conn, species_id, population_id)
            if datasets:
                flash("Genotype accepted", "alert-success error-rqtl2")
                return redirect(
                    url_for("expression-data.rqtl2.select_dataset_info",
                            species_id=species_id,
                            population_id=population_id,
                            pgsrc="expression-data.rqtl2.select_geno_dataset"),
                    code=307)

            # NOTE(review): this re-POSTs to this same endpoint, which would
            # reach this branch again -- confirm the intended target.
            flash("No genotype dataset was provided!",
                  "alert-error error-rqtl2")
            return redirect(
                url_for("expression-data.rqtl2.select_geno_dataset",
                        species_id=species_id,
                        population_id=population_id,
                        pgsrc="error"),
                code=307)

        checks = (
            partial(check_species, conn=conn),
            partial(check_population, conn=conn, species_id=species_id),
            partial(check_r_qtl2_bundle,
                    species_id=species_id,
                    population_id=population_id),
            partial(check_geno_dataset,
                    conn=conn,
                    species_id=species_id,
                    population_id=population_id))
        return with_errors(_proceed, *checks)
+
+
@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
              "/rqtl2-bundle/select-tissue"),
             methods=["POST"])
@require_login
def select_tissue(species_id: int, population_id: int):
    """
    Accept the tissue chosen for the bundle and hand over to the dataset-info
    step, flashing an error first when the 'tissueid' field is blank.
    """
    with database_connection(app.config["SQL_URI"]) as conn:
        def _proceed():
            tissueid = request.form.get("tissueid", "").strip()
            if tissueid == "":
                flash("Invalid tissue selection!",
                      "alert-error error-select-tissue error-rqtl2")

            # The redirect is the same whether or not the selection was valid.
            nextpage = url_for(
                "expression-data.rqtl2.select_dataset_info",
                species_id=species_id,
                population_id=population_id,
                pgsrc="expression-data.rqtl2.select_geno_dataset")
            return redirect(nextpage, code=307)

        checks = (
            partial(check_species, conn=conn),
            partial(check_population, conn=conn, species_id=species_id),
            partial(check_r_qtl2_bundle,
                    species_id=species_id,
                    population_id=population_id),
            partial(check_geno_dataset,
                    conn=conn,
                    species_id=species_id,
                    population_id=population_id))
        return with_errors(_proceed, *checks)
+
@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
              "/rqtl2-bundle/create-tissue"),
             methods=["POST"])
@require_login
def create_tissue(species_id: int, population_id: int):
    """
    Create a new tissue/organ/biological material from the submitted
    'tissuename' and 'tissueshortname'.

    Renders the success page when the tissue is created. A missing name, a
    missing short name or a database integrity error flashes a message and
    redirects back to the dataset-info step.
    """
    form = request.form
    errorclasses = "alert-error error-create-tissue error-rqtl2"
    datasetinfopage = redirect(
        url_for("expression-data.rqtl2.select_dataset_info",
                species_id=species_id,
                population_id=population_id,
                pgsrc="expression-data.rqtl2.select_geno_dataset"),
        code=307)
    with database_connection(app.config["SQL_URI"]) as conn:
        tissuename = form.get("tissuename", "").strip()
        tissueshortname = form.get("tissueshortname", "").strip()
        if tissuename == "":
            flash("Organ/Tissue name MUST be provided.", errorclasses)
            return datasetinfopage

        if tissueshortname == "":
            flash("Organ/Tissue short name MUST be provided.", errorclasses)
            return datasetinfopage

        try:
            tissue = create_new_tissue(conn, tissuename, tissueshortname)
            flash("Tissue created successfully!", "alert-success")
            species = species_by_id(conn, species_id)
            population = population_by_species_and_id(
                conn, species_id, population_id)
            bundlefile = request.form["rqtl2_bundle_file"]
            geno_dataset = geno_dataset_by_id(
                conn, int(request.form["geno-dataset-id"]))
            return render_template(
                "expression-data/rqtl2/create-tissue-success.html",
                species=species,
                population=population,
                rqtl2_bundle_file=bundlefile,
                geno_dataset=geno_dataset,
                tissue=tissue)
        except mdb.IntegrityError:
            flash("Tissue/Organ with that short name already exists!",
                  errorclasses)
            return datasetinfopage
+
+
@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
              "/rqtl2-bundle/select-probeset-study"),
             methods=["POST"])
@require_login
def select_probeset_study(species_id: int, population_id: int):
    """
    Accept the ProbeSet study chosen for the bundle and hand over to the
    dataset-info step, flashing an error first when the study cannot be found.
    """
    with database_connection(app.config["SQL_URI"]) as conn:
        def _proceed():
            summary_page = redirect(
                url_for("expression-data.rqtl2.select_dataset_info",
                        species_id=species_id,
                        population_id=population_id),
                code=307)
            study = probeset_study_by_id(
                conn, int(request.form["probe-study-id"]))
            if not study:
                flash("Invalid study selected!", "alert-error error-rqtl2")

            # Either way, the user goes back to the dataset-info step.
            return summary_page

        checks = (
            partial(check_species, conn=conn),
            partial(check_population, conn=conn, species_id=species_id),
            partial(check_r_qtl2_bundle,
                    species_id=species_id,
                    population_id=population_id),
            partial(check_geno_dataset,
                    conn=conn,
                    species_id=species_id,
                    population_id=population_id),
            partial(check_tissue, conn=conn),
            partial(check_probe_study,
                    conn=conn,
                    species_id=species_id,
                    population_id=population_id))
        return with_errors(_proceed, *checks)
+
+
@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
              "/rqtl2-bundle/select-probeset-dataset"),
             methods=["POST"])
@require_login
def select_probeset_dataset(species_id: int, population_id: int):
    """
    Accept the ProbeSet dataset chosen for the bundle and hand over to the
    dataset-info step, flashing an error first when the submitted study cannot
    be found.
    """
    with database_connection(app.config["SQL_URI"]) as conn:
        def _proceed():
            summary_page = redirect(
                url_for("expression-data.rqtl2.select_dataset_info",
                        species_id=species_id,
                        population_id=population_id),
                code=307)
            study = probeset_study_by_id(
                conn, int(request.form["probe-study-id"]))
            if not study:
                flash("Invalid study selected!", "alert-error error-rqtl2")

            # Either way, the user goes back to the dataset-info step.
            return summary_page

        checks = (
            partial(check_species, conn=conn),
            partial(check_population, conn=conn, species_id=species_id),
            partial(check_r_qtl2_bundle,
                    species_id=species_id,
                    population_id=population_id),
            partial(check_geno_dataset,
                    conn=conn,
                    species_id=species_id,
                    population_id=population_id),
            partial(check_tissue, conn=conn),
            partial(check_probe_study,
                    conn=conn,
                    species_id=species_id,
                    population_id=population_id),
            partial(check_probe_dataset,
                    conn=conn,
                    species_id=species_id,
                    population_id=population_id))
        return with_errors(_proceed, *checks)
+
+
@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
              "/rqtl2-bundle/create-probeset-study"),
             methods=["POST"])
@require_login
def create_probeset_study(species_id: int, population_id: int):
    """
    Create a new ProbeSet study from the submitted platform, tissue and study
    names.

    Renders the success page when the study is created. An invalid platform,
    an invalid tissue or a database integrity error flashes a message and
    redirects back to the dataset-info step.
    """
    errorclasses = "alert-error error-rqtl2 error-rqtl2-create-probeset-study"
    with database_connection(app.config["SQL_URI"]) as conn:
        def _proceed():
            form = request.form
            dataset_info_page = redirect(
                url_for("expression-data.rqtl2.select_dataset_info",
                        species_id=species_id,
                        population_id=population_id),
                code=307)

            platform_ok = (
                bool(form.get("platformid")) and
                bool(platform_by_id(conn, int(form["platformid"]))))
            if not platform_ok:
                flash("Invalid platform selected.", errorclasses)
                return dataset_info_page

            tissue_ok = (
                bool(form.get("tissueid")) and
                bool(tissue_by_id(conn, int(form["tissueid"]))))
            if not tissue_ok:
                flash("Invalid tissue selected.", errorclasses)
                return dataset_info_page

            studyname = form["studyname"]
            try:
                study = probeset_create_study(
                    conn,
                    population_id,
                    int(form["platformid"]),
                    int(form["tissueid"]),
                    studyname,
                    form.get("studyfullname") or "",
                    form.get("studyshortname") or "")
            except mdb.IntegrityError:
                flash(f"ProbeSet study with name '{escape(studyname)}' already "
                      "exists.",
                      errorclasses)
                return dataset_info_page

            species = species_by_id(conn, species_id)
            population = population_by_species_and_id(
                conn, species_id, population_id)
            bundlefile = request.form["rqtl2_bundle_file"]
            geno_dataset = geno_dataset_by_id(
                conn, int(request.form["geno-dataset-id"]))
            return render_template(
                "expression-data/rqtl2/create-probe-study-success.html",
                species=species,
                population=population,
                rqtl2_bundle_file=bundlefile,
                geno_dataset=geno_dataset,
                study=study)

        checks = (
            partial(check_species, conn=conn),
            partial(check_population, conn=conn, species_id=species_id),
            partial(check_r_qtl2_bundle,
                    species_id=species_id,
                    population_id=population_id),
            partial(check_geno_dataset,
                    conn=conn,
                    species_id=species_id,
                    population_id=population_id),
            partial(check_tissue, conn=conn))
        return with_errors(_proceed, *checks)
+
+
@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
              "/rqtl2-bundle/create-probeset-dataset"),
             methods=["POST"])
@require_login
def create_probeset_dataset(species_id: int, population_id: int):#pylint: disable=[too-many-return-statements]
    """
    Create a new probeset dataset.

    Requires 'averageid', 'datasetname' and 'datasetfullname' in the form, a
    'probe-study-id' naming an existing study and an 'averageid' naming an
    existing averaging method. Renders the success page when the dataset is
    created; any failure flashes a message and redirects back to the
    dataset-info step.
    """
    errorclasses = "alert-error error-rqtl2 error-rqtl2-create-probeset-dataset"
    with database_connection(app.config["SQL_URI"]) as conn:
        def __thunk__():#pylint: disable=[too-many-return-statements]
            form = request.form
            summary_page = redirect(url_for("expression-data.rqtl2.select_dataset_info",
                                            species_id=species_id,
                                            population_id=population_id),
                                    code=307)
            if not bool(form.get("averageid")):
                flash("Averaging method not selected!", errorclasses)
                return summary_page
            if not bool(form.get("datasetname")):
                flash("Dataset name not provided!", errorclasses)
                return summary_page
            if not bool(form.get("datasetfullname")):
                flash("Dataset full name not provided!", errorclasses)
                return summary_page

            tissue = tissue_by_id(conn, form.get("tissueid", "").strip())

            study = probeset_study_by_id(conn, int(form["probe-study-id"]))
            if not bool(study):
                flash("Invalid ProbeSet study provided!", errorclasses)
                return summary_page

            avgmethod = averaging_method_by_id(conn, int(form["averageid"]))
            if not bool(avgmethod):
                flash("Invalid averaging method provided!", errorclasses)
                return summary_page

            try:
                dset = probeset_create_dataset(
                    conn,
                    int(form["probe-study-id"]),
                    int(form["averageid"]),
                    form["datasetname"],
                    form["datasetfullname"],
                    form["datasetshortname"],
                    # Browsers leave an unticked checkbox out of the form
                    # data altogether, so a missing key means "not public"
                    # and must not be treated as a bad request.
                    form.get("datasetpublic") == "on",
                    form.get("datasetdatascale", "log2"))
            except mdb.IntegrityError as _ierr:
                app.logger.debug("Possible integrity error: %s", traceback.format_exc())
                flash(("IntegrityError: The data you provided has some errors: "
                       f"{_ierr.args}"),
                      errorclasses)
                return summary_page
            except Exception:# pylint: disable=[broad-except]
                app.logger.debug("Error creating ProbeSet dataset: %s",
                                 traceback.format_exc())
                flash(("There was a problem creating your dataset. Please try "
                       "again."),
                      errorclasses)
                return summary_page
            return render_template(
                "expression-data/rqtl2/create-probe-dataset-success.html",
                species=species_by_id(conn, species_id),
                population=population_by_species_and_id(
                    conn, species_id, population_id),
                rqtl2_bundle_file=request.form["rqtl2_bundle_file"],
                geno_dataset=geno_dataset_by_id(
                    conn,
                    int(request.form["geno-dataset-id"])),
                tissue=tissue,
                study=study,
                avgmethod=avgmethod,
                dataset=dset)

        return with_errors(__thunk__,
                           partial(check_species, conn=conn),
                           partial(check_population,
                                   conn=conn,
                                   species_id=species_id),
                           partial(check_r_qtl2_bundle,
                                   species_id=species_id,
                                   population_id=population_id),
                           partial(check_geno_dataset,
                                   conn=conn,
                                   species_id=species_id,
                                   population_id=population_id),
                           partial(check_tissue, conn=conn),
                           partial(check_probe_study,
                                   conn=conn,
                                   species_id=species_id,
                                   population_id=population_id))
+
+
@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
              "/rqtl2-bundle/dataset-info"),
             methods=["POST"])
@require_login
def select_dataset_info(species_id: int, population_id: int):
    """
    Walk the user through the dataset details the R/qtl2 bundle needs.

    The bundle's control data decides what is asked for. If `geno` is present
    and no genotype dataset has been submitted, the genotype-dataset selection
    page is rendered. If `pheno` is present, the tissue, ProbeSet study and
    ProbeSet dataset pages are rendered in turn for whichever of those is
    still missing. Once nothing is missing, the summary page is rendered.
    """
    form = request.form
    with database_connection(app.config["SQL_URI"]) as conn:
        def __thunk__():
            species = species_by_id(conn, species_id)
            population = population_by_species_and_id(
                conn, species_id, population_id)
            thefile = fullpath(form["rqtl2_bundle_file"])
            with ZipFile(str(thefile), "r") as zfile:
                cdata = r_qtl2.control_data(zfile)

            geno_dataset = geno_dataset_by_id(
                conn,form.get("geno-dataset-id", "").strip())
            if "geno" in cdata and not bool(form.get("geno-dataset-id")):
                return render_template(
                    "expression-data/rqtl2/select-geno-dataset.html",
                    species=species,
                    population=population,
                    rqtl2_bundle_file=thefile.name,
                    datasets=geno_datasets_by_species_and_population(
                        conn, species_id, population_id))

            tissue = tissue_by_id(conn, form.get("tissueid", "").strip())
            if "pheno" in cdata and not bool(tissue):
                return render_template(
                    "expression-data/rqtl2/select-tissue.html",
                    species=species,
                    population=population,
                    rqtl2_bundle_file=thefile.name,
                    geno_dataset=geno_dataset,
                    studies=probeset_studies_by_species_and_population(
                        conn, species_id, population_id),
                    platforms=platforms_by_species(conn, species_id),
                    tissues=all_tissues(conn))

            # This lookup is the only one needed for the study. A second,
            # unconditional `int(form["probe-study-id"])` lookup would fail
            # for bundles without `pheno` data, where no study is ever chosen.
            probeset_study = probeset_study_by_id(
                conn, form.get("probe-study-id", "").strip())
            if "pheno" in cdata and not bool(probeset_study):
                return render_template(
                    "expression-data/rqtl2/select-probeset-study-id.html",
                    species=species,
                    population=population,
                    rqtl2_bundle_file=thefile.name,
                    geno_dataset=geno_dataset,
                    studies=probeset_studies_by_species_and_population(
                        conn, species_id, population_id),
                    platforms=platforms_by_species(conn, species_id),
                    tissue=tissue)

            probeset_dataset = probeset_dataset_by_id(
                conn, form.get("probe-dataset-id", "").strip())
            if "pheno" in cdata and not bool(probeset_dataset):
                # A study was found above, so 'probe-study-id' is in the form.
                return render_template(
                    "expression-data/rqtl2/select-probeset-dataset.html",
                    species=species,
                    population=population,
                    rqtl2_bundle_file=thefile.name,
                    geno_dataset=geno_dataset,
                    probe_study=probeset_study,
                    tissue=tissue,
                    datasets=probeset_datasets_by_study(
                        conn, int(form["probe-study-id"])),
                    avgmethods=averaging_methods(conn))

            return render_template("expression-data/rqtl2/summary-info.html",
                                   species=species,
                                   population=population,
                                   rqtl2_bundle_file=thefile.name,
                                   geno_dataset=geno_dataset,
                                   tissue=tissue,
                                   probe_study=probeset_study,
                                   probe_dataset=probeset_dataset)

        return with_errors(__thunk__,
                           partial(check_species, conn=conn),
                           partial(check_population,
                                   conn=conn,
                                   species_id=species_id),
                           partial(check_r_qtl2_bundle,
                                   species_id=species_id,
                                   population_id=population_id))
+
+
@rqtl2.route(("/upload/species/<int:species_id>/population/<int:population_id>"
              "/rqtl2-bundle/confirm-bundle-details"),
             methods=["POST"])
@require_login
def confirm_bundle_details(species_id: int, population_id: int):
    """
    Queue the background job that processes a confirmed R/qtl2 bundle.

    A new job is initialised in Redis and launched; it runs the
    `scripts.process_rqtl2_bundle` module with the current interpreter. The
    selections submitted in the form are attached to the job as a JSON
    document under the `bundle-metadata` key. The user is then redirected to
    the job's status page.

    The work is handed to `with_errors` together with the species,
    population, bundle, genotype-dataset, probe-study and probe-dataset
    checks.
    """
    redisuri = app.config["REDIS_URL"]
    with (database_connection(app.config["SQL_URI"]) as conn,
          Redis.from_url(redisuri, decode_responses=True) as rconn):
        def __thunk__():
            form = request.form
            ttl_seconds = app.config["JOBS_TTL_SECONDS"]
            jobid = str(uuid4())
            namespace = jobs.jobsnamespace()

            # Command line for the worker process.
            command = [
                sys.executable, "-m", "scripts.process_rqtl2_bundle",
                app.config["SQL_URI"], app.config["REDIS_URL"],
                jobs.jobsnamespace(), jobid, "--redisexpiry",
                str(ttl_seconds)]

            # Everything the worker needs to know about the user's choices.
            bundle_metadata = {
                "speciesid": species_id,
                "populationid": population_id,
                "rqtl2-bundle-file": str(
                    fullpath(form["rqtl2_bundle_file"])),
                "geno-dataset-id": form.get("geno-dataset-id", ""),
                "probe-study-id": form.get("probe-study-id", ""),
                "probe-dataset-id": form.get("probe-dataset-id", ""),
            }
            # The platform is only recorded when a probe study was selected;
            # it is read from that study's `ChipId` value.
            if bool(form.get("probe-study-id")):
                study = probeset_study_by_id(
                    conn, int(form["probe-study-id"]))
                bundle_metadata["platformid"] = study["ChipId"]

            initialised = jobs.initialise_job(
                rconn,
                namespace,
                jobid,
                command,
                "R/qtl2 Bundle Upload",
                ttl_seconds,
                {"bundle-metadata": json.dumps(bundle_metadata)})
            jobs.launch_job(initialised,
                            redisuri,
                            f"{app.config['UPLOAD_FOLDER']}/job_errors")

            return redirect(
                url_for("expression-data.rqtl2.rqtl2_processing_status",
                        jobid=jobid))

        # Checks shared by the three dataset-related validators.
        scoped = {"conn": conn,
                  "species_id": species_id,
                  "population_id": population_id}
        checks = (
            partial(check_species, conn=conn),
            partial(check_population, conn=conn, species_id=species_id),
            partial(check_r_qtl2_bundle,
                    species_id=species_id,
                    population_id=population_id),
            partial(check_geno_dataset, **scoped),
            partial(check_probe_study, **scoped),
            partial(check_probe_dataset, **scoped))
        return with_errors(__thunk__, *checks)
+
+
@rqtl2.route("/status/<uuid:jobid>")
def rqtl2_processing_status(jobid: UUID):
    """
    Show the current state of an R/qtl2 bundle-processing job.

    The job is fetched from Redis along with any log messages stored in the
    list named by its `log-messagelist` field. Jobs whose status is `error`
    or `success` get dedicated pages; any other status gets the generic
    in-progress page. If the job cannot be found, the "no such job" page is
    rendered instead.
    """
    # Pages for the two terminal states; everything else uses the default.
    pages_by_status = {
        "error": "expression-data/rqtl2/rqtl2-job-error.html",
        "success": "expression-data/rqtl2/rqtl2-job-results.html",
    }
    default_page = "expression-data/rqtl2/rqtl2-job-status.html"

    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
        try:
            thejob = jobs.job(rconn, jobs.jobsnamespace(), jobid)

            listname = thejob.get("log-messagelist")
            logmessages = []
            if listname:
                logmessages = rconn.lrange(listname, 0, -1)

            page = pages_by_status.get(thejob["status"], default_page)
            return render_template(page, job=thejob, messages=logmessages)
        except jobs.JobNotFound:
            return render_template("expression-data/rqtl2/no-such-job.html",
                                   jobid=jobid)