From 61362063e93dbacc75d093c6862ddc0aef534198 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 8 Apr 2024 12:49:33 +0300 Subject: Refactor: Make error-checking more robust Rather than maintaining a dictionary of possible error-checking functions, this commit allows passing in the error-checking functions necessary for each point. This makes the code easier to extend by simply defining a new error-checking function and passing it in to the error-checking "driver". --- qc_app/upload/rqtl2.py | 235 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 184 insertions(+), 51 deletions(-) diff --git a/qc_app/upload/rqtl2.py b/qc_app/upload/rqtl2.py index 04bfa5b..8b5ac01 100644 --- a/qc_app/upload/rqtl2.py +++ b/qc_app/upload/rqtl2.py @@ -1,12 +1,13 @@ -"""Module to handle uploading of R/qtl2 bundles.""" +"""Module to handle uploading of R/qtl2 bundles."""#pylint: disable=[too-many-lines] import sys import json import traceback from pathlib import Path from datetime import date from uuid import UUID, uuid4 +from functools import partial from zipfile import ZipFile, is_zipfile -from typing import Union, Callable, Optional, Iterable +from typing import Union, Callable, Optional import MySQLdb as mdb from redis import Redis @@ -262,11 +263,6 @@ def rqtl2_bundle_qc_status(jobid: UUID): return render_template("rqtl2/no-such-job.html", jobid=jobid) -def form_errors(formargs, *errorcheckers) -> Iterable[tuple[str, Response]]: - """Retrieve all errors from the form inputs""" - return (checker(formargs) for checker in errorcheckers) - - def redirect_on_error(flaskroute, **kwargs): """Utility to redirect on error""" return redirect(url_for(flaskroute, **kwargs, pgsrc="error"), @@ -327,21 +323,61 @@ def check_r_qtl2_bundle(formargs: dict, return None -def check_geno_dataset(formargs: dict, +def check_geno_dataset(conn: mdb.Connection, + formargs: dict, species_id, population_id) -> Optional[tuple[str, Response]]: """Check for the Genotype dataset.""" - genodsetpg = redirect_on_error("upload.rqtl2.select_geno_dataset", + genodsetpg = redirect_on_error("upload.rqtl2.select_dataset_info", species_id=species_id, population_id=population_id) if not bool(formargs.get("geno-dataset-id")): return ( "You MUST provide a valid Genotype dataset identifier", genodsetpg) + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute("SELECT * FROM GenoFreeze WHERE Id=%s", + (formargs["geno-dataset-id"],)) + results = cursor.fetchall() + if not bool(results): + return ("No genotype dataset with the provided identifier exists.", + genodsetpg) + if len(results) > 1: + return ( + "Data corruption: More than one genotype dataset with the same " + "identifier.", + genodsetpg) + + return None + +def check_tissue( + conn: mdb.Connection,formargs: dict) -> Optional[tuple[str, Response]]: + """Check for tissue/organ/biological material.""" + selectdsetpg = redirect_on_error("upload.rqtl2.select_dataset_info", + species_id=formargs["species_id"], + population_id=formargs["population_id"]) + if not bool(formargs.get("tissueid", "").strip()): + return ("No tissue/organ/biological material provided.", selectdsetpg) + + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute("SELECT * FROM Tissue WHERE Id=%s", + (formargs["tissueid"],)) + results = cursor.fetchall() + if not bool(results): + return ("No tissue/organ with the provided identifier exists.", + selectdsetpg) + + if len(results) > 1: + return ( + "Data corruption: More than one tissue/organ with the same " + "identifier.", + selectdsetpg) + return None -def check_probe_study(formargs: dict, +def check_probe_study(conn: mdb.Connection, + formargs: dict, species_id, population_id) -> Optional[tuple[str, Response]]: """Check for the ProbeSet study.""" @@ -351,10 +387,15 @@ def check_probe_study(formargs: dict, if not bool(formargs.get("probe-study-id")): return "No probeset study was selected!", dsetinfopg + if not bool(probeset_study_by_id(conn, formargs["probe-study-id"])): + return ("No probeset study with the provided identifier exists", + dsetinfopg) + return None -def check_probe_dataset(formargs: dict, +def check_probe_dataset(conn: mdb.Connection, + formargs: dict, species_id, population_id) -> Optional[tuple[str, Response]]: """Check for the ProbeSet dataset.""" @@ -364,29 +405,18 @@ def check_probe_dataset(formargs: dict, if not bool(formargs.get("probe-dataset-id")): return "No probeset dataset was selected!", dsetinfopg + if not bool(probeset_dataset_by_id(conn, formargs["probe-dataset-id"])): + return ("No probeset dataset with the provided identifier exists", + dsetinfopg) + return None -def with_errors(conn: mdb.Connection, endpointthunk: Callable, *checkerstrings): +def with_errors(endpointthunk: Callable, *checkfns): """Run 'endpointthunk' with error checking.""" formargs = {**dict(request.args), **dict(request.form)} - species_id = formargs.get("species_id") or 0 - population_id = formargs.get("population_id") or 0 - fns = { - "species": lambda fargs: check_species(conn, fargs), - "population": lambda fargs: check_population( - conn, fargs, species_id), - "rqtl2_bundle_file": lambda fargs: check_r_qtl2_bundle( - fargs, species_id, population_id), - "geno_dataset": lambda fargs: check_geno_dataset( - fargs, species_id, population_id), - "probeset_study": lambda fargs: check_probe_study( - fargs, species_id, population_id), - "probeset_dataset": lambda fargs: check_probe_dataset( - fargs, species_id, population_id) - } - errors = tuple(item for item in form_errors( - formargs, *(fns[chkstr] for chkstr in checkerstrings)) if item is not None) + errors = tuple(item for item in (_fn(formargs=formargs) for _fn in checkfns) + if item is not None) if len(errors) > 0: flash(errors[0][0], "alert-error error-rqtl2") return errors[0][1] @@ -419,8 +449,17 @@ def select_geno_dataset(species_id: int, population_id: int): pgsrc="upload.rqtl2.select_geno_dataset"), code=307) - return with_errors( - conn, __thunk__, "species", "population", "rqtl2_bundle_file", "geno_dataset") + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id)) @rqtl2.route(("/upload/species//population/" @@ -484,8 +523,12 @@ def create_geno_dataset(species_id: int, population_id: int): rqtl2_bundle_file=request.form["rqtl2_bundle_file"], geno_dataset={**new_dataset, "id": cursor.lastrowid}) - return with_errors(conn, __thunk__, "species", "population", - "rqtl2_bundle_file") + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, conn=conn), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id)) @rqtl2.route(("/upload/species//population/" @@ -505,8 +548,18 @@ def select_tissue(species_id: int, population_id: int): pgsrc="upload.rqtl2.select_geno_dataset"), code=307) - return with_errors( - conn, __thunk__, "species", "population", "rqtl2_bundle_file", "geno_dataset") + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id)) @rqtl2.route(("/upload/species//population/" "/rqtl2-bundle/create-tissue"), @@ -568,9 +621,23 @@ def select_probeset_study(species_id: int, population_id: int): return summary_page return summary_page - return with_errors(conn, __thunk__, "species", "population", - "rqtl2_bundle_file", "geno_dataset", - "probeset_study") + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_tissue, conn=conn), + partial(check_probe_study, + conn=conn, + species_id=species_id, + population_id=population_id)) @rqtl2.route(("/upload/species//population/" @@ -590,9 +657,27 @@ def select_probeset_dataset(species_id: int, population_id: int): return summary_page - return with_errors(conn, __thunk__, "species", "population", - "rqtl2_bundle_file", "geno_dataset", - "probeset_study", "probeset_dataset") + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_tissue, conn=conn), + partial(check_probe_study, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_probe_dataset, + conn=conn, + species_id=species_id, + population_id=population_id)) @rqtl2.route(("/upload/species//population/" @@ -642,8 +727,19 @@ def create_probeset_study(species_id: int, population_id: int): int(request.form["geno-dataset-id"])), study=study) - return with_errors(conn, __thunk__, "species", "population", - "rqtl2_bundle_file", "geno_dataset") + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_tissue, conn=conn)) @rqtl2.route(("/upload/species//population/" @@ -713,9 +809,23 @@ def create_probeset_dataset(species_id: int, population_id: int):#pylint: disabl avgmethod=avgmethod, dataset=dset) - return with_errors(conn, __thunk__, "species", "population", - "rqtl2_bundle_file", "geno_dataset", - "probeset_study") + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_tissue, conn=conn), + partial(check_probe_study, + conn=conn, + species_id=species_id, + population_id=population_id)) @rqtl2.route(("/upload/species//population/" @@ -796,8 +906,14 @@ def select_dataset_info(species_id: int, population_id: int): probe_study=probeset_study, probe_dataset=probeset_dataset) - return with_errors(conn, __thunk__, "species", "population", - "rqtl2_bundle_file") + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id)) @rqtl2.route(("/upload/species//population/" @@ -848,9 +964,26 @@ def confirm_bundle_details(species_id: int, population_id: int): return redirect(url_for("upload.rqtl2.rqtl2_processing_status", jobid=jobid)) - return with_errors(conn, __thunk__, "species", "population", - "rqtl2_bundle_file", "geno_dataset", - "probeset_study", "probeset_dataset") + return with_errors(__thunk__, + partial(check_species, conn=conn), + partial(check_population, + conn=conn, + species_id=species_id), + partial(check_r_qtl2_bundle, + species_id=species_id, + population_id=population_id), + partial(check_geno_dataset, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_probe_study, + conn=conn, + species_id=species_id, + population_id=population_id), + partial(check_probe_dataset, + conn=conn, + species_id=species_id, + population_id=population_id)) @rqtl2.route("/status/") -- cgit v1.2.3