Diffstat (limited to 'uploader/phenotypes')
-rw-r--r-- | uploader/phenotypes/__init__.py |   2
-rw-r--r-- | uploader/phenotypes/models.py   | 256
-rw-r--r-- | uploader/phenotypes/views.py    | 846
3 files changed, 1104 insertions, 0 deletions
diff --git a/uploader/phenotypes/__init__.py b/uploader/phenotypes/__init__.py
new file mode 100644
index 0000000..c17d32c
--- /dev/null
+++ b/uploader/phenotypes/__init__.py
@@ -0,0 +1,2 @@
+"""Package for handling ('classical') phenotype data"""
+from .views import phenotypesbp
diff --git a/uploader/phenotypes/models.py b/uploader/phenotypes/models.py
new file mode 100644
index 0000000..e1ec0c9
--- /dev/null
+++ b/uploader/phenotypes/models.py
@@ -0,0 +1,256 @@
+"""Database and utility functions for phenotypes."""
+from typing import Optional
+from functools import reduce
+from datetime import datetime
+
+import MySQLdb as mdb
+from MySQLdb.cursors import Cursor, DictCursor
+from flask import current_app as app
+
+from gn_libs.mysqldb import debug_query
+
+
+def datasets_by_population(
+        conn: mdb.Connection,
+        species_id: int,
+        population_id: int
+) -> tuple[dict, ...]:
+    """Retrieve all of a population's phenotype studies."""
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        cursor.execute(
+            "SELECT s.SpeciesId, pf.* FROM Species AS s "
+            "INNER JOIN InbredSet AS iset ON s.Id=iset.SpeciesId "
+            "INNER JOIN PublishFreeze AS pf ON iset.Id=pf.InbredSetId "
+            "WHERE s.Id=%s AND iset.Id=%s;",
+            (species_id, population_id))
+        return tuple(dict(row) for row in cursor.fetchall())
+
+
+def dataset_by_id(conn: mdb.Connection,
+                  species_id: int,
+                  population_id: int,
+                  dataset_id: int) -> dict:
+    """Fetch dataset details by identifier."""
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        cursor.execute(
+            "SELECT s.SpeciesId, pf.* FROM Species AS s "
+            "INNER JOIN InbredSet AS iset ON s.Id=iset.SpeciesId "
+            "INNER JOIN PublishFreeze AS pf ON iset.Id=pf.InbredSetId "
+            "WHERE s.Id=%s AND iset.Id=%s AND pf.Id=%s",
+            (species_id, population_id, dataset_id))
+        return dict(cursor.fetchone())
+
+
+def phenotypes_count(conn: mdb.Connection,
+                     population_id: int,
+                     dataset_id: int) -> int:
+    """Count the number of phenotypes in the dataset."""
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        cursor.execute(
+            "SELECT COUNT(*) AS total_phenos FROM Phenotype AS pheno "
+            "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId "
+            "INNER JOIN PublishFreeze AS pf ON pxr.InbredSetId=pf.InbredSetId "
+            "WHERE pxr.InbredSetId=%s AND pf.Id=%s",
+            (population_id, dataset_id))
+        return int(cursor.fetchone()["total_phenos"])
+
+
+def phenotype_publication_data(conn, phenotype_id) -> Optional[dict]:
+    """Retrieve the publication data for a phenotype if it exists."""
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        cursor.execute(
+            "SELECT DISTINCT pxr.PhenotypeId, pub.* FROM PublishXRef AS pxr "
+            "INNER JOIN Publication AS pub ON pxr.PublicationId=pub.Id "
+            "WHERE pxr.PhenotypeId=%s",
+            (phenotype_id,))
+        res = cursor.fetchone()
+        if res is None:
+            return res
+        return dict(res)
+
+
+def dataset_phenotypes(conn: mdb.Connection,
+                       population_id: int,
+                       dataset_id: int,
+                       offset: int = 0,
+                       limit: Optional[int] = None) -> tuple[dict, ...]:
+    """Fetch the actual phenotypes."""
+    _query = (
+        "SELECT pheno.*, pxr.Id AS xref_id, ist.InbredSetCode "
+        "FROM Phenotype AS pheno "
+        "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId "
+        "INNER JOIN PublishFreeze AS pf ON pxr.InbredSetId=pf.InbredSetId "
+        "INNER JOIN InbredSet AS ist ON pf.InbredSetId=ist.Id "
+        "WHERE pxr.InbredSetId=%s AND pf.Id=%s") + (
+            f" LIMIT {limit} OFFSET {offset}" if bool(limit) else "")
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        cursor.execute(_query, (population_id, dataset_id))
+        debug_query(cursor, app.logger)
+        return tuple(dict(row) for row in cursor.fetchall())
+
+
+def __phenotype_se__(cursor: Cursor, xref_id, dataids_and_strainids):
+    """Fetch standard-error values (if they exist) for a phenotype."""
+    paramstr = ", ".join(["(%s, %s)"] * len(dataids_and_strainids))
+    flat = tuple(item for sublist in dataids_and_strainids for item in sublist)
+    cursor.execute("SELECT * FROM PublishSE WHERE (DataId, StrainId) IN "
+                   f"({paramstr})",
+                   flat)
+    debug_query(cursor, app.logger)
+    _se = {
+        (row["DataId"], row["StrainId"]): {
+            "DataId": row["DataId"],
+            "StrainId": row["StrainId"],
+            "error": row["error"]
+        }
+        for row in cursor.fetchall()
+    }
+
+    cursor.execute("SELECT * FROM NStrain WHERE (DataId, StrainId) IN "
+                   f"({paramstr})",
+                   flat)
+    debug_query(cursor, app.logger)
+    _n = {
+        (row["DataId"], row["StrainId"]): {
+            "DataId": row["DataId"],
+            "StrainId": row["StrainId"],
+            "count": row["count"]
+        }
+        for row in cursor.fetchall()
+    }
+
+    keys = set(tuple(_se.keys()) + tuple(_n.keys()))
+    return {
+        key: {"xref_id": xref_id, **_se.get(key, {}), **_n.get(key, {})}
+        for key in keys
+    }
+
+
+def __organise_by_phenotype__(pheno, row):
+    """Organise disparate data rows into phenotype 'objects'."""
+    _pheno = pheno.get(row["Id"])
+    return {
+        **pheno,
+        row["Id"]: {
+            "Id": row["Id"],
+            "Pre_publication_description": row["Pre_publication_description"],
+            "Post_publication_description": row["Post_publication_description"],
+            "Original_description": row["Original_description"],
+            "Units": row["Units"],
+            "Pre_publication_abbreviation": row["Pre_publication_abbreviation"],
+            "Post_publication_abbreviation": row["Post_publication_abbreviation"],
+            "xref_id": row["pxr.Id"],
+            "data": {
+                **(_pheno["data"] if bool(_pheno) else {}),
+                (row["DataId"], row["StrainId"]): {
+                    "DataId": row["DataId"],
+                    "StrainId": row["StrainId"],
+                    "mean": row["mean"],
+                    "Locus": row["Locus"],
+                    "LRS": row["LRS"],
+                    "additive": row["additive"],
+                    "Sequence": row["Sequence"],
+                    "comments": row["comments"],
+                    "value": row["value"],
+                    "StrainName": row["Name"],
+                    "StrainName2": row["Name2"],
+                    "StrainSymbol": row["Symbol"],
+                    "StrainAlias": row["Alias"]
+                }
+            }
+        }
+    }
+
+
+def __merge_pheno_data_and_se__(data, sedata) -> dict:
+    """Merge phenotype data with the standard errors."""
+    return {
+        key: {**value, **sedata.get(key, {})}
+        for key, value in data.items()
+    }
+
+
+def phenotype_by_id(
+        conn: mdb.Connection,
+        species_id: int,
+        population_id: int,
+        dataset_id: int,
+        xref_id
+) -> Optional[dict]:
+    """Fetch a specific phenotype."""
+    _dataquery = ("SELECT pheno.*, pxr.*, pd.*, str.*, iset.InbredSetCode "
+                  "FROM Phenotype AS pheno "
+                  "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId "
+                  "INNER JOIN PublishData AS pd ON pxr.DataId=pd.Id "
+                  "INNER JOIN Strain AS str ON pd.StrainId=str.Id "
+                  "INNER JOIN StrainXRef AS sxr ON str.Id=sxr.StrainId "
+                  "INNER JOIN PublishFreeze AS pf ON sxr.InbredSetId=pf.InbredSetId "
+                  "INNER JOIN InbredSet AS iset ON pf.InbredSetId=iset.InbredSetId "
+                  "WHERE "
+                  "(str.SpeciesId, pxr.InbredSetId, pf.Id, pxr.Id)=(%s, %s, %s, %s)")
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        cursor.execute(_dataquery,
+                       (species_id, population_id, dataset_id, xref_id))
+        _pheno: dict = reduce(__organise_by_phenotype__, cursor.fetchall(), {})
+        if bool(_pheno) and len(_pheno.keys()) == 1:
+            _pheno = tuple(_pheno.values())[0]
+            return {
+                **_pheno,
+                "data": tuple(__merge_pheno_data_and_se__(
+                    _pheno["data"],
+                    __phenotype_se__(
+                        cursor, xref_id,
tuple(_pheno["data"].keys())) + ).values()) + } + if bool(_pheno) and len(_pheno.keys()) > 1: + raise Exception( + "We found more than one phenotype with the same identifier!") + + return None + + +def phenotypes_data(conn: mdb.Connection, + population_id: int, + dataset_id: int, + offset: int = 0, + limit: Optional[int] = None) -> tuple[dict, ...]: + """Fetch the data for the phenotypes.""" + # — Phenotype -> PublishXRef -> PublishData -> Strain -> StrainXRef -> PublishFreeze + _query = ("SELECT pheno.*, pxr.*, pd.*, str.*, iset.InbredSetCode " + "FROM Phenotype AS pheno " + "INNER JOIN PublishXRef AS pxr ON pheno.Id=pxr.PhenotypeId " + "INNER JOIN PublishData AS pd ON pxr.DataId=pd.Id " + "INNER JOIN Strain AS str ON pd.StrainId=str.Id " + "INNER JOIN StrainXRef AS sxr ON str.Id=sxr.StrainId " + "INNER JOIN PublishFreeze AS pf ON sxr.InbredSetId=pf.InbredSetId " + "INNER JOIN InbredSet AS iset ON pf.InbredSetId=iset.InbredSetId " + "WHERE pxr.InbredSetId=%s AND pf.Id=%s") + ( + f" LIMIT {limit} OFFSET {offset}" if bool(limit) else "") + with conn.cursor(cursorclass=DictCursor) as cursor: + cursor.execute(_query, (population_id, dataset_id)) + debug_query(cursor, app.logger) + return tuple(dict(row) for row in cursor.fetchall()) + + +def save_new_dataset(cursor: Cursor, + population_id: int, + dataset_name: str, + dataset_fullname: str, + dataset_shortname: str) -> dict: + """Create a new phenotype dataset.""" + params = { + "population_id": population_id, + "dataset_name": dataset_name, + "dataset_fullname": dataset_fullname, + "dataset_shortname": dataset_shortname, + "created": datetime.now().date().isoformat(), + "public": 2, + "confidentiality": 0, + "users": None + } + cursor.execute( + "INSERT INTO PublishFreeze(Name, FullName, ShortName, CreateTime, " + "public, InbredSetId, confidentiality, AuthorisedUsers) " + "VALUES(%(dataset_name)s, %(dataset_fullname)s, %(dataset_shortname)s, " + "%(created)s, %(public)s, %(population_id)s, %(confidentiality)s, " + "%(users)s)", + params) + debug_query(cursor, app.logger) + return {**params, "Id": cursor.lastrowid} diff --git a/uploader/phenotypes/views.py b/uploader/phenotypes/views.py new file mode 100644 index 0000000..dc2df8f --- /dev/null +++ b/uploader/phenotypes/views.py @@ -0,0 +1,846 @@ +"""Views handling ('classical') phenotypes.""" +import sys +import uuid +import json +import datetime +from typing import Any +from pathlib import Path +from zipfile import ZipFile +from functools import wraps, reduce +from logging import INFO, ERROR, DEBUG, FATAL, CRITICAL, WARNING + +from redis import Redis +from pymonad.either import Left +from requests.models import Response +from MySQLdb.cursors import DictCursor +from gn_libs.mysqldb import database_connection +from flask import (flash, + request, + url_for, + jsonify, + redirect, + Blueprint, + current_app as app) + +# from r_qtl import r_qtl2 as rqtl2 +from r_qtl import r_qtl2_qc as rqc +from r_qtl import exceptions as rqe + +from uploader import jobs +from uploader.files import save_file#, fullpath +from uploader.ui import make_template_renderer +from uploader.oauth2.client import oauth2_post +from uploader.authorisation import require_login +from uploader.route_utils import generic_select_population +from uploader.datautils import safe_int, enumerate_sequence +from uploader.species.models import all_species, species_by_id +from uploader.monadic_requests import make_either_error_handler +from uploader.request_checks import with_species, with_population +from uploader.input_validation import 
+from uploader.input_validation import (encode_errors,
+                                       decode_errors,
+                                       is_valid_representative_name)
+
+from .models import (dataset_by_id,
+                     phenotype_by_id,
+                     phenotypes_count,
+                     save_new_dataset,
+                     dataset_phenotypes,
+                     datasets_by_population,
+                     phenotype_publication_data)
+
+phenotypesbp = Blueprint("phenotypes", __name__)
+render_template = make_template_renderer("phenotypes")
+
+_FAMILIES_WITH_SE_AND_N_ = (
+    "Reference Populations (replicate average, SE, N)",)
+
+
+@phenotypesbp.route("/phenotypes", methods=["GET"])
+@require_login
+def index():
+    """Direct entry-point for phenotypes data handling."""
+    with database_connection(app.config["SQL_URI"]) as conn:
+        if not bool(request.args.get("species_id")):
+            return render_template("phenotypes/index.html",
+                                   species=all_species(conn),
+                                   activelink="phenotypes")
+
+        species_id = request.args.get("species_id")
+        if species_id == "CREATE-SPECIES":
+            return redirect(url_for(
+                "species.create_species",
+                return_to="species.populations.phenotypes.select_population"))
+
+        species = species_by_id(conn, species_id)
+        if not bool(species):
+            flash("No such species!", "alert-danger")
+            return redirect(url_for("species.populations.phenotypes.index"))
+        return redirect(url_for("species.populations.phenotypes.select_population",
+                                species_id=species["SpeciesId"]))
+
+
+@phenotypesbp.route("<int:species_id>/phenotypes/select-population",
+                    methods=["GET"])
+@require_login
+@with_species(redirect_uri="species.populations.phenotypes.index")
+def select_population(species: dict, **kwargs):# pylint: disable=[unused-argument]
+    """Select the population for your phenotypes."""
+    return generic_select_population(
+        species,
+        "phenotypes/select-population.html",
+        request.args.get("population_id") or "",
+        "species.populations.phenotypes.select_population",
+        "species.populations.phenotypes.list_datasets",
+        "phenotypes",
+        "No such population found!")
+
+
+@phenotypesbp.route(
+    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets",
+    methods=["GET"])
+@require_login
+@with_population(species_redirect_uri="species.populations.phenotypes.index",
+                 redirect_uri="species.populations.phenotypes.select_population")
+def list_datasets(species: dict, population: dict, **kwargs):# pylint: disable=[unused-argument]
+    """List available phenotype datasets."""
+    with database_connection(app.config["SQL_URI"]) as conn:
+        datasets = datasets_by_population(
+            conn, species["SpeciesId"], population["Id"])
+        if len(datasets) == 1:
+            return redirect(url_for(
+                "species.populations.phenotypes.view_dataset",
+                species_id=species["SpeciesId"],
+                population_id=population["Id"],
+                dataset_id=datasets[0]["Id"]))
+        return render_template("phenotypes/list-datasets.html",
+                               species=species,
+                               population=population,
+                               datasets=datasets,
+                               activelink="list-datasets")
+
+
+def with_dataset(
+        species_redirect_uri: str,
+        population_redirect_uri: str,
+        redirect_uri: str
+):
+    """Ensure the dataset actually exists."""
+    def __decorator__(func):
+        @wraps(func)
+        @with_population(species_redirect_uri, population_redirect_uri)
+        def __with_dataset__(**kwargs):
+            # Build the fallback redirect before any conversion can fail, so
+            # it is always bound when the except clause below needs it.
+            select_dataset_uri = redirect(url_for(
+                redirect_uri,
+                species_id=kwargs["species_id"],
+                population_id=kwargs["population_id"]))
+            try:
+                _spcid = int(kwargs["species_id"])
+                _popid = int(kwargs["population_id"])
+                _dsetid = int(kwargs.get("dataset_id"))
+                if not bool(_dsetid):
+                    flash("You need to select a valid 'dataset_id' value.",
+                          "alert-danger")
+                    return select_dataset_uri
+                with database_connection(app.config["SQL_URI"]) as conn:
+                    dataset = dataset_by_id(conn, _spcid, _popid, _dsetid)
+                    if not bool(dataset):
+                        flash("You must select a valid dataset.",
+                              "alert-danger")
+                        return select_dataset_uri
+            except (TypeError, ValueError) as _verr:
+                # int(None) raises TypeError; int("abc") raises ValueError.
+                app.logger.debug(
+                    "Exception converting 'dataset_id' to integer: %s",
+                    kwargs.get("dataset_id"),
+                    exc_info=True)
+                flash("Expected 'dataset_id' value to be an integer.",
+                      "alert-danger")
+                return select_dataset_uri
+            return func(dataset=dataset, **kwargs)
+        return __with_dataset__
+    return __decorator__
+
+
+@phenotypesbp.route(
+    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+    "/<int:dataset_id>/view",
+    methods=["GET"])
+@require_login
+@with_dataset(
+    species_redirect_uri="species.populations.phenotypes.index",
+    population_redirect_uri="species.populations.phenotypes.select_population",
+    redirect_uri="species.populations.phenotypes.list_datasets")
+def view_dataset(# pylint: disable=[unused-argument]
+        species: dict, population: dict, dataset: dict, **kwargs):
+    """View a specific dataset."""
+    with database_connection(app.config["SQL_URI"]) as conn:
+        dataset = dataset_by_id(
+            conn, species["SpeciesId"], population["Id"], dataset["Id"])
+        if not bool(dataset):
+            flash("Could not find such a phenotype dataset!", "alert-danger")
+            return redirect(url_for(
+                "species.populations.phenotypes.list_datasets",
+                species_id=species["SpeciesId"],
+                population_id=population["Id"]))
+
+        start_at = max(safe_int(request.args.get("start_at") or 0), 0)
+        count = int(request.args.get("count") or 20)
+        return render_template("phenotypes/view-dataset.html",
+                               species=species,
+                               population=population,
+                               dataset=dataset,
+                               phenotype_count=phenotypes_count(
+                                   conn, population["Id"], dataset["Id"]),
+                               phenotypes=enumerate_sequence(
+                                   dataset_phenotypes(
+                                       conn,
+                                       population["Id"],
+                                       dataset["Id"])),
+                               start_from=start_at,
+                               count=count,
+                               activelink="view-dataset")
+
+
+@phenotypesbp.route(
+    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+    "/<int:dataset_id>/phenotype/<xref_id>",
+    methods=["GET"])
+@require_login
+@with_dataset(
+    species_redirect_uri="species.populations.phenotypes.index",
+    population_redirect_uri="species.populations.phenotypes.select_population",
+    redirect_uri="species.populations.phenotypes.list_datasets")
+def view_phenotype(# pylint: disable=[unused-argument]
+        species: dict,
+        population: dict,
+        dataset: dict,
+        xref_id: int,
+        **kwargs
+):
+    """View an individual phenotype from the dataset."""
+    def __render__(privileges):
+        phenotype = phenotype_by_id(conn,
+                                    species["SpeciesId"],
+                                    population["Id"],
+                                    dataset["Id"],
+                                    xref_id)
+
+        def __non_empty__(value) -> bool:
+            if isinstance(value, str):
+                return value.strip() != ""
+            return bool(value)
+
+        return render_template(
+            "phenotypes/view-phenotype.html",
+            species=species,
+            population=population,
+            dataset=dataset,
+            xref_id=xref_id,
+            phenotype=phenotype,
+            has_se=any(bool(item.get("error")) for item in phenotype["data"]),
+            publish_data={
+                key.replace("_", " "): val
+                for key, val in
+                (phenotype_publication_data(conn, phenotype["Id"]) or {}).items()
+                if (key in ("PubMed_ID", "Authors", "Title", "Journal")
+                    and __non_empty__(val))
+            },
+            privileges=(privileges
+                        ### For demo! Do not commit this part
+                        + ("group:resource:edit-resource",
+                           "group:resource:delete-resource",)
+                        ### END: For demo! Do not commit this part
+                        ),
+            activelink="view-phenotype")
+
+    def __fail__(error):
+        if isinstance(error, Response) and error.json() == "No linked resource!":
+            return __render__(tuple())
+        return make_either_error_handler(
+            "There was an error fetching the roles and privileges.")(error)
+
+    with database_connection(app.config["SQL_URI"]) as conn:
+        return oauth2_post(
+            "/auth/resource/phenotypes/individual/linked-resource",
+            json={
+                "species_id": species["SpeciesId"],
+                "population_id": population["Id"],
+                "dataset_id": dataset["Id"],
+                "xref_id": xref_id
+            }
+        ).then(
+            lambda resource: tuple(
+                privilege["privilege_id"] for role in resource["roles"]
+                for privilege in role["privileges"])
+        ).then(__render__).either(__fail__, lambda resp: resp)
+
+
+@phenotypesbp.route(
+    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets/create",
+    methods=["GET", "POST"])
+@require_login
+@with_population(
+    species_redirect_uri="species.populations.phenotypes.index",
+    redirect_uri="species.populations.phenotypes.select_population")
+def create_dataset(species: dict, population: dict, **kwargs):# pylint: disable=[unused-argument]
+    """Create a new phenotype dataset."""
+    with (database_connection(app.config["SQL_URI"]) as conn,
+          conn.cursor(cursorclass=DictCursor) as cursor):
+        if request.method == "GET":
+            return render_template("phenotypes/create-dataset.html",
+                                   activelink="create-dataset",
+                                   species=species,
+                                   population=population,
+                                   **decode_errors(
+                                       request.args.get("error_values", "")))
+
+        form = request.form
+        _errors: tuple[tuple[str, str], ...] = tuple()
+        if not is_valid_representative_name(
+                (form.get("dataset-name") or "").strip()):
+            _errors = _errors + (("dataset-name", "Invalid dataset name."),)
+
+        if not bool((form.get("dataset-fullname") or "").strip()):
+            _errors = _errors + (("dataset-fullname",
+                                  "You must provide a value for 'Full Name'."),)
+
+        if len(_errors) > 0:
+            return redirect(url_for(
+                "species.populations.phenotypes.create_dataset",
+                species_id=species["SpeciesId"],
+                population_id=population["Id"],
+                error_values=encode_errors(_errors, form)))
+
+        dataset_shortname = (
+            form["dataset-shortname"] or form["dataset-name"]).strip()
+        _pheno_dataset = save_new_dataset(
+            cursor,
+            population["Id"],
+            form["dataset-name"].strip(),
+            form["dataset-fullname"].strip(),
+            dataset_shortname)
+        return redirect(url_for("species.populations.phenotypes.list_datasets",
+                                species_id=species["SpeciesId"],
+                                population_id=population["Id"]))
+
+
+def process_phenotypes_rqtl2_bundle(error_uri):
+    """Process phenotypes from the uploaded R/qtl2 bundle."""
+    _redisuri = app.config["REDIS_URL"]
+    _sqluri = app.config["SQL_URI"]
+    try:
+        ## Handle huge files here...
+        phenobundle = save_file(request.files["phenotypes-bundle"],
+                                Path(app.config["UPLOAD_FOLDER"]))
+        rqc.validate_bundle(phenobundle)
+        return phenobundle
+    except AssertionError as _aerr:
+        app.logger.debug("File upload error!", exc_info=True)
+        flash("Expected a zipped bundle of files with phenotypes' "
+              "information.",
+              "alert-danger")
+        return error_uri
+    except rqe.RQTLError as rqtlerr:
+        app.logger.debug("Bundle validation error!", exc_info=True)
+        flash("R/qtl2 Error: " + " ".join(rqtlerr.args), "alert-danger")
+        return error_uri
+
+
+def process_phenotypes_individual_files(error_uri):
+    """Process the uploaded individual files."""
+    form = request.form
+    cdata = {
+        "sep": form["file-separator"],
+        "comment.char": form["file-comment-character"],
+        "na.strings": form["file-na"].split(" "),
+    }
+    bundlepath = Path(app.config["UPLOAD_FOLDER"],
+                      f"{str(uuid.uuid4()).replace('-', '')}.zip")
+    with ZipFile(bundlepath, mode="w") as zfile:
+        for rqtlkey, formkey in (("phenocovar", "phenotype-descriptions"),
+                                 ("pheno", "phenotype-data"),
+                                 ("phenose", "phenotype-se"),
+                                 ("phenonum", "phenotype-n")):
+            if form.get("resumable-upload", False):
+                # Chunked upload of large files was used
+                filedata = json.loads(form[formkey])
+                zfile.write(
+                    Path(app.config["UPLOAD_FOLDER"], filedata["uploaded-file"]),
+                    arcname=filedata["original-name"])
+                cdata[rqtlkey] = cdata.get(rqtlkey, []) + [filedata["original-name"]]
+            else:
+                # TODO: Check this path: fix any bugs.
+                _sentfile = request.files[formkey]
+                if not bool(_sentfile):
+                    flash(f"Expected file ('{formkey}') was not provided.",
+                          "alert-danger")
+                    return error_uri
+
+                filepath = save_file(
+                    _sentfile, Path(app.config["UPLOAD_FOLDER"]), hashed=False)
+                zfile.write(
+                    Path(app.config["UPLOAD_FOLDER"], filepath),
+                    arcname=filepath.name)
+                cdata[rqtlkey] = cdata.get(rqtlkey, []) + [filepath.name]
+
+        zfile.writestr("control_data.json", data=json.dumps(cdata, indent=2))
+
+    return bundlepath
+
+
+@phenotypesbp.route(
+    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+    "/<int:dataset_id>/add-phenotypes",
+    methods=["GET", "POST"])
+@require_login
+@with_dataset(
+    species_redirect_uri="species.populations.phenotypes.index",
+    population_redirect_uri="species.populations.phenotypes.select_population",
+    redirect_uri="species.populations.phenotypes.list_datasets")
+def add_phenotypes(species: dict, population: dict, dataset: dict, **kwargs):# pylint: disable=[unused-argument, too-many-locals]
+    """Add one or more phenotypes to the dataset."""
+    use_bundle = request.args.get("use_bundle", "").lower() == "true"
+    add_phenos_uri = redirect(url_for(
+        "species.populations.phenotypes.add_phenotypes",
+        species_id=species["SpeciesId"],
+        population_id=population["Id"],
+        dataset_id=dataset["Id"]))
+    _redisuri = app.config["REDIS_URL"]
+    _sqluri = app.config["SQL_URI"]
+    with (Redis.from_url(_redisuri, decode_responses=True) as rconn,
+          # database_connection(_sqluri) as conn,
+          # conn.cursor(cursorclass=DictCursor) as cursor
+          ):
+        if request.method == "GET":
+            today = datetime.date.today()
+            return render_template(
+                ("phenotypes/add-phenotypes-with-rqtl2-bundle.html"
+                 if use_bundle else "phenotypes/add-phenotypes-raw-files.html"),
+                species=species,
+                population=population,
+                dataset=dataset,
+                monthnames=(
+                    "January", "February", "March", "April",
+                    "May", "June", "July", "August",
+                    "September", "October", "November",
+                    "December"),
+                current_month=today.strftime("%B"),
+                current_year=int(today.strftime("%Y")),
+                families_with_se_and_n=_FAMILIES_WITH_SE_AND_N_,
+                use_bundle=use_bundle,
+                activelink="add-phenotypes")
+
+        phenobundle = (process_phenotypes_rqtl2_bundle(add_phenos_uri)
+                       if use_bundle else
+                       process_phenotypes_individual_files(add_phenos_uri))
+
+        _jobid = uuid.uuid4()
+        _namespace = jobs.jobsnamespace()
+        _ttl_seconds = app.config["JOBS_TTL_SECONDS"]
+        _job = jobs.launch_job(
+            jobs.initialise_job(
+                rconn,
+                _namespace,
+                str(_jobid),
+                [sys.executable, "-m", "scripts.rqtl2.phenotypes_qc", _sqluri,
+                 _redisuri, _namespace, str(_jobid), str(species["SpeciesId"]),
+                 str(population["Id"]),
+                 # str(dataset["Id"]),
+                 str(phenobundle),
+                 "--loglevel",
+                 {
+                     INFO: "INFO",
+                     ERROR: "ERROR",
+                     DEBUG: "DEBUG",
+                     FATAL: "FATAL",
+                     CRITICAL: "CRITICAL",
+                     WARNING: "WARNING"
+                 }[app.logger.getEffectiveLevel()],
+                 "--redisexpiry",
+                 str(_ttl_seconds)], "phenotype_qc", _ttl_seconds,
+                {"job-metadata": json.dumps({
+                    "speciesid": species["SpeciesId"],
+                    "populationid": population["Id"],
+                    "datasetid": dataset["Id"],
+                    "bundle": str(phenobundle.absolute())})}),
+            _redisuri,
+            f"{app.config['UPLOAD_FOLDER']}/job_errors")
+
+        app.logger.debug("JOB DETAILS: %s", _job)
+        jobstatusuri = url_for("species.populations.phenotypes.job_status",
+                               species_id=species["SpeciesId"],
+                               population_id=population["Id"],
+                               dataset_id=dataset["Id"],
+                               job_id=str(_job["jobid"]))
+        return ((jsonify({
+            "redirect-to": jobstatusuri,
+            "statuscode": 200,
+            "message": ("Follow the 'redirect-to' URI to see the state "
+                        "of the quality-control job started for your "
+                        "uploaded files.")
+        }), 200)
+                if request.form.get("resumable-upload", False) else
+                redirect(jobstatusuri))
+
+
+@phenotypesbp.route(
+    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+    "/<int:dataset_id>/job/<uuid:job_id>",
+    methods=["GET"])
+@require_login
+@with_dataset(
+    species_redirect_uri="species.populations.phenotypes.index",
+    population_redirect_uri="species.populations.phenotypes.select_population",
+    redirect_uri="species.populations.phenotypes.list_datasets")
+def job_status(
+        species: dict,
+        population: dict,
+        dataset: dict,
+        job_id: uuid.UUID,
+        **kwargs
+):# pylint: disable=[unused-argument]
+    """Retrieve current status of a particular phenotype QC job."""
+    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+        try:
+            job = jobs.job(rconn, jobs.jobsnamespace(), str(job_id))
+        except jobs.JobNotFound as _jnf:
+            job = None
+        # Use the URL's job_id directly: `job` is None when the job was not
+        # found, and subscripting it would crash this page.
+        return render_template("phenotypes/job-status.html",
+                               species=species,
+                               population=population,
+                               dataset=dataset,
+                               job_id=job_id,
+                               job=job,
+                               errors=jobs.job_errors(
+                                   rconn, jobs.jobsnamespace(), str(job_id)),
+                               metadata=jobs.job_files_metadata(
+                                   rconn, jobs.jobsnamespace(), str(job_id)),
+                               activelink="add-phenotypes")
+
+
+@phenotypesbp.route(
+    "<int:species_id>/populations/<int:population_id>/phenotypes/datasets"
+    "/<int:dataset_id>/job/<uuid:job_id>/review",
+    methods=["GET"])
+@require_login
+@with_dataset(
+    species_redirect_uri="species.populations.phenotypes.index",
+    population_redirect_uri="species.populations.phenotypes.select_population",
+    redirect_uri="species.populations.phenotypes.list_datasets")
+def review_job_data(
+        species: dict,
+        population: dict,
+        dataset: dict,
+        job_id: uuid.UUID,
+        **kwargs
+):# pylint: disable=[unused-argument]
+    """Review data one more time before entering it into the database."""
+    with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
+        try:
+            job = jobs.job(rconn, jobs.jobsnamespace(), str(job_id))
+        except jobs.JobNotFound as _jnf:
+            job = None
+
+        def __metadata_by_type__(by_type, item):
+            filetype = item[1]["filetype"]
+            return {
+                **by_type,
+                filetype: (by_type.get(filetype, tuple())
+                           + ({"filename": item[0], **item[1]},))
+            }
+        metadata: dict[str, Any] = reduce(
+            __metadata_by_type__,
+            (jobs.job_files_metadata(
+                rconn, jobs.jobsnamespace(), job['jobid'])
+             if job else {}).items(),
+            {})
+
+        def __desc__(filetype):
+            match filetype:
+                case "phenocovar":
+                    desc = "phenotypes"
+                case "pheno":
+                    desc = "phenotypes data"
+                case "phenose":
+                    desc = "phenotypes standard-errors"
+                case "phenonum":
+                    desc = "phenotypes samples"
+                case _:
+                    desc = f"unknown file type '{filetype}'."
+
+            return desc
+
+        def __summarise__(filetype, files):
+            return {
+                "filetype": filetype,
+                "number-of-files": len(files),
+                "total-data-rows": sum(
+                    int(afile["linecount"]) - 1 for afile in files),
+                "description": __desc__(filetype)
+            }
+
+        summary = {
+            filetype: __summarise__(filetype, meta)
+            for filetype, meta in metadata.items()
+        }
+        return render_template("phenotypes/review-job-data.html",
+                               species=species,
+                               population=population,
+                               dataset=dataset,
+                               job_id=job_id,
+                               job=job,
+                               summary=summary,
+                               activelink="add-phenotypes")
+
+
+def update_phenotype_metadata(conn, metadata: dict):
+    """Update a phenotype's basic metadata values."""
+    with conn.cursor(cursorclass=DictCursor) as cursor:
+        cursor.execute("SELECT * FROM Phenotype WHERE Id=%(phenotype-id)s",
+                       metadata)
+        res = {
+            **{
+                _key: _val for _key, _val in {
+                    key.lower().replace("_", "-"): value
+                    for key, value in (cursor.fetchone() or {}).items()
+                }.items()
+                if _key in metadata.keys()
+            },
+            "phenotype-id": metadata.get("phenotype-id")
+        }
+        if res == metadata:
+            return False
+
+        cursor.execute(
+            "UPDATE Phenotype SET "
+            "Pre_publication_description=%(pre-publication-description)s, "
+            "Post_publication_description=%(post-publication-description)s, "
+            "Original_description=%(original-description)s, "
+            "Units=%(units)s, "
+            "Pre_publication_abbreviation=%(pre-publication-abbreviation)s, "
+            "Post_publication_abbreviation=%(post-publication-abbreviation)s "
+            "WHERE Id=%(phenotype-id)s",
+            metadata)
+        return cursor.rowcount
+
+
+def update_phenotype_values(conn, values):
+    """Update a phenotype's data values."""
+    with conn.cursor() as cursor:
+        cursor.executemany(
+            "UPDATE PublishData SET value=%(new)s "
+            "WHERE Id=%(data_id)s AND StrainId=%(strain_id)s",
+            tuple(item for item in values if item["new"] is not None))
+        cursor.executemany(
+            "DELETE FROM PublishData "
+            "WHERE Id=%(data_id)s AND StrainId=%(strain_id)s",
+            tuple(item for item in values if item["new"] is None))
+        return len(values)
+
+
+def update_phenotype_se(conn, serrs):
+    """Update a phenotype's standard-error values."""
+    with conn.cursor() as cursor:
+        cursor.executemany(
+            "INSERT INTO PublishSE(DataId, StrainId, error) "
+            "VALUES(%(data_id)s, %(strain_id)s, %(new)s) "
+            "ON DUPLICATE KEY UPDATE error=VALUES(error)",
+            tuple(item for item in serrs if item["new"] is not None))
+        cursor.executemany(
+            "DELETE FROM PublishSE "
+            "WHERE DataId=%(data_id)s AND StrainId=%(strain_id)s",
+            tuple(item for item in serrs if item["new"] is None))
+        return len(serrs)
+
+
+def update_phenotype_n(conn, counts):
+    """Update a phenotype's strain counts."""
+    with conn.cursor() as cursor:
+        cursor.executemany(
+            "INSERT INTO NStrain(DataId, StrainId, count) "
+            "VALUES(%(data_id)s, %(strain_id)s, %(new)s) "
count=VALUES(count)", + tuple(item for item in counts if item["new"] is not None)) + cursor.executemany( + "DELETE FROM NStrain " + "WHERE DataId=%(data_id)s AND StrainId=%(strain_id)s", + tuple(item for item in counts if item["new"] is None)) + return len(counts) + + return 0 + + +def update_phenotype_data(conn, data: dict): + """Update the numeric data for a phenotype.""" + def __organise_by_dataid_and_strainid__(acc, current): + _key, dataid, strainid = current[0].split("::") + _keysrc, _keytype = _key.split("-") + newkey = f"{dataid}::{strainid}" + newitem = acc.get(newkey, {}) + newitem[_keysrc] = newitem.get(_keysrc, {}) + newitem[_keysrc][_keytype] = current[1] + return {**acc, newkey: newitem} + + def __separate_items__(acc, row): + key, val = row + return ({ + **acc[0], + key: { + **val["value"], + "changed?": (not val["value"]["new"] == val["value"]["original"]) + } + }, { + **acc[1], + key: { + **val["se"], + "changed?": (not val["se"]["new"] == val["se"]["original"]) + } + },{ + **acc[2], + key: { + **val["n"], + "changed?": (not val["n"]["new"] == val["n"]["original"]) + } + }) + + values, serrs, counts = tuple( + tuple({ + "data_id": row[0].split("::")[0], + "strain_id": row[0].split("::")[1], + "new": row[1]["new"] + } for row in item) + for item in ( + filter(lambda val: val[1]["changed?"], item.items())# type: ignore[arg-type] + for item in reduce(# type: ignore[var-annotated] + __separate_items__, + reduce(__organise_by_dataid_and_strainid__, + data.items(), + {}).items(), + ({}, {}, {})))) + + return (update_phenotype_values(conn, values), + update_phenotype_se(conn, serrs), + update_phenotype_n(conn, counts)) + + +@phenotypesbp.route( + "<int:species_id>/populations/<int:population_id>/phenotypes/datasets" + "/<int:dataset_id>/phenotype/<int:xref_id>/edit", + methods=["GET", "POST"]) +@require_login +@with_dataset( + species_redirect_uri="species.populations.phenotypes.index", + population_redirect_uri="species.populations.phenotypes.select_population", + redirect_uri="species.populations.phenotypes.list_datasets") +def edit_phenotype_data(# pylint: disable=[unused-argument] + species: dict, + population: dict, + dataset: dict, + xref_id: int, + **kwargs +): + """Edit the data for a particular phenotype.""" + def __render__(**kwargs): + processed_kwargs = { + **kwargs, + "privileges": (kwargs.get("privileges", tuple()) + ### For demo! Do not commit this part + + ("group:resource:edit-resource", + "group:resource:delete-resource",) + ### END: For demo! 
+                           )
+        }
+        return render_template(
+            "phenotypes/edit-phenotype.html",
+            species=species,
+            population=population,
+            dataset=dataset,
+            xref_id=xref_id,
+            families_with_se_and_n=_FAMILIES_WITH_SE_AND_N_,
+            **processed_kwargs,
+            activelink="edit-phenotype")
+
+    with database_connection(app.config["SQL_URI"]) as conn:
+        if request.method == "GET":
+            def __fetch_phenotype__(privileges):
+                phenotype = phenotype_by_id(conn,
+                                            species["SpeciesId"],
+                                            population["Id"],
+                                            dataset["Id"],
+                                            xref_id)
+                if phenotype is None:
+                    msg = ("Could not find the phenotype with cross-reference "
+                           f"ID '{xref_id}' from dataset "
+                           f"'{dataset['FullName']}' from the "
+                           f"'{population['FullName']}' population of species "
+                           f"'{species['FullName']}'.")
+                    return Left({"privileges": privileges, "phenotype-error": msg})
+                return {"privileges": privileges, "phenotype": phenotype}
+
+            def __fetch_publication_data__(**kwargs):
+                pheno = kwargs["phenotype"]
+                return {
+                    **kwargs,
+                    "publication_data": phenotype_publication_data(
+                        conn, pheno["Id"])
+                }
+
+            def __fail__(failure_object):
+                # process the object
+                return __render__(failure_object=failure_object)
+
+            return oauth2_post(
+                "/auth/resource/phenotypes/individual/linked-resource",
+                json={
+                    "species_id": species["SpeciesId"],
+                    "population_id": population["Id"],
+                    "dataset_id": dataset["Id"],
+                    "xref_id": xref_id
+                }
+            ).then(
+                lambda resource: tuple(
+                    privilege["privilege_id"] for role in resource["roles"]
+                    for privilege in role["privileges"])
+            ).then(
+                __fetch_phenotype__
+            ).then(
+                lambda args: __fetch_publication_data__(**args)
+            ).either(__fail__, lambda args: __render__(**args))
+
+        ## POST
+        _change = False
+        match request.form.get("submit", "invalid-action"):
+            case "update basic metadata":
+                _change = update_phenotype_metadata(conn, {
+                    key: value.strip() if bool(value.strip()) else None
+                    for key, value in request.form.items()
+                    if key not in ("submit",)
+                })
+                msg = "Basic metadata was updated successfully."
+            case "update data":
+                _update = update_phenotype_data(conn, {
+                    key: value.strip() if bool(value.strip()) else None
+                    for key, value in request.form.items()
+                    if key not in ("submit",)
+                })
+                msg = (f"{_update[0]} value rows, {_update[1]} standard-error "
+                       f"rows and {_update[2]} 'N' rows were updated.")
+                _change = any(item != 0 for item in _update)
+            case "update publication":
+                flash("NOT IMPLEMENTED: Would update publication data.",
+                      "alert-success")
+            case _:
+                flash("Invalid phenotype editing action.", "alert-danger")
+
+        if _change:
+            flash(msg, "alert-success")
+            return redirect(url_for(
+                "species.populations.phenotypes.view_phenotype",
+                species_id=species["SpeciesId"],
+                population_id=population["Id"],
+                dataset_id=dataset["Id"],
+                xref_id=xref_id))
+
+        flash("No change was made by the user.", "alert-info")
+        return redirect(url_for(
+            "species.populations.phenotypes.edit_phenotype_data",
+            species_id=species["SpeciesId"],
+            population_id=population["Id"],
+            dataset_id=dataset["Id"],
+            xref_id=xref_id))
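
Note: a minimal usage sketch for the new models API follows. It is illustrative only: the SQL_URI value and the ID numbers are hypothetical, and functions that call debug_query (for example dataset_phenotypes) additionally expect a Flask application context, since they log through flask.current_app; the two functions used below do not.

    """Hypothetical usage sketch for uploader.phenotypes.models."""
    from gn_libs.mysqldb import database_connection

    from uploader.phenotypes.models import (
        phenotypes_count, datasets_by_population)

    SQL_URI = "mysql://user:pass@localhost/db_webqtl"  # hypothetical URI

    with database_connection(SQL_URI) as conn:
        # Each row is a 'PublishFreeze' record for the given population.
        for dataset in datasets_by_population(conn, species_id=1, population_id=1):
            total = phenotypes_count(conn, 1, dataset["Id"])
            print(f"{dataset['Name']}: {total} phenotypes")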
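Note: __phenotype_se__ above fetches standard errors and sample counts for many (DataId, StrainId) pairs in a single query by building a row-constructor IN clause. A standalone sketch of that parameter-flattening pattern, with hypothetical pairs:

    # Sketch of the "(DataId, StrainId) IN ((%s, %s), ...)" pattern used by
    # __phenotype_se__; the pairs below are hypothetical.
    pairs = ((17, 4), (17, 9), (23, 4))  # (DataId, StrainId) tuples
    paramstr = ", ".join(["(%s, %s)"] * len(pairs))  # "(%s, %s), (%s, %s), (%s, %s)"
    flat = tuple(item for pair in pairs for item in pair)  # (17, 4, 17, 9, 23, 4)
    query = f"SELECT * FROM PublishSE WHERE (DataId, StrainId) IN ({paramstr})"
    # cursor.execute(query, flat)  # one round-trip fetches every pair

Because the placeholders are parameterised and only their count is interpolated, this stays safe against SQL injection while avoiding one query per pair.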
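Note: the edit form template is not part of this diff, so the exact field names it emits are not shown here. The shape below is inferred from the parsing in __organise_by_dataid_and_strainid__ and __separate_items__ ("<field>-<state>::<DataId>::<StrainId>"); the concrete IDs and values are hypothetical.

    # Hypothetical form payload that update_phenotype_data would accept.
    form = {
        "value-original::12345::7": "1.25",
        "value-new::12345::7": "1.30",  # changed -> one PublishData UPDATE
        "se-original::12345::7": "0.10",
        "se-new::12345::7": "0.10",     # unchanged -> filtered out
        "n-original::12345::7": "6",
        "n-new::12345::7": None,        # None -> DELETE from NStrain
    }
    # update_phenotype_data(conn, form) returns a tuple of row counts:
    # (changed value rows, changed standard-error rows, changed 'N' rows)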